@toiroakr/lines-db 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/cli.js +378 -415
- package/dist/index.cjs +195 -327
- package/dist/index.d.cts +64 -84
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +64 -84
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +197 -328
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/src/cli.ts +226 -126
- package/src/database.ts +296 -52
- package/src/index.ts +2 -2
- package/src/jsonl-migration.ts +24 -56
- package/src/schema.ts +37 -32
- package/src/types.ts +21 -0
- package/src/validator.test.ts +0 -507
- package/src/validator.ts +0 -441
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createRequire } from "node:module";
|
|
2
|
-
import { access, mkdir, readFile, readdir,
|
|
2
|
+
import { access, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "node:path";
|
|
4
4
|
import { pathToFileURL } from "node:url";
|
|
5
5
|
|
|
@@ -240,8 +240,7 @@ var DirectoryScanner = class {
|
|
|
240
240
|
* Define a bidirectional schema with optional backward transformation
|
|
241
241
|
*
|
|
242
242
|
* @param schema - Standard Schema for validation
|
|
243
|
-
* @param
|
|
244
|
-
* Required when schema performs transformations
|
|
243
|
+
* @param options - SchemaOptions object. When Input and Output types differ, backward transformation is required
|
|
245
244
|
*
|
|
246
245
|
* @example
|
|
247
246
|
* // No transformation - backward not needed
|
|
@@ -250,10 +249,12 @@ var DirectoryScanner = class {
|
|
|
250
249
|
* );
|
|
251
250
|
*
|
|
252
251
|
* @example
|
|
253
|
-
* // With transformation - backward
|
|
252
|
+
* // With transformation - backward REQUIRED
|
|
254
253
|
* const schema = defineSchema(
|
|
255
254
|
* v.pipe(v.string(), v.transform(Number)),
|
|
256
|
-
*
|
|
255
|
+
* {
|
|
256
|
+
* backward: (num) => String(num) // backward: number → string (REQUIRED)
|
|
257
|
+
* }
|
|
257
258
|
* );
|
|
258
259
|
*
|
|
259
260
|
* @example
|
|
@@ -268,14 +269,14 @@ var DirectoryScanner = class {
|
|
|
268
269
|
* }
|
|
269
270
|
* );
|
|
270
271
|
*/
|
|
271
|
-
function defineSchema(schema,
|
|
272
|
+
function defineSchema(schema, ...args) {
|
|
273
|
+
const options = args[0];
|
|
272
274
|
const bidirectionalSchema = Object.create(schema);
|
|
273
|
-
if (
|
|
274
|
-
|
|
275
|
-
if (
|
|
276
|
-
if (
|
|
277
|
-
if (
|
|
278
|
-
if (optionsOrBackward.indexes) bidirectionalSchema.indexes = optionsOrBackward.indexes;
|
|
275
|
+
if (options) {
|
|
276
|
+
if (options.backward) bidirectionalSchema.backward = options.backward;
|
|
277
|
+
if (options.primaryKey) bidirectionalSchema.primaryKey = options.primaryKey;
|
|
278
|
+
if (options.foreignKeys) bidirectionalSchema.foreignKeys = options.foreignKeys;
|
|
279
|
+
if (options.indexes) bidirectionalSchema.indexes = options.indexes;
|
|
279
280
|
}
|
|
280
281
|
Object.defineProperty(bidirectionalSchema, "~standard", {
|
|
281
282
|
value: schema["~standard"],
|
|
@@ -308,27 +309,49 @@ var LinesDB = class LinesDB {
|
|
|
308
309
|
return new LinesDB(config, dbPath);
|
|
309
310
|
}
|
|
310
311
|
/**
|
|
311
|
-
* Initialize database by loading all JSONL files
|
|
312
|
+
* Initialize database by loading all JSONL files or a specific table
|
|
312
313
|
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
314
|
+
* @param options Optional configuration for initialization
|
|
315
|
+
* @param options.tableName Optional table name to initialize. If not provided, initializes all tables
|
|
316
|
+
* @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
|
|
317
|
+
* @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
|
|
318
|
+
* @returns ValidationResult containing validation status, errors, and warnings
|
|
313
319
|
*/
|
|
314
|
-
async initialize() {
|
|
320
|
+
async initialize(options) {
|
|
321
|
+
const allErrors = [];
|
|
322
|
+
const allWarnings = [];
|
|
323
|
+
const tableName = options?.tableName;
|
|
324
|
+
const detailedValidate = options?.detailedValidate ?? false;
|
|
325
|
+
const transform = options?.transform;
|
|
315
326
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
327
|
+
const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
|
|
328
|
+
for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
|
|
316
329
|
const loadedTables = /* @__PURE__ */ new Set();
|
|
317
330
|
const loadingTables = /* @__PURE__ */ new Set();
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
this.validationSchemas.delete(tableName);
|
|
331
|
+
const attemptedTables = /* @__PURE__ */ new Set();
|
|
332
|
+
for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
|
|
333
|
+
const tableTransform = tableNameToLoad === tableName ? transform : void 0;
|
|
334
|
+
const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
|
|
335
|
+
allErrors.push(...errors);
|
|
336
|
+
allWarnings.push(...warnings);
|
|
325
337
|
}
|
|
338
|
+
return {
|
|
339
|
+
valid: allErrors.length === 0,
|
|
340
|
+
errors: allErrors,
|
|
341
|
+
warnings: allWarnings
|
|
342
|
+
};
|
|
326
343
|
}
|
|
327
344
|
/**
|
|
328
345
|
* Load a table and its dependencies recursively
|
|
329
346
|
*/
|
|
330
|
-
async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
|
|
331
|
-
|
|
347
|
+
async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
|
|
348
|
+
const errors = [];
|
|
349
|
+
const warnings = [];
|
|
350
|
+
if (attemptedTables.has(tableName)) return {
|
|
351
|
+
errors,
|
|
352
|
+
warnings
|
|
353
|
+
};
|
|
354
|
+
attemptedTables.add(tableName);
|
|
332
355
|
if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
333
356
|
const tableConfig = this.tables.get(tableName);
|
|
334
357
|
if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
|
|
@@ -342,21 +365,35 @@ var LinesDB = class LinesDB {
|
|
|
342
365
|
} catch {}
|
|
343
366
|
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
344
367
|
const referencedTable = fk.references.table;
|
|
345
|
-
if (
|
|
346
|
-
|
|
368
|
+
if (referencedTable === tableName) continue;
|
|
369
|
+
if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
|
|
370
|
+
const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
|
|
371
|
+
errors.push(...depResult.errors);
|
|
372
|
+
warnings.push(...depResult.warnings);
|
|
373
|
+
} else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
|
|
374
|
+
}
|
|
375
|
+
const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
|
|
376
|
+
errors.push(...loadErrors);
|
|
377
|
+
if (loaded) loadedTables.add(tableName);
|
|
378
|
+
else {
|
|
379
|
+
warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
|
|
380
|
+
this.tables.delete(tableName);
|
|
347
381
|
}
|
|
348
|
-
if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
|
|
349
|
-
else this.tables.delete(tableName);
|
|
350
382
|
} finally {
|
|
351
383
|
loadingTables.delete(tableName);
|
|
352
384
|
}
|
|
385
|
+
return {
|
|
386
|
+
errors,
|
|
387
|
+
warnings
|
|
388
|
+
};
|
|
353
389
|
}
|
|
354
390
|
/**
|
|
355
391
|
* Load a single table from JSONL file
|
|
356
|
-
* @returns
|
|
392
|
+
* @returns Object with loaded status and validation errors
|
|
357
393
|
*/
|
|
358
|
-
async loadTable(tableName, config) {
|
|
359
|
-
|
|
394
|
+
async loadTable(tableName, config, detailedValidate, transform) {
|
|
395
|
+
let data = await JsonlReader.read(config.jsonlPath);
|
|
396
|
+
if (transform) data = data.map((row) => transform(row));
|
|
360
397
|
let validationSchema = config.validationSchema;
|
|
361
398
|
const schemaMetadata = {};
|
|
362
399
|
if (!validationSchema) try {
|
|
@@ -390,18 +427,32 @@ var LinesDB = class LinesDB {
|
|
|
390
427
|
else throw error;
|
|
391
428
|
}
|
|
392
429
|
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
}
|
|
430
|
+
const validationErrorDetails = validationErrors.map((ve) => ({
|
|
431
|
+
file: config.jsonlPath,
|
|
432
|
+
tableName,
|
|
433
|
+
rowIndex: ve.rowIndex,
|
|
434
|
+
issues: ve.error.issues,
|
|
435
|
+
type: "schema"
|
|
436
|
+
}));
|
|
437
|
+
if (validationErrors.length > 0) return {
|
|
438
|
+
loaded: false,
|
|
439
|
+
errors: validationErrorDetails
|
|
440
|
+
};
|
|
400
441
|
let schema;
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
schema =
|
|
442
|
+
let inferredSchema;
|
|
443
|
+
if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
|
|
444
|
+
if (config.schema) {
|
|
445
|
+
schema = config.schema;
|
|
446
|
+
if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
|
|
447
|
+
const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
|
|
448
|
+
if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
|
|
449
|
+
}
|
|
450
|
+
} else if (config.autoInferSchema !== false) {
|
|
451
|
+
if (validatedData.length === 0) return {
|
|
452
|
+
loaded: false,
|
|
453
|
+
errors: []
|
|
454
|
+
};
|
|
455
|
+
schema = inferredSchema;
|
|
405
456
|
} else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
406
457
|
const biSchema = validationSchema;
|
|
407
458
|
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
@@ -418,8 +469,17 @@ var LinesDB = class LinesDB {
|
|
|
418
469
|
if (indexes) schema.indexes = indexes;
|
|
419
470
|
this.schemas.set(tableName, schema);
|
|
420
471
|
this.createTable(schema);
|
|
421
|
-
|
|
422
|
-
|
|
472
|
+
if (detailedValidate) {
|
|
473
|
+
const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
|
|
474
|
+
if (insertErrors.length > 0) return {
|
|
475
|
+
loaded: false,
|
|
476
|
+
errors: insertErrors
|
|
477
|
+
};
|
|
478
|
+
} else this.insertData(tableName, schema, validatedData);
|
|
479
|
+
return {
|
|
480
|
+
loaded: true,
|
|
481
|
+
errors: []
|
|
482
|
+
};
|
|
423
483
|
}
|
|
424
484
|
/**
|
|
425
485
|
* Create table in SQLite with constraints and indexes
|
|
@@ -464,18 +524,85 @@ var LinesDB = class LinesDB {
|
|
|
464
524
|
return `"${identifier.replace(/"/g, "\"\"")}"`;
|
|
465
525
|
}
|
|
466
526
|
/**
|
|
467
|
-
* Insert data into table
|
|
527
|
+
* Insert data into table using batch insert (multiple rows per SQL)
|
|
528
|
+
* SQLite has a parameter limit (default 999), so we batch rows accordingly
|
|
529
|
+
* Throws exception if any constraint violation occurs
|
|
468
530
|
*/
|
|
469
531
|
insertData(tableName, schema, data) {
|
|
532
|
+
if (data.length === 0) return;
|
|
533
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
534
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
535
|
+
const columnCount = columnNames.length;
|
|
536
|
+
const maxBatchSize = Math.floor(900 / columnCount);
|
|
537
|
+
const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
|
|
538
|
+
for (let i = 0; i < data.length; i += batchSize) {
|
|
539
|
+
const batch = data.slice(i, i + batchSize);
|
|
540
|
+
const rowPlaceholders = columnNames.map(() => "?").join(", ");
|
|
541
|
+
const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
|
|
542
|
+
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
|
|
543
|
+
const values = [];
|
|
544
|
+
for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
|
|
545
|
+
this.db.prepare(sql).run(...values);
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
/**
|
|
549
|
+
* Insert data into table one row at a time with detailed error reporting
|
|
550
|
+
* This is used for validation to catch constraint violations
|
|
551
|
+
*/
|
|
552
|
+
insertDataWithDetailedValidation(tableName, schema, data, filePath) {
|
|
553
|
+
const errors = [];
|
|
470
554
|
const columnNames = schema.columns.map((col) => col.name);
|
|
471
555
|
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
472
556
|
const placeholders = columnNames.map(() => "?").join(", ");
|
|
473
557
|
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
474
558
|
const stmt = this.db.prepare(sql);
|
|
475
|
-
for (
|
|
476
|
-
const
|
|
477
|
-
|
|
559
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
560
|
+
const row = data[rowIndex];
|
|
561
|
+
try {
|
|
562
|
+
const values = columnNames.map((col) => this.normalizeValue(row[col]));
|
|
563
|
+
stmt.run(...values);
|
|
564
|
+
} catch (error) {
|
|
565
|
+
const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
|
|
566
|
+
if (constraintError) errors.push(constraintError);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
return errors;
|
|
570
|
+
}
|
|
571
|
+
/**
|
|
572
|
+
* Analyze constraint error and extract detailed information
|
|
573
|
+
*/
|
|
574
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
|
|
575
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
576
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
577
|
+
const fkValue = row[fk.column];
|
|
578
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
579
|
+
try {
|
|
580
|
+
const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
|
|
581
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
582
|
+
file,
|
|
583
|
+
tableName,
|
|
584
|
+
rowIndex,
|
|
585
|
+
issues: [],
|
|
586
|
+
type: "foreignKey",
|
|
587
|
+
foreignKeyError: {
|
|
588
|
+
column: fk.column,
|
|
589
|
+
value: fkValue,
|
|
590
|
+
referencedTable: fk.references.table,
|
|
591
|
+
referencedColumn: fk.references.column
|
|
592
|
+
}
|
|
593
|
+
};
|
|
594
|
+
} catch (_) {}
|
|
478
595
|
}
|
|
596
|
+
return {
|
|
597
|
+
file,
|
|
598
|
+
tableName,
|
|
599
|
+
rowIndex,
|
|
600
|
+
issues: [{
|
|
601
|
+
message: errorMessage,
|
|
602
|
+
path: []
|
|
603
|
+
}],
|
|
604
|
+
type: "schema"
|
|
605
|
+
};
|
|
479
606
|
}
|
|
480
607
|
/**
|
|
481
608
|
* Execute a raw SQL query
|
|
@@ -887,9 +1014,13 @@ var LinesDB = class LinesDB {
|
|
|
887
1014
|
/**
|
|
888
1015
|
* Sync database changes back to JSONL files
|
|
889
1016
|
* Uses backward transformation when available
|
|
1017
|
+
* @param tableName Optional table name to sync. If not provided, syncs all loaded tables
|
|
890
1018
|
*/
|
|
891
|
-
async sync() {
|
|
892
|
-
|
|
1019
|
+
async sync(tableName) {
|
|
1020
|
+
if (tableName) {
|
|
1021
|
+
if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
|
|
1022
|
+
await this.syncTable(tableName);
|
|
1023
|
+
} else for (const [name] of this.schemas) await this.syncTable(name);
|
|
893
1024
|
}
|
|
894
1025
|
/**
|
|
895
1026
|
* Execute a function within a transaction
|
|
@@ -1036,295 +1167,33 @@ function sanitizeIdentifier(value) {
|
|
|
1036
1167
|
return value.replace(/[^A-Za-z0-9_$]/g, "");
|
|
1037
1168
|
}
|
|
1038
1169
|
|
|
1039
|
-
//#endregion
|
|
1040
|
-
//#region src/validator.ts
|
|
1041
|
-
var Validator = class {
|
|
1042
|
-
path;
|
|
1043
|
-
projectRoot;
|
|
1044
|
-
constructor(options) {
|
|
1045
|
-
this.path = options.path;
|
|
1046
|
-
this.projectRoot = options.projectRoot || process.cwd();
|
|
1047
|
-
}
|
|
1048
|
-
/**
|
|
1049
|
-
* Validate JSONL file(s)
|
|
1050
|
-
*/
|
|
1051
|
-
async validate() {
|
|
1052
|
-
const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
|
|
1053
|
-
const stats = await stat(fullPath);
|
|
1054
|
-
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
1055
|
-
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
1056
|
-
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
1057
|
-
}
|
|
1058
|
-
/**
|
|
1059
|
-
* Validate all JSONL files in a directory
|
|
1060
|
-
*/
|
|
1061
|
-
async validateDirectory(dirPath) {
|
|
1062
|
-
const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1063
|
-
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
1064
|
-
const allErrors = [];
|
|
1065
|
-
const allWarnings = [];
|
|
1066
|
-
const filesWithSchema = [];
|
|
1067
|
-
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
1068
|
-
else {
|
|
1069
|
-
const tableName = basename(file, ".jsonl");
|
|
1070
|
-
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
1071
|
-
}
|
|
1072
|
-
for (const file of filesWithSchema) {
|
|
1073
|
-
const result = await this.validateFile(file);
|
|
1074
|
-
allErrors.push(...result.errors);
|
|
1075
|
-
allWarnings.push(...result.warnings);
|
|
1076
|
-
}
|
|
1077
|
-
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1078
|
-
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1079
|
-
allErrors.push(...dbErrors);
|
|
1080
|
-
}
|
|
1081
|
-
return {
|
|
1082
|
-
valid: allErrors.length === 0,
|
|
1083
|
-
errors: allErrors,
|
|
1084
|
-
warnings: allWarnings
|
|
1085
|
-
};
|
|
1086
|
-
}
|
|
1087
|
-
/**
|
|
1088
|
-
* Validate by loading data into database one row at a time
|
|
1089
|
-
* This catches constraint violations and extracts detailed error information
|
|
1090
|
-
*/
|
|
1091
|
-
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1092
|
-
const errors = [];
|
|
1093
|
-
try {
|
|
1094
|
-
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1095
|
-
for (const file of jsonlFiles) {
|
|
1096
|
-
const tableName = basename(file, ".jsonl");
|
|
1097
|
-
const data = await JsonlReader.read(file);
|
|
1098
|
-
let schema;
|
|
1099
|
-
let foreignKeys = [];
|
|
1100
|
-
let indexes = [];
|
|
1101
|
-
let primaryKey;
|
|
1102
|
-
try {
|
|
1103
|
-
schema = await SchemaLoader.loadSchema(file);
|
|
1104
|
-
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1105
|
-
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1106
|
-
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1107
|
-
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1108
|
-
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1109
|
-
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1110
|
-
} catch (_error) {
|
|
1111
|
-
continue;
|
|
1112
|
-
}
|
|
1113
|
-
try {
|
|
1114
|
-
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1115
|
-
this.createTableInDb(db, tableSchema);
|
|
1116
|
-
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1117
|
-
const row = data[rowIndex];
|
|
1118
|
-
try {
|
|
1119
|
-
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1120
|
-
} catch (error) {
|
|
1121
|
-
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1122
|
-
if (constraintError) errors.push(constraintError);
|
|
1123
|
-
}
|
|
1124
|
-
}
|
|
1125
|
-
} catch (_error) {
|
|
1126
|
-
continue;
|
|
1127
|
-
}
|
|
1128
|
-
}
|
|
1129
|
-
await db.close();
|
|
1130
|
-
} catch (error) {
|
|
1131
|
-
errors.push({
|
|
1132
|
-
file: dirPath,
|
|
1133
|
-
tableName: "database",
|
|
1134
|
-
rowIndex: 0,
|
|
1135
|
-
issues: [{
|
|
1136
|
-
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1137
|
-
path: []
|
|
1138
|
-
}],
|
|
1139
|
-
type: "schema"
|
|
1140
|
-
});
|
|
1141
|
-
}
|
|
1142
|
-
return errors;
|
|
1143
|
-
}
|
|
1144
|
-
/**
|
|
1145
|
-
* Create table schema from data and validation schema
|
|
1146
|
-
*/
|
|
1147
|
-
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1148
|
-
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1149
|
-
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1150
|
-
if (primaryKey) {
|
|
1151
|
-
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1152
|
-
if (pkColumn) pkColumn.primaryKey = true;
|
|
1153
|
-
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1154
|
-
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1155
|
-
if (idColumn) idColumn.primaryKey = true;
|
|
1156
|
-
}
|
|
1157
|
-
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1158
|
-
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1159
|
-
return schema;
|
|
1160
|
-
}
|
|
1161
|
-
/**
|
|
1162
|
-
* Create table in database
|
|
1163
|
-
*/
|
|
1164
|
-
createTableInDb(db, schema) {
|
|
1165
|
-
const columns = schema.columns.map((col) => {
|
|
1166
|
-
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1167
|
-
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1168
|
-
return colDef;
|
|
1169
|
-
});
|
|
1170
|
-
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1171
|
-
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1172
|
-
db.execute(sql);
|
|
1173
|
-
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1174
|
-
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1175
|
-
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1176
|
-
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1177
|
-
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1178
|
-
db.execute(indexSql);
|
|
1179
|
-
}
|
|
1180
|
-
}
|
|
1181
|
-
/**
|
|
1182
|
-
* Insert a row into database
|
|
1183
|
-
*/
|
|
1184
|
-
insertRowIntoDb(db, tableName, schema, row) {
|
|
1185
|
-
const columnNames = schema.columns.map((col) => col.name);
|
|
1186
|
-
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1187
|
-
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1188
|
-
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1189
|
-
const values = columnNames.map((col) => {
|
|
1190
|
-
const value = row[col];
|
|
1191
|
-
if (value === null || value === void 0) return null;
|
|
1192
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1193
|
-
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1194
|
-
return value;
|
|
1195
|
-
});
|
|
1196
|
-
db.execute(sql, values);
|
|
1197
|
-
}
|
|
1198
|
-
/**
|
|
1199
|
-
* Analyze constraint error and extract detailed information
|
|
1200
|
-
*/
|
|
1201
|
-
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1202
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1203
|
-
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1204
|
-
const fkValue = row[fk.column];
|
|
1205
|
-
if (fkValue === null || fkValue === void 0) continue;
|
|
1206
|
-
try {
|
|
1207
|
-
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1208
|
-
if (result.length > 0 && result[0].count === 0) return {
|
|
1209
|
-
file,
|
|
1210
|
-
tableName,
|
|
1211
|
-
rowIndex,
|
|
1212
|
-
issues: [],
|
|
1213
|
-
type: "foreignKey",
|
|
1214
|
-
foreignKeyError: {
|
|
1215
|
-
column: fk.column,
|
|
1216
|
-
value: fkValue,
|
|
1217
|
-
referencedTable: fk.references.table,
|
|
1218
|
-
referencedColumn: fk.references.column
|
|
1219
|
-
}
|
|
1220
|
-
};
|
|
1221
|
-
} catch (_) {}
|
|
1222
|
-
}
|
|
1223
|
-
return {
|
|
1224
|
-
file,
|
|
1225
|
-
tableName,
|
|
1226
|
-
rowIndex,
|
|
1227
|
-
issues: [{
|
|
1228
|
-
message: errorMessage,
|
|
1229
|
-
path: []
|
|
1230
|
-
}],
|
|
1231
|
-
type: "schema"
|
|
1232
|
-
};
|
|
1233
|
-
}
|
|
1234
|
-
/**
|
|
1235
|
-
* Quote SQL identifier
|
|
1236
|
-
*/
|
|
1237
|
-
quoteIdentifier(name) {
|
|
1238
|
-
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1239
|
-
}
|
|
1240
|
-
/**
|
|
1241
|
-
* Validate a single JSONL file
|
|
1242
|
-
*/
|
|
1243
|
-
async validateFile(filePath) {
|
|
1244
|
-
const tableName = basename(filePath, ".jsonl");
|
|
1245
|
-
const data = await JsonlReader.read(filePath);
|
|
1246
|
-
const schema = await SchemaLoader.loadSchema(filePath);
|
|
1247
|
-
const errors = [];
|
|
1248
|
-
for (let i = 0; i < data.length; i++) {
|
|
1249
|
-
const row = data[i];
|
|
1250
|
-
const result = schema["~standard"].validate(row);
|
|
1251
|
-
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
1252
|
-
if (result.issues && result.issues.length > 0) errors.push({
|
|
1253
|
-
file: filePath,
|
|
1254
|
-
tableName,
|
|
1255
|
-
rowIndex: i,
|
|
1256
|
-
issues: result.issues,
|
|
1257
|
-
type: "schema"
|
|
1258
|
-
});
|
|
1259
|
-
}
|
|
1260
|
-
if (errors.length === 0) {
|
|
1261
|
-
const dirPath = dirname(filePath);
|
|
1262
|
-
const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1263
|
-
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1264
|
-
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1265
|
-
}
|
|
1266
|
-
return {
|
|
1267
|
-
valid: errors.length === 0,
|
|
1268
|
-
errors,
|
|
1269
|
-
warnings: []
|
|
1270
|
-
};
|
|
1271
|
-
}
|
|
1272
|
-
};
|
|
1273
|
-
|
|
1274
1170
|
//#endregion
|
|
1275
1171
|
//#region src/jsonl-migration.ts
|
|
1276
1172
|
/**
|
|
1277
1173
|
* Validate a table by temporarily supplying in-memory rows while reusing the existing LinesDB validation pipeline.
|
|
1278
|
-
* If validation fails,
|
|
1174
|
+
* If validation fails, throws an error with validation details.
|
|
1279
1175
|
*/
|
|
1280
1176
|
async function ensureTableRowsValid(options) {
|
|
1281
|
-
console.log("[ensureTableRowsValid] START");
|
|
1282
|
-
console.log("[ensureTableRowsValid] dataDir:", options.dataDir);
|
|
1283
|
-
console.log("[ensureTableRowsValid] tableName:", options.tableName);
|
|
1284
|
-
console.log("[ensureTableRowsValid] rows count:", options.rows.length);
|
|
1285
1177
|
const tablePath = join(options.dataDir, `${options.tableName}.jsonl`);
|
|
1286
1178
|
const overrides = new Map([[tablePath, options.rows]]);
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
}
|
|
1299
|
-
};
|
|
1300
|
-
try {
|
|
1301
|
-
console.log("[ensureTableRowsValid] Calling JsonlReader.withOverrides");
|
|
1302
|
-
await JsonlReader.withOverrides(overrides, async () => {
|
|
1303
|
-
console.log("[ensureTableRowsValid] Inside withOverrides callback");
|
|
1304
|
-
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1305
|
-
console.log("[ensureTableRowsValid] LinesDB created");
|
|
1306
|
-
try {
|
|
1307
|
-
console.log("[ensureTableRowsValid] Calling db.initialize()");
|
|
1308
|
-
await db.initialize();
|
|
1309
|
-
console.log("[ensureTableRowsValid] db.initialize() completed");
|
|
1310
|
-
} finally {
|
|
1311
|
-
console.log("[ensureTableRowsValid] Calling db.close()");
|
|
1312
|
-
await db.close();
|
|
1179
|
+
await JsonlReader.withOverrides(overrides, async () => {
|
|
1180
|
+
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1181
|
+
try {
|
|
1182
|
+
const result = await db.initialize({ tableName: options.tableName });
|
|
1183
|
+
if (!result.valid) {
|
|
1184
|
+
const errorCount = result.errors.length;
|
|
1185
|
+
const errorDetails = result.errors.map((e) => {
|
|
1186
|
+
const issueMessages = e.issues.map((issue) => issue.message).join(", ");
|
|
1187
|
+
return ` Row ${e.rowIndex}: ${issueMessages}`;
|
|
1188
|
+
}).join("\n");
|
|
1189
|
+
throw new Error(`Validation failed for table '${options.tableName}' (${errorCount} error(s)):\n${errorDetails}`);
|
|
1313
1190
|
}
|
|
1314
|
-
}
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
}
|
|
1319
|
-
console.log("[ensureTableRowsValid] Warnings captured:", warnMessages.length);
|
|
1320
|
-
console.log("[ensureTableRowsValid] capturedError:", capturedError ? "YES" : "NO");
|
|
1321
|
-
if (capturedError) {
|
|
1322
|
-
console.log("[ensureTableRowsValid] Throwing captured error");
|
|
1323
|
-
throw capturedError;
|
|
1324
|
-
}
|
|
1325
|
-
console.log("[ensureTableRowsValid] END (success)");
|
|
1191
|
+
} finally {
|
|
1192
|
+
await db.close();
|
|
1193
|
+
}
|
|
1194
|
+
});
|
|
1326
1195
|
}
|
|
1327
1196
|
|
|
1328
1197
|
//#endregion
|
|
1329
|
-
export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator,
|
|
1198
|
+
export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator, defineSchema, detectRuntime, ensureTableRowsValid, hasBackward };
|
|
1330
1199
|
//# sourceMappingURL=index.js.map
|