@toiroakr/lines-db 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/bin/cli.js +400 -418
- package/dist/index.cjs +217 -330
- package/dist/index.d.cts +64 -84
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +64 -84
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +219 -331
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/src/cli.ts +226 -126
- package/src/database.ts +342 -53
- package/src/index.ts +2 -2
- package/src/jsonl-migration.ts +24 -56
- package/src/schema.ts +37 -32
- package/src/types.ts +21 -0
- package/src/validator.test.ts +0 -507
- package/src/validator.ts +0 -441
package/dist/index.cjs
CHANGED
|
@@ -261,8 +261,7 @@ var DirectoryScanner = class {
|
|
|
261
261
|
* Define a bidirectional schema with optional backward transformation
|
|
262
262
|
*
|
|
263
263
|
* @param schema - Standard Schema for validation
|
|
264
|
-
* @param
|
|
265
|
-
* Required when schema performs transformations
|
|
264
|
+
* @param options - SchemaOptions object. When Input and Output types differ, backward transformation is required
|
|
266
265
|
*
|
|
267
266
|
* @example
|
|
268
267
|
* // No transformation - backward not needed
|
|
@@ -271,10 +270,12 @@ var DirectoryScanner = class {
|
|
|
271
270
|
* );
|
|
272
271
|
*
|
|
273
272
|
* @example
|
|
274
|
-
* // With transformation - backward
|
|
273
|
+
* // With transformation - backward REQUIRED
|
|
275
274
|
* const schema = defineSchema(
|
|
276
275
|
* v.pipe(v.string(), v.transform(Number)),
|
|
277
|
-
*
|
|
276
|
+
* {
|
|
277
|
+
* backward: (num) => String(num) // backward: number → string (REQUIRED)
|
|
278
|
+
* }
|
|
278
279
|
* );
|
|
279
280
|
*
|
|
280
281
|
* @example
|
|
@@ -289,14 +290,14 @@ var DirectoryScanner = class {
|
|
|
289
290
|
* }
|
|
290
291
|
* );
|
|
291
292
|
*/
|
|
292
|
-
function defineSchema(schema,
|
|
293
|
+
function defineSchema(schema, ...args) {
|
|
294
|
+
const options = args[0];
|
|
293
295
|
const bidirectionalSchema = Object.create(schema);
|
|
294
|
-
if (
|
|
295
|
-
|
|
296
|
-
if (
|
|
297
|
-
if (
|
|
298
|
-
if (
|
|
299
|
-
if (optionsOrBackward.indexes) bidirectionalSchema.indexes = optionsOrBackward.indexes;
|
|
296
|
+
if (options) {
|
|
297
|
+
if (options.backward) bidirectionalSchema.backward = options.backward;
|
|
298
|
+
if (options.primaryKey) bidirectionalSchema.primaryKey = options.primaryKey;
|
|
299
|
+
if (options.foreignKeys) bidirectionalSchema.foreignKeys = options.foreignKeys;
|
|
300
|
+
if (options.indexes) bidirectionalSchema.indexes = options.indexes;
|
|
300
301
|
}
|
|
301
302
|
Object.defineProperty(bidirectionalSchema, "~standard", {
|
|
302
303
|
value: schema["~standard"],
|
|
@@ -329,27 +330,49 @@ var LinesDB = class LinesDB {
|
|
|
329
330
|
return new LinesDB(config, dbPath);
|
|
330
331
|
}
|
|
331
332
|
/**
|
|
332
|
-
* Initialize database by loading all JSONL files
|
|
333
|
+
* Initialize database by loading all JSONL files or a specific table
|
|
333
334
|
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
335
|
+
* @param options Optional configuration for initialization
|
|
336
|
+
* @param options.tableName Optional table name to initialize. If not provided, initializes all tables
|
|
337
|
+
* @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
|
|
338
|
+
* @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
|
|
339
|
+
* @returns ValidationResult containing validation status, errors, and warnings
|
|
334
340
|
*/
|
|
335
|
-
async initialize() {
|
|
341
|
+
async initialize(options) {
|
|
342
|
+
const allErrors = [];
|
|
343
|
+
const allWarnings = [];
|
|
344
|
+
const tableName = options?.tableName;
|
|
345
|
+
const detailedValidate = options?.detailedValidate ?? false;
|
|
346
|
+
const transform = options?.transform;
|
|
336
347
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
348
|
+
const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
|
|
349
|
+
for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
|
|
337
350
|
const loadedTables = /* @__PURE__ */ new Set();
|
|
338
351
|
const loadingTables = /* @__PURE__ */ new Set();
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
this.validationSchemas.delete(tableName);
|
|
352
|
+
const attemptedTables = /* @__PURE__ */ new Set();
|
|
353
|
+
for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
|
|
354
|
+
const tableTransform = tableNameToLoad === tableName ? transform : void 0;
|
|
355
|
+
const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
|
|
356
|
+
allErrors.push(...errors);
|
|
357
|
+
allWarnings.push(...warnings);
|
|
346
358
|
}
|
|
359
|
+
return {
|
|
360
|
+
valid: allErrors.length === 0,
|
|
361
|
+
errors: allErrors,
|
|
362
|
+
warnings: allWarnings
|
|
363
|
+
};
|
|
347
364
|
}
|
|
348
365
|
/**
|
|
349
366
|
* Load a table and its dependencies recursively
|
|
350
367
|
*/
|
|
351
|
-
async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
|
|
352
|
-
|
|
368
|
+
async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
|
|
369
|
+
const errors = [];
|
|
370
|
+
const warnings = [];
|
|
371
|
+
if (attemptedTables.has(tableName)) return {
|
|
372
|
+
errors,
|
|
373
|
+
warnings
|
|
374
|
+
};
|
|
375
|
+
attemptedTables.add(tableName);
|
|
353
376
|
if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
354
377
|
const tableConfig = this.tables.get(tableName);
|
|
355
378
|
if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
|
|
@@ -363,21 +386,35 @@ var LinesDB = class LinesDB {
|
|
|
363
386
|
} catch {}
|
|
364
387
|
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
365
388
|
const referencedTable = fk.references.table;
|
|
366
|
-
if (
|
|
367
|
-
|
|
389
|
+
if (referencedTable === tableName) continue;
|
|
390
|
+
if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
|
|
391
|
+
const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
|
|
392
|
+
errors.push(...depResult.errors);
|
|
393
|
+
warnings.push(...depResult.warnings);
|
|
394
|
+
} else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
|
|
395
|
+
}
|
|
396
|
+
const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
|
|
397
|
+
errors.push(...loadErrors);
|
|
398
|
+
if (loaded) loadedTables.add(tableName);
|
|
399
|
+
else {
|
|
400
|
+
warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
|
|
401
|
+
this.tables.delete(tableName);
|
|
368
402
|
}
|
|
369
|
-
if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
|
|
370
|
-
else this.tables.delete(tableName);
|
|
371
403
|
} finally {
|
|
372
404
|
loadingTables.delete(tableName);
|
|
373
405
|
}
|
|
406
|
+
return {
|
|
407
|
+
errors,
|
|
408
|
+
warnings
|
|
409
|
+
};
|
|
374
410
|
}
|
|
375
411
|
/**
|
|
376
412
|
* Load a single table from JSONL file
|
|
377
|
-
* @returns
|
|
413
|
+
* @returns Object with loaded status and validation errors
|
|
378
414
|
*/
|
|
379
|
-
async loadTable(tableName, config) {
|
|
380
|
-
|
|
415
|
+
async loadTable(tableName, config, detailedValidate, transform) {
|
|
416
|
+
let data = await JsonlReader.read(config.jsonlPath);
|
|
417
|
+
if (transform) data = data.map((row) => transform(row));
|
|
381
418
|
let validationSchema = config.validationSchema;
|
|
382
419
|
const schemaMetadata = {};
|
|
383
420
|
if (!validationSchema) try {
|
|
@@ -393,7 +430,15 @@ var LinesDB = class LinesDB {
|
|
|
393
430
|
else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
394
431
|
if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
|
|
395
432
|
else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
|
|
396
|
-
|
|
433
|
+
if (process.env.DEBUG_LINES_DB) {
|
|
434
|
+
console.log(`[lines-db] Schema metadata for ${tableName}:`);
|
|
435
|
+
console.log(` primaryKey: ${schemaMetadata.primaryKey}`);
|
|
436
|
+
console.log(` foreignKeys: ${JSON.stringify(schemaMetadata.foreignKeys)}`);
|
|
437
|
+
console.log(` indexes: ${JSON.stringify(schemaMetadata.indexes)}`);
|
|
438
|
+
}
|
|
439
|
+
} catch (_error) {
|
|
440
|
+
if (process.env.DEBUG_LINES_DB) console.warn(`[lines-db] Failed to load schema metadata for ${tableName}:`, _error instanceof Error ? _error.message : String(_error));
|
|
441
|
+
}
|
|
397
442
|
this.validationSchemas.set(tableName, validationSchema);
|
|
398
443
|
const validationErrors = [];
|
|
399
444
|
const validatedData = [];
|
|
@@ -411,18 +456,32 @@ var LinesDB = class LinesDB {
|
|
|
411
456
|
else throw error;
|
|
412
457
|
}
|
|
413
458
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
}
|
|
459
|
+
const validationErrorDetails = validationErrors.map((ve) => ({
|
|
460
|
+
file: config.jsonlPath,
|
|
461
|
+
tableName,
|
|
462
|
+
rowIndex: ve.rowIndex,
|
|
463
|
+
issues: ve.error.issues,
|
|
464
|
+
type: "schema"
|
|
465
|
+
}));
|
|
466
|
+
if (validationErrors.length > 0) return {
|
|
467
|
+
loaded: false,
|
|
468
|
+
errors: validationErrorDetails
|
|
469
|
+
};
|
|
421
470
|
let schema;
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
schema =
|
|
471
|
+
let inferredSchema;
|
|
472
|
+
if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
|
|
473
|
+
if (config.schema) {
|
|
474
|
+
schema = config.schema;
|
|
475
|
+
if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
|
|
476
|
+
const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
|
|
477
|
+
if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
|
|
478
|
+
}
|
|
479
|
+
} else if (config.autoInferSchema !== false) {
|
|
480
|
+
if (validatedData.length === 0) return {
|
|
481
|
+
loaded: false,
|
|
482
|
+
errors: []
|
|
483
|
+
};
|
|
484
|
+
schema = inferredSchema;
|
|
426
485
|
} else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
427
486
|
const biSchema = validationSchema;
|
|
428
487
|
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
@@ -436,23 +495,43 @@ var LinesDB = class LinesDB {
|
|
|
436
495
|
if (idColumn) idColumn.primaryKey = true;
|
|
437
496
|
}
|
|
438
497
|
if (foreignKeys) schema.foreignKeys = foreignKeys;
|
|
439
|
-
if (indexes)
|
|
498
|
+
if (indexes) {
|
|
499
|
+
schema.indexes = indexes;
|
|
500
|
+
for (const index of indexes) if (index.unique && index.columns.length === 1) {
|
|
501
|
+
const col = schema.columns.find((c) => c.name === index.columns[0]);
|
|
502
|
+
if (col && !col.unique && !col.primaryKey) col.unique = true;
|
|
503
|
+
}
|
|
504
|
+
}
|
|
440
505
|
this.schemas.set(tableName, schema);
|
|
441
506
|
this.createTable(schema);
|
|
442
|
-
|
|
443
|
-
|
|
507
|
+
if (detailedValidate) {
|
|
508
|
+
const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
|
|
509
|
+
if (insertErrors.length > 0) return {
|
|
510
|
+
loaded: false,
|
|
511
|
+
errors: insertErrors
|
|
512
|
+
};
|
|
513
|
+
} else this.insertData(tableName, schema, validatedData);
|
|
514
|
+
return {
|
|
515
|
+
loaded: true,
|
|
516
|
+
errors: []
|
|
517
|
+
};
|
|
444
518
|
}
|
|
445
519
|
/**
|
|
446
520
|
* Create table in SQLite with constraints and indexes
|
|
447
521
|
*/
|
|
448
522
|
createTable(schema) {
|
|
449
523
|
const quotedTableName = this.quoteTableName(schema.name);
|
|
524
|
+
const uniqueColumns = /* @__PURE__ */ new Set();
|
|
525
|
+
for (const col of schema.columns) if (col.unique) uniqueColumns.add(col.name);
|
|
526
|
+
if (schema.indexes) {
|
|
527
|
+
for (const index of schema.indexes) if (index.unique && index.columns.length === 1) uniqueColumns.add(index.columns[0]);
|
|
528
|
+
}
|
|
450
529
|
const columnDefs = schema.columns.map((col) => {
|
|
451
530
|
const sqlType = col.type === "JSON" ? "TEXT" : col.type;
|
|
452
531
|
const parts = [this.quoteIdentifier(col.name), sqlType];
|
|
453
532
|
if (col.primaryKey) parts.push("PRIMARY KEY");
|
|
454
533
|
if (col.notNull) parts.push("NOT NULL");
|
|
455
|
-
if (col.
|
|
534
|
+
if (uniqueColumns.has(col.name) && !col.primaryKey) parts.push("UNIQUE");
|
|
456
535
|
return parts.join(" ");
|
|
457
536
|
});
|
|
458
537
|
const foreignKeyDefs = [];
|
|
@@ -485,18 +564,85 @@ var LinesDB = class LinesDB {
|
|
|
485
564
|
return `"${identifier.replace(/"/g, "\"\"")}"`;
|
|
486
565
|
}
|
|
487
566
|
/**
|
|
488
|
-
* Insert data into table
|
|
567
|
+
* Insert data into table using batch insert (multiple rows per SQL)
|
|
568
|
+
* SQLite has a parameter limit (default 999), so we batch rows accordingly
|
|
569
|
+
* Throws exception if any constraint violation occurs
|
|
489
570
|
*/
|
|
490
571
|
insertData(tableName, schema, data) {
|
|
572
|
+
if (data.length === 0) return;
|
|
573
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
574
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
575
|
+
const columnCount = columnNames.length;
|
|
576
|
+
const maxBatchSize = Math.floor(900 / columnCount);
|
|
577
|
+
const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
|
|
578
|
+
for (let i = 0; i < data.length; i += batchSize) {
|
|
579
|
+
const batch = data.slice(i, i + batchSize);
|
|
580
|
+
const rowPlaceholders = columnNames.map(() => "?").join(", ");
|
|
581
|
+
const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
|
|
582
|
+
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
|
|
583
|
+
const values = [];
|
|
584
|
+
for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
|
|
585
|
+
this.db.prepare(sql).run(...values);
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
/**
|
|
589
|
+
* Insert data into table one row at a time with detailed error reporting
|
|
590
|
+
* This is used for validation to catch constraint violations
|
|
591
|
+
*/
|
|
592
|
+
insertDataWithDetailedValidation(tableName, schema, data, filePath) {
|
|
593
|
+
const errors = [];
|
|
491
594
|
const columnNames = schema.columns.map((col) => col.name);
|
|
492
595
|
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
493
596
|
const placeholders = columnNames.map(() => "?").join(", ");
|
|
494
597
|
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
495
598
|
const stmt = this.db.prepare(sql);
|
|
496
|
-
for (
|
|
497
|
-
const
|
|
498
|
-
|
|
599
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
600
|
+
const row = data[rowIndex];
|
|
601
|
+
try {
|
|
602
|
+
const values = columnNames.map((col) => this.normalizeValue(row[col]));
|
|
603
|
+
stmt.run(...values);
|
|
604
|
+
} catch (error) {
|
|
605
|
+
const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
|
|
606
|
+
if (constraintError) errors.push(constraintError);
|
|
607
|
+
}
|
|
499
608
|
}
|
|
609
|
+
return errors;
|
|
610
|
+
}
|
|
611
|
+
/**
|
|
612
|
+
* Analyze constraint error and extract detailed information
|
|
613
|
+
*/
|
|
614
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
|
|
615
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
616
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
617
|
+
const fkValue = row[fk.column];
|
|
618
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
619
|
+
try {
|
|
620
|
+
const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
|
|
621
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
622
|
+
file,
|
|
623
|
+
tableName,
|
|
624
|
+
rowIndex,
|
|
625
|
+
issues: [],
|
|
626
|
+
type: "foreignKey",
|
|
627
|
+
foreignKeyError: {
|
|
628
|
+
column: fk.column,
|
|
629
|
+
value: fkValue,
|
|
630
|
+
referencedTable: fk.references.table,
|
|
631
|
+
referencedColumn: fk.references.column
|
|
632
|
+
}
|
|
633
|
+
};
|
|
634
|
+
} catch (_) {}
|
|
635
|
+
}
|
|
636
|
+
return {
|
|
637
|
+
file,
|
|
638
|
+
tableName,
|
|
639
|
+
rowIndex,
|
|
640
|
+
issues: [{
|
|
641
|
+
message: errorMessage,
|
|
642
|
+
path: []
|
|
643
|
+
}],
|
|
644
|
+
type: "schema"
|
|
645
|
+
};
|
|
500
646
|
}
|
|
501
647
|
/**
|
|
502
648
|
* Execute a raw SQL query
|
|
@@ -908,9 +1054,13 @@ var LinesDB = class LinesDB {
|
|
|
908
1054
|
/**
|
|
909
1055
|
* Sync database changes back to JSONL files
|
|
910
1056
|
* Uses backward transformation when available
|
|
1057
|
+
* @param tableName Optional table name to sync. If not provided, syncs all loaded tables
|
|
911
1058
|
*/
|
|
912
|
-
async sync() {
|
|
913
|
-
|
|
1059
|
+
async sync(tableName) {
|
|
1060
|
+
if (tableName) {
|
|
1061
|
+
if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
|
|
1062
|
+
await this.syncTable(tableName);
|
|
1063
|
+
} else for (const [name] of this.schemas) await this.syncTable(name);
|
|
914
1064
|
}
|
|
915
1065
|
/**
|
|
916
1066
|
* Execute a function within a transaction
|
|
@@ -1057,293 +1207,31 @@ function sanitizeIdentifier(value) {
|
|
|
1057
1207
|
return value.replace(/[^A-Za-z0-9_$]/g, "");
|
|
1058
1208
|
}
|
|
1059
1209
|
|
|
1060
|
-
//#endregion
|
|
1061
|
-
//#region src/validator.ts
|
|
1062
|
-
var Validator = class {
|
|
1063
|
-
path;
|
|
1064
|
-
projectRoot;
|
|
1065
|
-
constructor(options) {
|
|
1066
|
-
this.path = options.path;
|
|
1067
|
-
this.projectRoot = options.projectRoot || process.cwd();
|
|
1068
|
-
}
|
|
1069
|
-
/**
|
|
1070
|
-
* Validate JSONL file(s)
|
|
1071
|
-
*/
|
|
1072
|
-
async validate() {
|
|
1073
|
-
const fullPath = this.path.startsWith("/") ? this.path : (0, node_path.join)(this.projectRoot, this.path);
|
|
1074
|
-
const stats = await (0, node_fs_promises.stat)(fullPath);
|
|
1075
|
-
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
1076
|
-
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
1077
|
-
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
1078
|
-
}
|
|
1079
|
-
/**
|
|
1080
|
-
* Validate all JSONL files in a directory
|
|
1081
|
-
*/
|
|
1082
|
-
async validateDirectory(dirPath) {
|
|
1083
|
-
const jsonlFiles = (await (0, node_fs_promises.readdir)(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => (0, node_path.join)(dirPath, entry.name));
|
|
1084
|
-
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
1085
|
-
const allErrors = [];
|
|
1086
|
-
const allWarnings = [];
|
|
1087
|
-
const filesWithSchema = [];
|
|
1088
|
-
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
1089
|
-
else {
|
|
1090
|
-
const tableName = (0, node_path.basename)(file, ".jsonl");
|
|
1091
|
-
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
1092
|
-
}
|
|
1093
|
-
for (const file of filesWithSchema) {
|
|
1094
|
-
const result = await this.validateFile(file);
|
|
1095
|
-
allErrors.push(...result.errors);
|
|
1096
|
-
allWarnings.push(...result.warnings);
|
|
1097
|
-
}
|
|
1098
|
-
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1099
|
-
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1100
|
-
allErrors.push(...dbErrors);
|
|
1101
|
-
}
|
|
1102
|
-
return {
|
|
1103
|
-
valid: allErrors.length === 0,
|
|
1104
|
-
errors: allErrors,
|
|
1105
|
-
warnings: allWarnings
|
|
1106
|
-
};
|
|
1107
|
-
}
|
|
1108
|
-
/**
|
|
1109
|
-
* Validate by loading data into database one row at a time
|
|
1110
|
-
* This catches constraint violations and extracts detailed error information
|
|
1111
|
-
*/
|
|
1112
|
-
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1113
|
-
const errors = [];
|
|
1114
|
-
try {
|
|
1115
|
-
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1116
|
-
for (const file of jsonlFiles) {
|
|
1117
|
-
const tableName = (0, node_path.basename)(file, ".jsonl");
|
|
1118
|
-
const data = await JsonlReader.read(file);
|
|
1119
|
-
let schema;
|
|
1120
|
-
let foreignKeys = [];
|
|
1121
|
-
let indexes = [];
|
|
1122
|
-
let primaryKey;
|
|
1123
|
-
try {
|
|
1124
|
-
schema = await SchemaLoader.loadSchema(file);
|
|
1125
|
-
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1126
|
-
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1127
|
-
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1128
|
-
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1129
|
-
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1130
|
-
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1131
|
-
} catch (_error) {
|
|
1132
|
-
continue;
|
|
1133
|
-
}
|
|
1134
|
-
try {
|
|
1135
|
-
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1136
|
-
this.createTableInDb(db, tableSchema);
|
|
1137
|
-
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1138
|
-
const row = data[rowIndex];
|
|
1139
|
-
try {
|
|
1140
|
-
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1141
|
-
} catch (error) {
|
|
1142
|
-
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1143
|
-
if (constraintError) errors.push(constraintError);
|
|
1144
|
-
}
|
|
1145
|
-
}
|
|
1146
|
-
} catch (_error) {
|
|
1147
|
-
continue;
|
|
1148
|
-
}
|
|
1149
|
-
}
|
|
1150
|
-
await db.close();
|
|
1151
|
-
} catch (error) {
|
|
1152
|
-
errors.push({
|
|
1153
|
-
file: dirPath,
|
|
1154
|
-
tableName: "database",
|
|
1155
|
-
rowIndex: 0,
|
|
1156
|
-
issues: [{
|
|
1157
|
-
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1158
|
-
path: []
|
|
1159
|
-
}],
|
|
1160
|
-
type: "schema"
|
|
1161
|
-
});
|
|
1162
|
-
}
|
|
1163
|
-
return errors;
|
|
1164
|
-
}
|
|
1165
|
-
/**
|
|
1166
|
-
* Create table schema from data and validation schema
|
|
1167
|
-
*/
|
|
1168
|
-
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1169
|
-
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1170
|
-
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1171
|
-
if (primaryKey) {
|
|
1172
|
-
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1173
|
-
if (pkColumn) pkColumn.primaryKey = true;
|
|
1174
|
-
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1175
|
-
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1176
|
-
if (idColumn) idColumn.primaryKey = true;
|
|
1177
|
-
}
|
|
1178
|
-
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1179
|
-
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1180
|
-
return schema;
|
|
1181
|
-
}
|
|
1182
|
-
/**
|
|
1183
|
-
* Create table in database
|
|
1184
|
-
*/
|
|
1185
|
-
createTableInDb(db, schema) {
|
|
1186
|
-
const columns = schema.columns.map((col) => {
|
|
1187
|
-
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1188
|
-
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1189
|
-
return colDef;
|
|
1190
|
-
});
|
|
1191
|
-
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1192
|
-
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1193
|
-
db.execute(sql);
|
|
1194
|
-
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1195
|
-
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1196
|
-
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1197
|
-
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1198
|
-
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1199
|
-
db.execute(indexSql);
|
|
1200
|
-
}
|
|
1201
|
-
}
|
|
1202
|
-
/**
|
|
1203
|
-
* Insert a row into database
|
|
1204
|
-
*/
|
|
1205
|
-
insertRowIntoDb(db, tableName, schema, row) {
|
|
1206
|
-
const columnNames = schema.columns.map((col) => col.name);
|
|
1207
|
-
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1208
|
-
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1209
|
-
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1210
|
-
const values = columnNames.map((col) => {
|
|
1211
|
-
const value = row[col];
|
|
1212
|
-
if (value === null || value === void 0) return null;
|
|
1213
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1214
|
-
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1215
|
-
return value;
|
|
1216
|
-
});
|
|
1217
|
-
db.execute(sql, values);
|
|
1218
|
-
}
|
|
1219
|
-
/**
|
|
1220
|
-
* Analyze constraint error and extract detailed information
|
|
1221
|
-
*/
|
|
1222
|
-
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1223
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1224
|
-
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1225
|
-
const fkValue = row[fk.column];
|
|
1226
|
-
if (fkValue === null || fkValue === void 0) continue;
|
|
1227
|
-
try {
|
|
1228
|
-
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1229
|
-
if (result.length > 0 && result[0].count === 0) return {
|
|
1230
|
-
file,
|
|
1231
|
-
tableName,
|
|
1232
|
-
rowIndex,
|
|
1233
|
-
issues: [],
|
|
1234
|
-
type: "foreignKey",
|
|
1235
|
-
foreignKeyError: {
|
|
1236
|
-
column: fk.column,
|
|
1237
|
-
value: fkValue,
|
|
1238
|
-
referencedTable: fk.references.table,
|
|
1239
|
-
referencedColumn: fk.references.column
|
|
1240
|
-
}
|
|
1241
|
-
};
|
|
1242
|
-
} catch (_) {}
|
|
1243
|
-
}
|
|
1244
|
-
return {
|
|
1245
|
-
file,
|
|
1246
|
-
tableName,
|
|
1247
|
-
rowIndex,
|
|
1248
|
-
issues: [{
|
|
1249
|
-
message: errorMessage,
|
|
1250
|
-
path: []
|
|
1251
|
-
}],
|
|
1252
|
-
type: "schema"
|
|
1253
|
-
};
|
|
1254
|
-
}
|
|
1255
|
-
/**
|
|
1256
|
-
* Quote SQL identifier
|
|
1257
|
-
*/
|
|
1258
|
-
quoteIdentifier(name) {
|
|
1259
|
-
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1260
|
-
}
|
|
1261
|
-
/**
|
|
1262
|
-
* Validate a single JSONL file
|
|
1263
|
-
*/
|
|
1264
|
-
async validateFile(filePath) {
|
|
1265
|
-
const tableName = (0, node_path.basename)(filePath, ".jsonl");
|
|
1266
|
-
const data = await JsonlReader.read(filePath);
|
|
1267
|
-
const schema = await SchemaLoader.loadSchema(filePath);
|
|
1268
|
-
const errors = [];
|
|
1269
|
-
for (let i = 0; i < data.length; i++) {
|
|
1270
|
-
const row = data[i];
|
|
1271
|
-
const result = schema["~standard"].validate(row);
|
|
1272
|
-
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
1273
|
-
if (result.issues && result.issues.length > 0) errors.push({
|
|
1274
|
-
file: filePath,
|
|
1275
|
-
tableName,
|
|
1276
|
-
rowIndex: i,
|
|
1277
|
-
issues: result.issues,
|
|
1278
|
-
type: "schema"
|
|
1279
|
-
});
|
|
1280
|
-
}
|
|
1281
|
-
if (errors.length === 0) {
|
|
1282
|
-
const dirPath = (0, node_path.dirname)(filePath);
|
|
1283
|
-
const allJsonlFiles = (await (0, node_fs_promises.readdir)(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => (0, node_path.join)(dirPath, entry.name));
|
|
1284
|
-
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1285
|
-
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1286
|
-
}
|
|
1287
|
-
return {
|
|
1288
|
-
valid: errors.length === 0,
|
|
1289
|
-
errors,
|
|
1290
|
-
warnings: []
|
|
1291
|
-
};
|
|
1292
|
-
}
|
|
1293
|
-
};
|
|
1294
|
-
|
|
1295
1210
|
//#endregion
|
|
1296
1211
|
//#region src/jsonl-migration.ts
|
|
1297
1212
|
/**
|
|
1298
1213
|
* Validate a table by temporarily supplying in-memory rows while reusing the existing LinesDB validation pipeline.
|
|
1299
|
-
* If validation fails,
|
|
1214
|
+
* If validation fails, throws an error with validation details.
|
|
1300
1215
|
*/
|
|
1301
1216
|
async function ensureTableRowsValid(options) {
|
|
1302
|
-
console.log("[ensureTableRowsValid] START");
|
|
1303
|
-
console.log("[ensureTableRowsValid] dataDir:", options.dataDir);
|
|
1304
|
-
console.log("[ensureTableRowsValid] tableName:", options.tableName);
|
|
1305
|
-
console.log("[ensureTableRowsValid] rows count:", options.rows.length);
|
|
1306
1217
|
const tablePath = (0, node_path.join)(options.dataDir, `${options.tableName}.jsonl`);
|
|
1307
1218
|
const overrides = new Map([[tablePath, options.rows]]);
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
}
|
|
1320
|
-
};
|
|
1321
|
-
try {
|
|
1322
|
-
console.log("[ensureTableRowsValid] Calling JsonlReader.withOverrides");
|
|
1323
|
-
await JsonlReader.withOverrides(overrides, async () => {
|
|
1324
|
-
console.log("[ensureTableRowsValid] Inside withOverrides callback");
|
|
1325
|
-
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1326
|
-
console.log("[ensureTableRowsValid] LinesDB created");
|
|
1327
|
-
try {
|
|
1328
|
-
console.log("[ensureTableRowsValid] Calling db.initialize()");
|
|
1329
|
-
await db.initialize();
|
|
1330
|
-
console.log("[ensureTableRowsValid] db.initialize() completed");
|
|
1331
|
-
} finally {
|
|
1332
|
-
console.log("[ensureTableRowsValid] Calling db.close()");
|
|
1333
|
-
await db.close();
|
|
1219
|
+
await JsonlReader.withOverrides(overrides, async () => {
|
|
1220
|
+
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1221
|
+
try {
|
|
1222
|
+
const result = await db.initialize({ tableName: options.tableName });
|
|
1223
|
+
if (!result.valid) {
|
|
1224
|
+
const errorCount = result.errors.length;
|
|
1225
|
+
const errorDetails = result.errors.map((e) => {
|
|
1226
|
+
const issueMessages = e.issues.map((issue) => issue.message).join(", ");
|
|
1227
|
+
return ` Row ${e.rowIndex}: ${issueMessages}`;
|
|
1228
|
+
}).join("\n");
|
|
1229
|
+
throw new Error(`Validation failed for table '${options.tableName}' (${errorCount} error(s)):\n${errorDetails}`);
|
|
1334
1230
|
}
|
|
1335
|
-
}
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
}
|
|
1340
|
-
console.log("[ensureTableRowsValid] Warnings captured:", warnMessages.length);
|
|
1341
|
-
console.log("[ensureTableRowsValid] capturedError:", capturedError ? "YES" : "NO");
|
|
1342
|
-
if (capturedError) {
|
|
1343
|
-
console.log("[ensureTableRowsValid] Throwing captured error");
|
|
1344
|
-
throw capturedError;
|
|
1345
|
-
}
|
|
1346
|
-
console.log("[ensureTableRowsValid] END (success)");
|
|
1231
|
+
} finally {
|
|
1232
|
+
await db.close();
|
|
1233
|
+
}
|
|
1234
|
+
});
|
|
1347
1235
|
}
|
|
1348
1236
|
|
|
1349
1237
|
//#endregion
|
|
@@ -1354,7 +1242,6 @@ exports.LinesDB = LinesDB;
|
|
|
1354
1242
|
exports.RUNTIME = RUNTIME;
|
|
1355
1243
|
exports.SchemaLoader = SchemaLoader;
|
|
1356
1244
|
exports.TypeGenerator = TypeGenerator;
|
|
1357
|
-
exports.Validator = Validator;
|
|
1358
1245
|
exports.defineSchema = defineSchema;
|
|
1359
1246
|
exports.detectRuntime = detectRuntime;
|
|
1360
1247
|
exports.ensureTableRowsValid = ensureTableRowsValid;
|