@toiroakr/lines-db 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/bin/cli.js +400 -418
- package/dist/index.cjs +217 -330
- package/dist/index.d.cts +64 -84
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +64 -84
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +219 -331
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/src/cli.ts +226 -126
- package/src/database.ts +342 -53
- package/src/index.ts +2 -2
- package/src/jsonl-migration.ts +24 -56
- package/src/schema.ts +37 -32
- package/src/types.ts +21 -0
- package/src/validator.test.ts +0 -507
- package/src/validator.ts +0 -441
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createRequire } from "node:module";
|
|
2
|
-
import { access, mkdir, readFile, readdir,
|
|
2
|
+
import { access, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
|
|
3
3
|
import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "node:path";
|
|
4
4
|
import { pathToFileURL } from "node:url";
|
|
5
5
|
|
|
@@ -240,8 +240,7 @@ var DirectoryScanner = class {
|
|
|
240
240
|
* Define a bidirectional schema with optional backward transformation
|
|
241
241
|
*
|
|
242
242
|
* @param schema - Standard Schema for validation
|
|
243
|
-
* @param
|
|
244
|
-
* Required when schema performs transformations
|
|
243
|
+
* @param options - SchemaOptions object. When Input and Output types differ, backward transformation is required
|
|
245
244
|
*
|
|
246
245
|
* @example
|
|
247
246
|
* // No transformation - backward not needed
|
|
@@ -250,10 +249,12 @@ var DirectoryScanner = class {
|
|
|
250
249
|
* );
|
|
251
250
|
*
|
|
252
251
|
* @example
|
|
253
|
-
* // With transformation - backward
|
|
252
|
+
* // With transformation - backward REQUIRED
|
|
254
253
|
* const schema = defineSchema(
|
|
255
254
|
* v.pipe(v.string(), v.transform(Number)),
|
|
256
|
-
*
|
|
255
|
+
* {
|
|
256
|
+
* backward: (num) => String(num) // backward: number → string (REQUIRED)
|
|
257
|
+
* }
|
|
257
258
|
* );
|
|
258
259
|
*
|
|
259
260
|
* @example
|
|
@@ -268,14 +269,14 @@ var DirectoryScanner = class {
|
|
|
268
269
|
* }
|
|
269
270
|
* );
|
|
270
271
|
*/
|
|
271
|
-
function defineSchema(schema,
|
|
272
|
+
function defineSchema(schema, ...args) {
|
|
273
|
+
const options = args[0];
|
|
272
274
|
const bidirectionalSchema = Object.create(schema);
|
|
273
|
-
if (
|
|
274
|
-
|
|
275
|
-
if (
|
|
276
|
-
if (
|
|
277
|
-
if (
|
|
278
|
-
if (optionsOrBackward.indexes) bidirectionalSchema.indexes = optionsOrBackward.indexes;
|
|
275
|
+
if (options) {
|
|
276
|
+
if (options.backward) bidirectionalSchema.backward = options.backward;
|
|
277
|
+
if (options.primaryKey) bidirectionalSchema.primaryKey = options.primaryKey;
|
|
278
|
+
if (options.foreignKeys) bidirectionalSchema.foreignKeys = options.foreignKeys;
|
|
279
|
+
if (options.indexes) bidirectionalSchema.indexes = options.indexes;
|
|
279
280
|
}
|
|
280
281
|
Object.defineProperty(bidirectionalSchema, "~standard", {
|
|
281
282
|
value: schema["~standard"],
|
|
@@ -308,27 +309,49 @@ var LinesDB = class LinesDB {
|
|
|
308
309
|
return new LinesDB(config, dbPath);
|
|
309
310
|
}
|
|
310
311
|
/**
|
|
311
|
-
* Initialize database by loading all JSONL files
|
|
312
|
+
* Initialize database by loading all JSONL files or a specific table
|
|
312
313
|
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
314
|
+
* @param options Optional configuration for initialization
|
|
315
|
+
* @param options.tableName Optional table name to initialize. If not provided, initializes all tables
|
|
316
|
+
* @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
|
|
317
|
+
* @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
|
|
318
|
+
* @returns ValidationResult containing validation status, errors, and warnings
|
|
313
319
|
*/
|
|
314
|
-
async initialize() {
|
|
320
|
+
async initialize(options) {
|
|
321
|
+
const allErrors = [];
|
|
322
|
+
const allWarnings = [];
|
|
323
|
+
const tableName = options?.tableName;
|
|
324
|
+
const detailedValidate = options?.detailedValidate ?? false;
|
|
325
|
+
const transform = options?.transform;
|
|
315
326
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
327
|
+
const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
|
|
328
|
+
for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
|
|
316
329
|
const loadedTables = /* @__PURE__ */ new Set();
|
|
317
330
|
const loadingTables = /* @__PURE__ */ new Set();
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
this.validationSchemas.delete(tableName);
|
|
331
|
+
const attemptedTables = /* @__PURE__ */ new Set();
|
|
332
|
+
for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
|
|
333
|
+
const tableTransform = tableNameToLoad === tableName ? transform : void 0;
|
|
334
|
+
const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
|
|
335
|
+
allErrors.push(...errors);
|
|
336
|
+
allWarnings.push(...warnings);
|
|
325
337
|
}
|
|
338
|
+
return {
|
|
339
|
+
valid: allErrors.length === 0,
|
|
340
|
+
errors: allErrors,
|
|
341
|
+
warnings: allWarnings
|
|
342
|
+
};
|
|
326
343
|
}
|
|
327
344
|
/**
|
|
328
345
|
* Load a table and its dependencies recursively
|
|
329
346
|
*/
|
|
330
|
-
async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
|
|
331
|
-
|
|
347
|
+
async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
|
|
348
|
+
const errors = [];
|
|
349
|
+
const warnings = [];
|
|
350
|
+
if (attemptedTables.has(tableName)) return {
|
|
351
|
+
errors,
|
|
352
|
+
warnings
|
|
353
|
+
};
|
|
354
|
+
attemptedTables.add(tableName);
|
|
332
355
|
if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
333
356
|
const tableConfig = this.tables.get(tableName);
|
|
334
357
|
if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
|
|
@@ -342,21 +365,35 @@ var LinesDB = class LinesDB {
|
|
|
342
365
|
} catch {}
|
|
343
366
|
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
344
367
|
const referencedTable = fk.references.table;
|
|
345
|
-
if (
|
|
346
|
-
|
|
368
|
+
if (referencedTable === tableName) continue;
|
|
369
|
+
if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
|
|
370
|
+
const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
|
|
371
|
+
errors.push(...depResult.errors);
|
|
372
|
+
warnings.push(...depResult.warnings);
|
|
373
|
+
} else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
|
|
374
|
+
}
|
|
375
|
+
const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
|
|
376
|
+
errors.push(...loadErrors);
|
|
377
|
+
if (loaded) loadedTables.add(tableName);
|
|
378
|
+
else {
|
|
379
|
+
warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
|
|
380
|
+
this.tables.delete(tableName);
|
|
347
381
|
}
|
|
348
|
-
if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
|
|
349
|
-
else this.tables.delete(tableName);
|
|
350
382
|
} finally {
|
|
351
383
|
loadingTables.delete(tableName);
|
|
352
384
|
}
|
|
385
|
+
return {
|
|
386
|
+
errors,
|
|
387
|
+
warnings
|
|
388
|
+
};
|
|
353
389
|
}
|
|
354
390
|
/**
|
|
355
391
|
* Load a single table from JSONL file
|
|
356
|
-
* @returns
|
|
392
|
+
* @returns Object with loaded status and validation errors
|
|
357
393
|
*/
|
|
358
|
-
async loadTable(tableName, config) {
|
|
359
|
-
|
|
394
|
+
async loadTable(tableName, config, detailedValidate, transform) {
|
|
395
|
+
let data = await JsonlReader.read(config.jsonlPath);
|
|
396
|
+
if (transform) data = data.map((row) => transform(row));
|
|
360
397
|
let validationSchema = config.validationSchema;
|
|
361
398
|
const schemaMetadata = {};
|
|
362
399
|
if (!validationSchema) try {
|
|
@@ -372,7 +409,15 @@ var LinesDB = class LinesDB {
|
|
|
372
409
|
else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
373
410
|
if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
|
|
374
411
|
else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
|
|
375
|
-
|
|
412
|
+
if (process.env.DEBUG_LINES_DB) {
|
|
413
|
+
console.log(`[lines-db] Schema metadata for ${tableName}:`);
|
|
414
|
+
console.log(` primaryKey: ${schemaMetadata.primaryKey}`);
|
|
415
|
+
console.log(` foreignKeys: ${JSON.stringify(schemaMetadata.foreignKeys)}`);
|
|
416
|
+
console.log(` indexes: ${JSON.stringify(schemaMetadata.indexes)}`);
|
|
417
|
+
}
|
|
418
|
+
} catch (_error) {
|
|
419
|
+
if (process.env.DEBUG_LINES_DB) console.warn(`[lines-db] Failed to load schema metadata for ${tableName}:`, _error instanceof Error ? _error.message : String(_error));
|
|
420
|
+
}
|
|
376
421
|
this.validationSchemas.set(tableName, validationSchema);
|
|
377
422
|
const validationErrors = [];
|
|
378
423
|
const validatedData = [];
|
|
@@ -390,18 +435,32 @@ var LinesDB = class LinesDB {
|
|
|
390
435
|
else throw error;
|
|
391
436
|
}
|
|
392
437
|
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
}
|
|
438
|
+
const validationErrorDetails = validationErrors.map((ve) => ({
|
|
439
|
+
file: config.jsonlPath,
|
|
440
|
+
tableName,
|
|
441
|
+
rowIndex: ve.rowIndex,
|
|
442
|
+
issues: ve.error.issues,
|
|
443
|
+
type: "schema"
|
|
444
|
+
}));
|
|
445
|
+
if (validationErrors.length > 0) return {
|
|
446
|
+
loaded: false,
|
|
447
|
+
errors: validationErrorDetails
|
|
448
|
+
};
|
|
400
449
|
let schema;
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
schema =
|
|
450
|
+
let inferredSchema;
|
|
451
|
+
if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
|
|
452
|
+
if (config.schema) {
|
|
453
|
+
schema = config.schema;
|
|
454
|
+
if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
|
|
455
|
+
const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
|
|
456
|
+
if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
|
|
457
|
+
}
|
|
458
|
+
} else if (config.autoInferSchema !== false) {
|
|
459
|
+
if (validatedData.length === 0) return {
|
|
460
|
+
loaded: false,
|
|
461
|
+
errors: []
|
|
462
|
+
};
|
|
463
|
+
schema = inferredSchema;
|
|
405
464
|
} else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
406
465
|
const biSchema = validationSchema;
|
|
407
466
|
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
@@ -415,23 +474,43 @@ var LinesDB = class LinesDB {
|
|
|
415
474
|
if (idColumn) idColumn.primaryKey = true;
|
|
416
475
|
}
|
|
417
476
|
if (foreignKeys) schema.foreignKeys = foreignKeys;
|
|
418
|
-
if (indexes)
|
|
477
|
+
if (indexes) {
|
|
478
|
+
schema.indexes = indexes;
|
|
479
|
+
for (const index of indexes) if (index.unique && index.columns.length === 1) {
|
|
480
|
+
const col = schema.columns.find((c) => c.name === index.columns[0]);
|
|
481
|
+
if (col && !col.unique && !col.primaryKey) col.unique = true;
|
|
482
|
+
}
|
|
483
|
+
}
|
|
419
484
|
this.schemas.set(tableName, schema);
|
|
420
485
|
this.createTable(schema);
|
|
421
|
-
|
|
422
|
-
|
|
486
|
+
if (detailedValidate) {
|
|
487
|
+
const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
|
|
488
|
+
if (insertErrors.length > 0) return {
|
|
489
|
+
loaded: false,
|
|
490
|
+
errors: insertErrors
|
|
491
|
+
};
|
|
492
|
+
} else this.insertData(tableName, schema, validatedData);
|
|
493
|
+
return {
|
|
494
|
+
loaded: true,
|
|
495
|
+
errors: []
|
|
496
|
+
};
|
|
423
497
|
}
|
|
424
498
|
/**
|
|
425
499
|
* Create table in SQLite with constraints and indexes
|
|
426
500
|
*/
|
|
427
501
|
createTable(schema) {
|
|
428
502
|
const quotedTableName = this.quoteTableName(schema.name);
|
|
503
|
+
const uniqueColumns = /* @__PURE__ */ new Set();
|
|
504
|
+
for (const col of schema.columns) if (col.unique) uniqueColumns.add(col.name);
|
|
505
|
+
if (schema.indexes) {
|
|
506
|
+
for (const index of schema.indexes) if (index.unique && index.columns.length === 1) uniqueColumns.add(index.columns[0]);
|
|
507
|
+
}
|
|
429
508
|
const columnDefs = schema.columns.map((col) => {
|
|
430
509
|
const sqlType = col.type === "JSON" ? "TEXT" : col.type;
|
|
431
510
|
const parts = [this.quoteIdentifier(col.name), sqlType];
|
|
432
511
|
if (col.primaryKey) parts.push("PRIMARY KEY");
|
|
433
512
|
if (col.notNull) parts.push("NOT NULL");
|
|
434
|
-
if (col.
|
|
513
|
+
if (uniqueColumns.has(col.name) && !col.primaryKey) parts.push("UNIQUE");
|
|
435
514
|
return parts.join(" ");
|
|
436
515
|
});
|
|
437
516
|
const foreignKeyDefs = [];
|
|
@@ -464,18 +543,85 @@ var LinesDB = class LinesDB {
|
|
|
464
543
|
return `"${identifier.replace(/"/g, "\"\"")}"`;
|
|
465
544
|
}
|
|
466
545
|
/**
|
|
467
|
-
* Insert data into table
|
|
546
|
+
* Insert data into table using batch insert (multiple rows per SQL)
|
|
547
|
+
* SQLite has a parameter limit (default 999), so we batch rows accordingly
|
|
548
|
+
* Throws exception if any constraint violation occurs
|
|
468
549
|
*/
|
|
469
550
|
insertData(tableName, schema, data) {
|
|
551
|
+
if (data.length === 0) return;
|
|
552
|
+
const columnNames = schema.columns.map((col) => col.name);
|
|
553
|
+
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
554
|
+
const columnCount = columnNames.length;
|
|
555
|
+
const maxBatchSize = Math.floor(900 / columnCount);
|
|
556
|
+
const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
|
|
557
|
+
for (let i = 0; i < data.length; i += batchSize) {
|
|
558
|
+
const batch = data.slice(i, i + batchSize);
|
|
559
|
+
const rowPlaceholders = columnNames.map(() => "?").join(", ");
|
|
560
|
+
const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
|
|
561
|
+
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
|
|
562
|
+
const values = [];
|
|
563
|
+
for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
|
|
564
|
+
this.db.prepare(sql).run(...values);
|
|
565
|
+
}
|
|
566
|
+
}
|
|
567
|
+
/**
|
|
568
|
+
* Insert data into table one row at a time with detailed error reporting
|
|
569
|
+
* This is used for validation to catch constraint violations
|
|
570
|
+
*/
|
|
571
|
+
insertDataWithDetailedValidation(tableName, schema, data, filePath) {
|
|
572
|
+
const errors = [];
|
|
470
573
|
const columnNames = schema.columns.map((col) => col.name);
|
|
471
574
|
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
472
575
|
const placeholders = columnNames.map(() => "?").join(", ");
|
|
473
576
|
const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
474
577
|
const stmt = this.db.prepare(sql);
|
|
475
|
-
for (
|
|
476
|
-
const
|
|
477
|
-
|
|
578
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
579
|
+
const row = data[rowIndex];
|
|
580
|
+
try {
|
|
581
|
+
const values = columnNames.map((col) => this.normalizeValue(row[col]));
|
|
582
|
+
stmt.run(...values);
|
|
583
|
+
} catch (error) {
|
|
584
|
+
const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
|
|
585
|
+
if (constraintError) errors.push(constraintError);
|
|
586
|
+
}
|
|
478
587
|
}
|
|
588
|
+
return errors;
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Analyze constraint error and extract detailed information
|
|
592
|
+
*/
|
|
593
|
+
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
|
|
594
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
595
|
+
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
596
|
+
const fkValue = row[fk.column];
|
|
597
|
+
if (fkValue === null || fkValue === void 0) continue;
|
|
598
|
+
try {
|
|
599
|
+
const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
|
|
600
|
+
if (result.length > 0 && result[0].count === 0) return {
|
|
601
|
+
file,
|
|
602
|
+
tableName,
|
|
603
|
+
rowIndex,
|
|
604
|
+
issues: [],
|
|
605
|
+
type: "foreignKey",
|
|
606
|
+
foreignKeyError: {
|
|
607
|
+
column: fk.column,
|
|
608
|
+
value: fkValue,
|
|
609
|
+
referencedTable: fk.references.table,
|
|
610
|
+
referencedColumn: fk.references.column
|
|
611
|
+
}
|
|
612
|
+
};
|
|
613
|
+
} catch (_) {}
|
|
614
|
+
}
|
|
615
|
+
return {
|
|
616
|
+
file,
|
|
617
|
+
tableName,
|
|
618
|
+
rowIndex,
|
|
619
|
+
issues: [{
|
|
620
|
+
message: errorMessage,
|
|
621
|
+
path: []
|
|
622
|
+
}],
|
|
623
|
+
type: "schema"
|
|
624
|
+
};
|
|
479
625
|
}
|
|
480
626
|
/**
|
|
481
627
|
* Execute a raw SQL query
|
|
@@ -887,9 +1033,13 @@ var LinesDB = class LinesDB {
|
|
|
887
1033
|
/**
|
|
888
1034
|
* Sync database changes back to JSONL files
|
|
889
1035
|
* Uses backward transformation when available
|
|
1036
|
+
* @param tableName Optional table name to sync. If not provided, syncs all loaded tables
|
|
890
1037
|
*/
|
|
891
|
-
async sync() {
|
|
892
|
-
|
|
1038
|
+
async sync(tableName) {
|
|
1039
|
+
if (tableName) {
|
|
1040
|
+
if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
|
|
1041
|
+
await this.syncTable(tableName);
|
|
1042
|
+
} else for (const [name] of this.schemas) await this.syncTable(name);
|
|
893
1043
|
}
|
|
894
1044
|
/**
|
|
895
1045
|
* Execute a function within a transaction
|
|
@@ -1036,295 +1186,33 @@ function sanitizeIdentifier(value) {
|
|
|
1036
1186
|
return value.replace(/[^A-Za-z0-9_$]/g, "");
|
|
1037
1187
|
}
|
|
1038
1188
|
|
|
1039
|
-
//#endregion
|
|
1040
|
-
//#region src/validator.ts
|
|
1041
|
-
var Validator = class {
|
|
1042
|
-
path;
|
|
1043
|
-
projectRoot;
|
|
1044
|
-
constructor(options) {
|
|
1045
|
-
this.path = options.path;
|
|
1046
|
-
this.projectRoot = options.projectRoot || process.cwd();
|
|
1047
|
-
}
|
|
1048
|
-
/**
|
|
1049
|
-
* Validate JSONL file(s)
|
|
1050
|
-
*/
|
|
1051
|
-
async validate() {
|
|
1052
|
-
const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
|
|
1053
|
-
const stats = await stat(fullPath);
|
|
1054
|
-
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
1055
|
-
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
1056
|
-
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
1057
|
-
}
|
|
1058
|
-
/**
|
|
1059
|
-
* Validate all JSONL files in a directory
|
|
1060
|
-
*/
|
|
1061
|
-
async validateDirectory(dirPath) {
|
|
1062
|
-
const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1063
|
-
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
1064
|
-
const allErrors = [];
|
|
1065
|
-
const allWarnings = [];
|
|
1066
|
-
const filesWithSchema = [];
|
|
1067
|
-
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
1068
|
-
else {
|
|
1069
|
-
const tableName = basename(file, ".jsonl");
|
|
1070
|
-
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
1071
|
-
}
|
|
1072
|
-
for (const file of filesWithSchema) {
|
|
1073
|
-
const result = await this.validateFile(file);
|
|
1074
|
-
allErrors.push(...result.errors);
|
|
1075
|
-
allWarnings.push(...result.warnings);
|
|
1076
|
-
}
|
|
1077
|
-
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1078
|
-
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1079
|
-
allErrors.push(...dbErrors);
|
|
1080
|
-
}
|
|
1081
|
-
return {
|
|
1082
|
-
valid: allErrors.length === 0,
|
|
1083
|
-
errors: allErrors,
|
|
1084
|
-
warnings: allWarnings
|
|
1085
|
-
};
|
|
1086
|
-
}
|
|
1087
|
-
/**
|
|
1088
|
-
* Validate by loading data into database one row at a time
|
|
1089
|
-
* This catches constraint violations and extracts detailed error information
|
|
1090
|
-
*/
|
|
1091
|
-
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1092
|
-
const errors = [];
|
|
1093
|
-
try {
|
|
1094
|
-
const db = LinesDB.create({ dataDir: ":memory:" });
|
|
1095
|
-
for (const file of jsonlFiles) {
|
|
1096
|
-
const tableName = basename(file, ".jsonl");
|
|
1097
|
-
const data = await JsonlReader.read(file);
|
|
1098
|
-
let schema;
|
|
1099
|
-
let foreignKeys = [];
|
|
1100
|
-
let indexes = [];
|
|
1101
|
-
let primaryKey;
|
|
1102
|
-
try {
|
|
1103
|
-
schema = await SchemaLoader.loadSchema(file);
|
|
1104
|
-
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
1105
|
-
const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
1106
|
-
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
1107
|
-
if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
|
|
1108
|
-
if (schemaExport?.indexes) indexes = schemaExport.indexes;
|
|
1109
|
-
if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
|
|
1110
|
-
} catch (_error) {
|
|
1111
|
-
continue;
|
|
1112
|
-
}
|
|
1113
|
-
try {
|
|
1114
|
-
const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
|
|
1115
|
-
this.createTableInDb(db, tableSchema);
|
|
1116
|
-
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
1117
|
-
const row = data[rowIndex];
|
|
1118
|
-
try {
|
|
1119
|
-
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
1120
|
-
} catch (error) {
|
|
1121
|
-
const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
|
|
1122
|
-
if (constraintError) errors.push(constraintError);
|
|
1123
|
-
}
|
|
1124
|
-
}
|
|
1125
|
-
} catch (_error) {
|
|
1126
|
-
continue;
|
|
1127
|
-
}
|
|
1128
|
-
}
|
|
1129
|
-
await db.close();
|
|
1130
|
-
} catch (error) {
|
|
1131
|
-
errors.push({
|
|
1132
|
-
file: dirPath,
|
|
1133
|
-
tableName: "database",
|
|
1134
|
-
rowIndex: 0,
|
|
1135
|
-
issues: [{
|
|
1136
|
-
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1137
|
-
path: []
|
|
1138
|
-
}],
|
|
1139
|
-
type: "schema"
|
|
1140
|
-
});
|
|
1141
|
-
}
|
|
1142
|
-
return errors;
|
|
1143
|
-
}
|
|
1144
|
-
/**
|
|
1145
|
-
* Create table schema from data and validation schema
|
|
1146
|
-
*/
|
|
1147
|
-
createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
|
|
1148
|
-
if (data.length === 0) throw new Error(`No data found in ${tableName}`);
|
|
1149
|
-
const schema = JsonlReader.inferSchema(tableName, data);
|
|
1150
|
-
if (primaryKey) {
|
|
1151
|
-
const pkColumn = schema.columns.find((col) => col.name === primaryKey);
|
|
1152
|
-
if (pkColumn) pkColumn.primaryKey = true;
|
|
1153
|
-
} else if (!schema.columns.some((col) => col.primaryKey)) {
|
|
1154
|
-
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
1155
|
-
if (idColumn) idColumn.primaryKey = true;
|
|
1156
|
-
}
|
|
1157
|
-
if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
|
|
1158
|
-
if (indexes && indexes.length > 0) schema.indexes = indexes;
|
|
1159
|
-
return schema;
|
|
1160
|
-
}
|
|
1161
|
-
/**
|
|
1162
|
-
* Create table in database
|
|
1163
|
-
*/
|
|
1164
|
-
createTableInDb(db, schema) {
|
|
1165
|
-
const columns = schema.columns.map((col) => {
|
|
1166
|
-
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
1167
|
-
if (col.primaryKey) colDef += " PRIMARY KEY";
|
|
1168
|
-
return colDef;
|
|
1169
|
-
});
|
|
1170
|
-
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
|
|
1171
|
-
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
|
|
1172
|
-
db.execute(sql);
|
|
1173
|
-
if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
|
|
1174
|
-
const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
|
|
1175
|
-
const uniqueKeyword = index.unique ? "UNIQUE" : "";
|
|
1176
|
-
const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
|
|
1177
|
-
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
1178
|
-
db.execute(indexSql);
|
|
1179
|
-
}
|
|
1180
|
-
}
|
|
1181
|
-
/**
|
|
1182
|
-
* Insert a row into database
|
|
1183
|
-
*/
|
|
1184
|
-
insertRowIntoDb(db, tableName, schema, row) {
|
|
1185
|
-
const columnNames = schema.columns.map((col) => col.name);
|
|
1186
|
-
const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
|
|
1187
|
-
const placeholders = columnNames.map(() => "?").join(", ");
|
|
1188
|
-
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
|
|
1189
|
-
const values = columnNames.map((col) => {
|
|
1190
|
-
const value = row[col];
|
|
1191
|
-
if (value === null || value === void 0) return null;
|
|
1192
|
-
if (typeof value === "object") return JSON.stringify(value);
|
|
1193
|
-
if (typeof value === "boolean") return value ? 1 : 0;
|
|
1194
|
-
return value;
|
|
1195
|
-
});
|
|
1196
|
-
db.execute(sql, values);
|
|
1197
|
-
}
|
|
1198
|
-
/**
|
|
1199
|
-
* Analyze constraint error and extract detailed information
|
|
1200
|
-
*/
|
|
1201
|
-
analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
|
|
1202
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
1203
|
-
if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
|
|
1204
|
-
const fkValue = row[fk.column];
|
|
1205
|
-
if (fkValue === null || fkValue === void 0) continue;
|
|
1206
|
-
try {
|
|
1207
|
-
const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
|
|
1208
|
-
if (result.length > 0 && result[0].count === 0) return {
|
|
1209
|
-
file,
|
|
1210
|
-
tableName,
|
|
1211
|
-
rowIndex,
|
|
1212
|
-
issues: [],
|
|
1213
|
-
type: "foreignKey",
|
|
1214
|
-
foreignKeyError: {
|
|
1215
|
-
column: fk.column,
|
|
1216
|
-
value: fkValue,
|
|
1217
|
-
referencedTable: fk.references.table,
|
|
1218
|
-
referencedColumn: fk.references.column
|
|
1219
|
-
}
|
|
1220
|
-
};
|
|
1221
|
-
} catch (_) {}
|
|
1222
|
-
}
|
|
1223
|
-
return {
|
|
1224
|
-
file,
|
|
1225
|
-
tableName,
|
|
1226
|
-
rowIndex,
|
|
1227
|
-
issues: [{
|
|
1228
|
-
message: errorMessage,
|
|
1229
|
-
path: []
|
|
1230
|
-
}],
|
|
1231
|
-
type: "schema"
|
|
1232
|
-
};
|
|
1233
|
-
}
|
|
1234
|
-
/**
|
|
1235
|
-
* Quote SQL identifier
|
|
1236
|
-
*/
|
|
1237
|
-
quoteIdentifier(name) {
|
|
1238
|
-
return `"${name.replace(/"/g, "\"\"")}"`;
|
|
1239
|
-
}
|
|
1240
|
-
/**
|
|
1241
|
-
* Validate a single JSONL file
|
|
1242
|
-
*/
|
|
1243
|
-
async validateFile(filePath) {
|
|
1244
|
-
const tableName = basename(filePath, ".jsonl");
|
|
1245
|
-
const data = await JsonlReader.read(filePath);
|
|
1246
|
-
const schema = await SchemaLoader.loadSchema(filePath);
|
|
1247
|
-
const errors = [];
|
|
1248
|
-
for (let i = 0; i < data.length; i++) {
|
|
1249
|
-
const row = data[i];
|
|
1250
|
-
const result = schema["~standard"].validate(row);
|
|
1251
|
-
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
1252
|
-
if (result.issues && result.issues.length > 0) errors.push({
|
|
1253
|
-
file: filePath,
|
|
1254
|
-
tableName,
|
|
1255
|
-
rowIndex: i,
|
|
1256
|
-
issues: result.issues,
|
|
1257
|
-
type: "schema"
|
|
1258
|
-
});
|
|
1259
|
-
}
|
|
1260
|
-
if (errors.length === 0) {
|
|
1261
|
-
const dirPath = dirname(filePath);
|
|
1262
|
-
const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1263
|
-
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
1264
|
-
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
1265
|
-
}
|
|
1266
|
-
return {
|
|
1267
|
-
valid: errors.length === 0,
|
|
1268
|
-
errors,
|
|
1269
|
-
warnings: []
|
|
1270
|
-
};
|
|
1271
|
-
}
|
|
1272
|
-
};
|
|
1273
|
-
|
|
1274
1189
|
//#endregion
|
|
1275
1190
|
//#region src/jsonl-migration.ts
|
|
1276
1191
|
/**
|
|
1277
1192
|
* Validate a table by temporarily supplying in-memory rows while reusing the existing LinesDB validation pipeline.
|
|
1278
|
-
* If validation fails,
|
|
1193
|
+
* If validation fails, throws an error with validation details.
|
|
1279
1194
|
*/
|
|
1280
1195
|
async function ensureTableRowsValid(options) {
|
|
1281
|
-
console.log("[ensureTableRowsValid] START");
|
|
1282
|
-
console.log("[ensureTableRowsValid] dataDir:", options.dataDir);
|
|
1283
|
-
console.log("[ensureTableRowsValid] tableName:", options.tableName);
|
|
1284
|
-
console.log("[ensureTableRowsValid] rows count:", options.rows.length);
|
|
1285
1196
|
const tablePath = join(options.dataDir, `${options.tableName}.jsonl`);
|
|
1286
1197
|
const overrides = new Map([[tablePath, options.rows]]);
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
}
|
|
1299
|
-
};
|
|
1300
|
-
try {
|
|
1301
|
-
console.log("[ensureTableRowsValid] Calling JsonlReader.withOverrides");
|
|
1302
|
-
await JsonlReader.withOverrides(overrides, async () => {
|
|
1303
|
-
console.log("[ensureTableRowsValid] Inside withOverrides callback");
|
|
1304
|
-
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1305
|
-
console.log("[ensureTableRowsValid] LinesDB created");
|
|
1306
|
-
try {
|
|
1307
|
-
console.log("[ensureTableRowsValid] Calling db.initialize()");
|
|
1308
|
-
await db.initialize();
|
|
1309
|
-
console.log("[ensureTableRowsValid] db.initialize() completed");
|
|
1310
|
-
} finally {
|
|
1311
|
-
console.log("[ensureTableRowsValid] Calling db.close()");
|
|
1312
|
-
await db.close();
|
|
1198
|
+
await JsonlReader.withOverrides(overrides, async () => {
|
|
1199
|
+
const db = LinesDB.create({ dataDir: options.dataDir });
|
|
1200
|
+
try {
|
|
1201
|
+
const result = await db.initialize({ tableName: options.tableName });
|
|
1202
|
+
if (!result.valid) {
|
|
1203
|
+
const errorCount = result.errors.length;
|
|
1204
|
+
const errorDetails = result.errors.map((e) => {
|
|
1205
|
+
const issueMessages = e.issues.map((issue) => issue.message).join(", ");
|
|
1206
|
+
return ` Row ${e.rowIndex}: ${issueMessages}`;
|
|
1207
|
+
}).join("\n");
|
|
1208
|
+
throw new Error(`Validation failed for table '${options.tableName}' (${errorCount} error(s)):\n${errorDetails}`);
|
|
1313
1209
|
}
|
|
1314
|
-
}
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
}
|
|
1319
|
-
console.log("[ensureTableRowsValid] Warnings captured:", warnMessages.length);
|
|
1320
|
-
console.log("[ensureTableRowsValid] capturedError:", capturedError ? "YES" : "NO");
|
|
1321
|
-
if (capturedError) {
|
|
1322
|
-
console.log("[ensureTableRowsValid] Throwing captured error");
|
|
1323
|
-
throw capturedError;
|
|
1324
|
-
}
|
|
1325
|
-
console.log("[ensureTableRowsValid] END (success)");
|
|
1210
|
+
} finally {
|
|
1211
|
+
await db.close();
|
|
1212
|
+
}
|
|
1213
|
+
});
|
|
1326
1214
|
}
|
|
1327
1215
|
|
|
1328
1216
|
//#endregion
|
|
1329
|
-
export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator,
|
|
1217
|
+
export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator, defineSchema, detectRuntime, ensureTableRowsValid, hasBackward };
|
|
1330
1218
|
//# sourceMappingURL=index.js.map
|