@toiroakr/lines-db 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/bin/cli.js +222 -171
- package/dist/index.cjs +143 -92
- package/dist/index.d.cts +23 -11
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +23 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +143 -92
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +5 -2
- package/src/database.ts +221 -77
- package/src/jsonl-reader.ts +1 -1
- package/src/schema.ts +6 -6
- package/src/sqlite-adapter.ts +4 -0
- package/src/types.ts +2 -2
- package/src/validator.ts +70 -72
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,21 @@
|
|
|
1
1
|
# @toiroakr/lines-db
|
|
2
2
|
|
|
3
|
+
## 0.4.0
|
|
4
|
+
|
|
5
|
+
### Minor Changes
|
|
6
|
+
|
|
7
|
+
- a662484: - Allow flexible schema export methods (support loading from `schema` or `default` exports)
|
|
8
|
+
- Enhance constraint validation by loading data into an actual database (catches unique, primary key, and foreign key violations)
|
|
9
|
+
- Add fallback logic to automatically use `id` column as primary key when it exists and no primary key is explicitly defined
|
|
10
|
+
|
|
11
|
+
## 0.3.0
|
|
12
|
+
|
|
13
|
+
### Minor Changes
|
|
14
|
+
|
|
15
|
+
- 50266c5: - Enhanced database initialization with dependency resolution and error handling
|
|
16
|
+
- Added support for undefined values in schema inference
|
|
17
|
+
- Implemented validation that automatically adds columns during data insertion
|
|
18
|
+
|
|
3
19
|
## 0.2.1
|
|
4
20
|
|
|
5
21
|
### Patch Changes
|
package/bin/cli.js
CHANGED
|
@@ -190,7 +190,7 @@ var JsonlReader = class {
|
|
|
190
190
|
};
|
|
191
191
|
}
|
|
192
192
|
static inferType(value) {
|
|
193
|
-
if (value === null) return "NULL";
|
|
193
|
+
if (value === null || value === void 0) return "NULL";
|
|
194
194
|
if (typeof value === "number") return Number.isInteger(value) ? "INTEGER" : "REAL";
|
|
195
195
|
if (typeof value === "string") return "TEXT";
|
|
196
196
|
if (typeof value === "boolean") return "INTEGER";
|
|
@@ -248,133 +248,6 @@ var SchemaLoader = class {
|
|
|
248
248
|
}
|
|
249
249
|
};
|
|
250
250
|
|
|
251
|
-
//#endregion
|
|
252
|
-
//#region src/validator.ts
|
|
253
|
-
var Validator = class {
|
|
254
|
-
path;
|
|
255
|
-
projectRoot;
|
|
256
|
-
constructor(options) {
|
|
257
|
-
this.path = options.path;
|
|
258
|
-
this.projectRoot = options.projectRoot || process.cwd();
|
|
259
|
-
}
|
|
260
|
-
/**
|
|
261
|
-
* Validate JSONL file(s)
|
|
262
|
-
*/
|
|
263
|
-
async validate() {
|
|
264
|
-
const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
|
|
265
|
-
const stats = await stat(fullPath);
|
|
266
|
-
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
267
|
-
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
268
|
-
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
269
|
-
}
|
|
270
|
-
/**
|
|
271
|
-
* Validate all JSONL files in a directory
|
|
272
|
-
*/
|
|
273
|
-
async validateDirectory(dirPath) {
|
|
274
|
-
const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
275
|
-
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
276
|
-
const allErrors = [];
|
|
277
|
-
const allWarnings = [];
|
|
278
|
-
const filesWithSchema = [];
|
|
279
|
-
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
280
|
-
else {
|
|
281
|
-
const tableName = basename(file, ".jsonl");
|
|
282
|
-
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
283
|
-
}
|
|
284
|
-
for (const file of filesWithSchema) {
|
|
285
|
-
const result = await this.validateFile(file);
|
|
286
|
-
allErrors.push(...result.errors);
|
|
287
|
-
allWarnings.push(...result.warnings);
|
|
288
|
-
}
|
|
289
|
-
if (filesWithSchema.length > 0) {
|
|
290
|
-
const fkErrors = await this.validateForeignKeys(dirPath, filesWithSchema);
|
|
291
|
-
allErrors.push(...fkErrors);
|
|
292
|
-
}
|
|
293
|
-
return {
|
|
294
|
-
valid: allErrors.length === 0,
|
|
295
|
-
errors: allErrors,
|
|
296
|
-
warnings: allWarnings
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
/**
|
|
300
|
-
* Validate foreign key constraints across all tables
|
|
301
|
-
*/
|
|
302
|
-
async validateForeignKeys(dirPath, jsonlFiles) {
|
|
303
|
-
const errors = [];
|
|
304
|
-
const tableData = /* @__PURE__ */ new Map();
|
|
305
|
-
const tableSchemas = /* @__PURE__ */ new Map();
|
|
306
|
-
for (const file of jsonlFiles) {
|
|
307
|
-
const tableName = basename(file, ".jsonl");
|
|
308
|
-
const data = await JsonlReader.read(file);
|
|
309
|
-
const schema = await SchemaLoader.loadSchema(file);
|
|
310
|
-
tableData.set(tableName, data);
|
|
311
|
-
tableSchemas.set(tableName, schema);
|
|
312
|
-
}
|
|
313
|
-
for (const file of jsonlFiles) {
|
|
314
|
-
const tableName = basename(file, ".jsonl");
|
|
315
|
-
const schema = tableSchemas.get(tableName);
|
|
316
|
-
const data = tableData.get(tableName);
|
|
317
|
-
if (!schema || !data || !schema.foreignKeys) continue;
|
|
318
|
-
for (const fk of schema.foreignKeys) {
|
|
319
|
-
const referencedTable = fk.references.table;
|
|
320
|
-
const referencedData = tableData.get(referencedTable);
|
|
321
|
-
if (!referencedData) continue;
|
|
322
|
-
const referencedValues = /* @__PURE__ */ new Set();
|
|
323
|
-
for (const refRow of referencedData) {
|
|
324
|
-
const keyValues = fk.references.columns.map((col) => refRow[col]);
|
|
325
|
-
const compositeKey = JSON.stringify(keyValues);
|
|
326
|
-
referencedValues.add(compositeKey);
|
|
327
|
-
}
|
|
328
|
-
for (let i = 0; i < data.length; i++) {
|
|
329
|
-
const row = data[i];
|
|
330
|
-
const foreignKeyValues = fk.columns.map((col) => row[col]);
|
|
331
|
-
const compositeKey = JSON.stringify(foreignKeyValues);
|
|
332
|
-
if (!referencedValues.has(compositeKey)) errors.push({
|
|
333
|
-
file,
|
|
334
|
-
tableName,
|
|
335
|
-
rowIndex: i,
|
|
336
|
-
issues: [],
|
|
337
|
-
type: "foreignKey",
|
|
338
|
-
foreignKeyError: {
|
|
339
|
-
column: fk.columns.join(", "),
|
|
340
|
-
value: foreignKeyValues.length === 1 ? foreignKeyValues[0] : foreignKeyValues,
|
|
341
|
-
referencedTable,
|
|
342
|
-
referencedColumn: fk.references.columns.join(", ")
|
|
343
|
-
}
|
|
344
|
-
});
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
}
|
|
348
|
-
return errors;
|
|
349
|
-
}
|
|
350
|
-
/**
|
|
351
|
-
* Validate a single JSONL file
|
|
352
|
-
*/
|
|
353
|
-
async validateFile(filePath) {
|
|
354
|
-
const tableName = basename(filePath, ".jsonl");
|
|
355
|
-
const data = await JsonlReader.read(filePath);
|
|
356
|
-
const schema = await SchemaLoader.loadSchema(filePath);
|
|
357
|
-
const errors = [];
|
|
358
|
-
for (let i = 0; i < data.length; i++) {
|
|
359
|
-
const row = data[i];
|
|
360
|
-
const result = schema["~standard"].validate(row);
|
|
361
|
-
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
362
|
-
if (result.issues && result.issues.length > 0) errors.push({
|
|
363
|
-
file: filePath,
|
|
364
|
-
tableName,
|
|
365
|
-
rowIndex: i,
|
|
366
|
-
issues: result.issues,
|
|
367
|
-
type: "schema"
|
|
368
|
-
});
|
|
369
|
-
}
|
|
370
|
-
return {
|
|
371
|
-
valid: errors.length === 0,
|
|
372
|
-
errors,
|
|
373
|
-
warnings: []
|
|
374
|
-
};
|
|
375
|
-
}
|
|
376
|
-
};
|
|
377
|
-
|
|
378
251
|
//#endregion
|
|
379
252
|
//#region src/runtime.ts
|
|
380
253
|
function detectRuntime() {
|
|
@@ -398,6 +271,7 @@ function createDatabase(path = ":memory:") {
|
|
|
398
271
|
function createNodeDatabase(path) {
|
|
399
272
|
const { DatabaseSync } = __require("node:sqlite");
|
|
400
273
|
const db = new DatabaseSync(path);
|
|
274
|
+
db.exec("PRAGMA foreign_keys = ON");
|
|
401
275
|
return {
|
|
402
276
|
prepare(sql) {
|
|
403
277
|
const stmt = db.prepare(sql);
|
|
@@ -500,57 +374,78 @@ var LinesDB = class LinesDB {
|
|
|
500
374
|
}
|
|
501
375
|
/**
|
|
502
376
|
* Initialize database by loading all JSONL files
|
|
377
|
+
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
503
378
|
*/
|
|
504
379
|
async initialize() {
|
|
505
380
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
506
|
-
|
|
507
|
-
|
|
381
|
+
const loadedTables = /* @__PURE__ */ new Set();
|
|
382
|
+
const loadingTables = /* @__PURE__ */ new Set();
|
|
383
|
+
for (const [tableName] of this.tables) if (!loadedTables.has(tableName)) try {
|
|
384
|
+
await this.loadTableWithDependencies(tableName, loadedTables, loadingTables);
|
|
508
385
|
} catch (error) {
|
|
509
386
|
console.warn(`Warning: Failed to load table '${tableName}':`, error instanceof Error ? error.message : String(error));
|
|
510
387
|
this.tables.delete(tableName);
|
|
388
|
+
this.schemas.delete(tableName);
|
|
389
|
+
this.validationSchemas.delete(tableName);
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
/**
|
|
393
|
+
* Load a table and its dependencies recursively
|
|
394
|
+
*/
|
|
395
|
+
async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
|
|
396
|
+
if (loadedTables.has(tableName)) return;
|
|
397
|
+
if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
398
|
+
const tableConfig = this.tables.get(tableName);
|
|
399
|
+
if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
|
|
400
|
+
loadingTables.add(tableName);
|
|
401
|
+
try {
|
|
402
|
+
let foreignKeys;
|
|
403
|
+
try {
|
|
404
|
+
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
405
|
+
const schemaModule = await import(`${pathToFileURL$1(tableConfig.jsonlPath.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
406
|
+
foreignKeys = (schemaModule.schema || schemaModule.default)?.foreignKeys || schemaModule.foreignKeys;
|
|
407
|
+
} catch {}
|
|
408
|
+
if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
|
|
409
|
+
const referencedTable = fk.references.table;
|
|
410
|
+
if (!loadedTables.has(referencedTable)) if (this.tables.has(referencedTable)) await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables);
|
|
411
|
+
else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
|
|
412
|
+
}
|
|
413
|
+
if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
|
|
414
|
+
else this.tables.delete(tableName);
|
|
415
|
+
} finally {
|
|
416
|
+
loadingTables.delete(tableName);
|
|
511
417
|
}
|
|
512
418
|
}
|
|
513
419
|
/**
|
|
514
420
|
* Load a single table from JSONL file
|
|
421
|
+
* @returns true if table was loaded, false if skipped
|
|
515
422
|
*/
|
|
516
423
|
async loadTable(tableName, config) {
|
|
517
424
|
const data = await JsonlReader.read(config.jsonlPath);
|
|
518
|
-
if (data.length === 0) {
|
|
519
|
-
console.warn(`Warning: Table ${tableName} has no data`);
|
|
520
|
-
return;
|
|
521
|
-
}
|
|
522
425
|
let validationSchema = config.validationSchema;
|
|
426
|
+
const schemaMetadata = {};
|
|
523
427
|
if (!validationSchema) try {
|
|
524
428
|
validationSchema = await SchemaLoader.loadSchema(config.jsonlPath);
|
|
525
|
-
} catch (
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
429
|
+
} catch (_error) {}
|
|
430
|
+
if (!config.validationSchema) try {
|
|
431
|
+
const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
|
|
432
|
+
const schemaModule = await import(`${pathToFileURL$1(config.jsonlPath.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
|
|
433
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
434
|
+
if (schemaExport?.primaryKey) schemaMetadata.primaryKey = schemaExport.primaryKey;
|
|
435
|
+
else if (schemaModule.primaryKey) schemaMetadata.primaryKey = schemaModule.primaryKey;
|
|
436
|
+
if (schemaExport?.foreignKeys) schemaMetadata.foreignKeys = schemaExport.foreignKeys;
|
|
437
|
+
else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
438
|
+
if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
|
|
439
|
+
else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
|
|
440
|
+
} catch (_error) {}
|
|
533
441
|
this.validationSchemas.set(tableName, validationSchema);
|
|
534
|
-
let schema;
|
|
535
|
-
if (config.schema) schema = config.schema;
|
|
536
|
-
else if (config.autoInferSchema !== false) schema = JsonlReader.inferSchema(tableName, data);
|
|
537
|
-
else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
538
|
-
if (validationSchema) {
|
|
539
|
-
const biSchema = validationSchema;
|
|
540
|
-
if (biSchema.primaryKey && !schema.columns.some((col) => col.primaryKey)) for (const pkColumn of biSchema.primaryKey) {
|
|
541
|
-
const col = schema.columns.find((c) => c.name === pkColumn);
|
|
542
|
-
if (col) col.primaryKey = true;
|
|
543
|
-
}
|
|
544
|
-
if (biSchema.foreignKeys) schema.foreignKeys = biSchema.foreignKeys;
|
|
545
|
-
if (biSchema.indexes) schema.indexes = biSchema.indexes;
|
|
546
|
-
}
|
|
547
|
-
this.schemas.set(tableName, schema);
|
|
548
|
-
this.createTable(schema);
|
|
549
442
|
const validationErrors = [];
|
|
443
|
+
const validatedData = [];
|
|
550
444
|
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
551
445
|
const row = data[rowIndex];
|
|
552
446
|
try {
|
|
553
|
-
this.
|
|
447
|
+
const validatedRow = this.validateAndTransform(tableName, row);
|
|
448
|
+
validatedData.push(validatedRow);
|
|
554
449
|
} catch (error) {
|
|
555
450
|
if (error instanceof Error && error.name === "ValidationError") validationErrors.push({
|
|
556
451
|
rowIndex,
|
|
@@ -567,13 +462,34 @@ var LinesDB = class LinesDB {
|
|
|
567
462
|
enhancedError.issues = validationErrors[0].error.issues;
|
|
568
463
|
throw enhancedError;
|
|
569
464
|
}
|
|
570
|
-
|
|
465
|
+
let schema;
|
|
466
|
+
if (config.schema) schema = config.schema;
|
|
467
|
+
else if (config.autoInferSchema !== false) {
|
|
468
|
+
if (validatedData.length === 0) return false;
|
|
469
|
+
schema = JsonlReader.inferSchema(tableName, validatedData);
|
|
470
|
+
} else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
471
|
+
const biSchema = validationSchema;
|
|
472
|
+
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
473
|
+
const foreignKeys = biSchema?.foreignKeys || schemaMetadata.foreignKeys;
|
|
474
|
+
const indexes = biSchema?.indexes || schemaMetadata.indexes;
|
|
475
|
+
if (primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
476
|
+
const col = schema.columns.find((c) => c.name === primaryKey);
|
|
477
|
+
if (col) col.primaryKey = true;
|
|
478
|
+
} else if (!primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
479
|
+
const idColumn = schema.columns.find((c) => c.name === "id");
|
|
480
|
+
if (idColumn) idColumn.primaryKey = true;
|
|
481
|
+
}
|
|
482
|
+
if (foreignKeys) schema.foreignKeys = foreignKeys;
|
|
483
|
+
if (indexes) schema.indexes = indexes;
|
|
484
|
+
this.schemas.set(tableName, schema);
|
|
485
|
+
this.createTable(schema);
|
|
486
|
+
this.insertData(tableName, schema, validatedData);
|
|
487
|
+
return true;
|
|
571
488
|
}
|
|
572
489
|
/**
|
|
573
490
|
* Create table in SQLite with constraints and indexes
|
|
574
491
|
*/
|
|
575
492
|
createTable(schema) {
|
|
576
|
-
this.db.exec("PRAGMA foreign_keys = ON");
|
|
577
493
|
const quotedTableName = this.quoteTableName(schema.name);
|
|
578
494
|
const columnDefs = schema.columns.map((col) => {
|
|
579
495
|
const sqlType = col.type === "JSON" ? "TEXT" : col.type;
|
|
@@ -585,7 +501,7 @@ var LinesDB = class LinesDB {
|
|
|
585
501
|
});
|
|
586
502
|
const foreignKeyDefs = [];
|
|
587
503
|
if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) {
|
|
588
|
-
const fkParts = [`FOREIGN KEY (${
|
|
504
|
+
const fkParts = [`FOREIGN KEY (${this.quoteIdentifier(fk.column)})`, `REFERENCES ${this.quoteTableName(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`];
|
|
589
505
|
if (fk.onDelete) fkParts.push(`ON DELETE ${fk.onDelete}`);
|
|
590
506
|
if (fk.onUpdate) fkParts.push(`ON UPDATE ${fk.onUpdate}`);
|
|
591
507
|
foreignKeyDefs.push(fkParts.join(" "));
|
|
@@ -700,17 +616,12 @@ var LinesDB = class LinesDB {
|
|
|
700
616
|
return deserializedRow;
|
|
701
617
|
}
|
|
702
618
|
/**
|
|
703
|
-
* Validate data using StandardSchema
|
|
619
|
+
* Validate data using StandardSchema and return the transformed value
|
|
704
620
|
* Note: Only synchronous validation is supported
|
|
705
621
|
*/
|
|
706
|
-
|
|
622
|
+
validateAndTransform(tableName, data) {
|
|
707
623
|
const schema = this.validationSchemas.get(tableName);
|
|
708
|
-
|
|
709
|
-
if (!schema) {
|
|
710
|
-
console.log(`[LinesDB] No validation schema found for table '${tableName}', skipping validation`);
|
|
711
|
-
return;
|
|
712
|
-
}
|
|
713
|
-
console.log(`[LinesDB] Validating data:`, JSON.stringify(data));
|
|
624
|
+
if (!schema) return data;
|
|
714
625
|
const result = schema["~standard"].validate(data);
|
|
715
626
|
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported. Please use synchronous validation schemas.");
|
|
716
627
|
if (result.issues && result.issues.length > 0) {
|
|
@@ -727,6 +638,17 @@ var LinesDB = class LinesDB {
|
|
|
727
638
|
error.issues = result.issues;
|
|
728
639
|
throw error;
|
|
729
640
|
}
|
|
641
|
+
const transformedValue = "value" in result ? result.value : data;
|
|
642
|
+
const normalizedValue = {};
|
|
643
|
+
for (const [key, value] of Object.entries(transformedValue)) normalizedValue[key] = value === void 0 ? null : value;
|
|
644
|
+
return normalizedValue;
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Validate data using StandardSchema (without returning transformed value)
|
|
648
|
+
* Note: Only synchronous validation is supported
|
|
649
|
+
*/
|
|
650
|
+
validateData(tableName, data) {
|
|
651
|
+
this.validateAndTransform(tableName, data);
|
|
730
652
|
}
|
|
731
653
|
/**
|
|
732
654
|
* Insert a row into a table with validation
|
|
@@ -1049,7 +971,7 @@ var LinesDB = class LinesDB {
|
|
|
1049
971
|
await this.sync();
|
|
1050
972
|
return result;
|
|
1051
973
|
} catch (error) {
|
|
1052
|
-
this.db.exec("ROLLBACK");
|
|
974
|
+
if (this.inTransaction) this.db.exec("ROLLBACK");
|
|
1053
975
|
this.inTransaction = false;
|
|
1054
976
|
throw error;
|
|
1055
977
|
}
|
|
@@ -1070,6 +992,135 @@ var LinesDB = class LinesDB {
|
|
|
1070
992
|
}
|
|
1071
993
|
};
|
|
1072
994
|
|
|
995
|
+
//#endregion
|
|
996
|
+
//#region src/validator.ts
|
|
997
|
+
var Validator = class {
|
|
998
|
+
path;
|
|
999
|
+
projectRoot;
|
|
1000
|
+
constructor(options) {
|
|
1001
|
+
this.path = options.path;
|
|
1002
|
+
this.projectRoot = options.projectRoot || process.cwd();
|
|
1003
|
+
}
|
|
1004
|
+
/**
|
|
1005
|
+
* Validate JSONL file(s)
|
|
1006
|
+
*/
|
|
1007
|
+
async validate() {
|
|
1008
|
+
const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
|
|
1009
|
+
const stats = await stat(fullPath);
|
|
1010
|
+
if (stats.isDirectory()) return this.validateDirectory(fullPath);
|
|
1011
|
+
else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
|
|
1012
|
+
else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
|
|
1013
|
+
}
|
|
1014
|
+
/**
|
|
1015
|
+
* Validate all JSONL files in a directory
|
|
1016
|
+
*/
|
|
1017
|
+
async validateDirectory(dirPath) {
|
|
1018
|
+
const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
|
|
1019
|
+
if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
|
|
1020
|
+
const allErrors = [];
|
|
1021
|
+
const allWarnings = [];
|
|
1022
|
+
const filesWithSchema = [];
|
|
1023
|
+
for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
|
|
1024
|
+
else {
|
|
1025
|
+
const tableName = basename(file, ".jsonl");
|
|
1026
|
+
allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
|
|
1027
|
+
}
|
|
1028
|
+
for (const file of filesWithSchema) {
|
|
1029
|
+
const result = await this.validateFile(file);
|
|
1030
|
+
allErrors.push(...result.errors);
|
|
1031
|
+
allWarnings.push(...result.warnings);
|
|
1032
|
+
}
|
|
1033
|
+
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
1034
|
+
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
1035
|
+
allErrors.push(...dbErrors);
|
|
1036
|
+
}
|
|
1037
|
+
return {
|
|
1038
|
+
valid: allErrors.length === 0,
|
|
1039
|
+
errors: allErrors,
|
|
1040
|
+
warnings: allWarnings
|
|
1041
|
+
};
|
|
1042
|
+
}
|
|
1043
|
+
/**
|
|
1044
|
+
* Validate by loading data into an actual database
|
|
1045
|
+
* This catches constraint violations (unique, primary key, foreign key, etc.)
|
|
1046
|
+
*/
|
|
1047
|
+
async validateWithDatabase(dirPath, jsonlFiles) {
|
|
1048
|
+
const errors = [];
|
|
1049
|
+
const warnMessages = [];
|
|
1050
|
+
const originalWarn = console.warn;
|
|
1051
|
+
console.warn = (...args) => {
|
|
1052
|
+
const message = args.map((arg) => String(arg)).join(" ");
|
|
1053
|
+
warnMessages.push(message);
|
|
1054
|
+
originalWarn(...args);
|
|
1055
|
+
};
|
|
1056
|
+
try {
|
|
1057
|
+
const db = LinesDB.create({ dataDir: dirPath });
|
|
1058
|
+
await db.initialize();
|
|
1059
|
+
await db.close();
|
|
1060
|
+
for (const message of warnMessages) if (message.includes("Failed to load table")) {
|
|
1061
|
+
const tableNameMatch = message.match(/Failed to load table '([^']+)'/);
|
|
1062
|
+
const tableName = tableNameMatch ? tableNameMatch[1] : "unknown";
|
|
1063
|
+
const file = jsonlFiles.find((f) => basename(f, ".jsonl") === tableName);
|
|
1064
|
+
errors.push({
|
|
1065
|
+
file: file || `${dirPath}/${tableName}.jsonl`,
|
|
1066
|
+
tableName,
|
|
1067
|
+
rowIndex: 0,
|
|
1068
|
+
issues: [{
|
|
1069
|
+
message: message.replace(/^Warning:\s*/, ""),
|
|
1070
|
+
path: []
|
|
1071
|
+
}],
|
|
1072
|
+
type: "schema"
|
|
1073
|
+
});
|
|
1074
|
+
}
|
|
1075
|
+
} catch (error) {
|
|
1076
|
+
errors.push({
|
|
1077
|
+
file: dirPath,
|
|
1078
|
+
tableName: "database",
|
|
1079
|
+
rowIndex: 0,
|
|
1080
|
+
issues: [{
|
|
1081
|
+
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
1082
|
+
path: []
|
|
1083
|
+
}],
|
|
1084
|
+
type: "schema"
|
|
1085
|
+
});
|
|
1086
|
+
} finally {
|
|
1087
|
+
console.warn = originalWarn;
|
|
1088
|
+
}
|
|
1089
|
+
return errors;
|
|
1090
|
+
}
|
|
1091
|
+
/**
|
|
1092
|
+
* Validate a single JSONL file
|
|
1093
|
+
*/
|
|
1094
|
+
async validateFile(filePath) {
|
|
1095
|
+
const tableName = basename(filePath, ".jsonl");
|
|
1096
|
+
const data = await JsonlReader.read(filePath);
|
|
1097
|
+
const schema = await SchemaLoader.loadSchema(filePath);
|
|
1098
|
+
const errors = [];
|
|
1099
|
+
for (let i = 0; i < data.length; i++) {
|
|
1100
|
+
const row = data[i];
|
|
1101
|
+
const result = schema["~standard"].validate(row);
|
|
1102
|
+
if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
|
|
1103
|
+
if (result.issues && result.issues.length > 0) errors.push({
|
|
1104
|
+
file: filePath,
|
|
1105
|
+
tableName,
|
|
1106
|
+
rowIndex: i,
|
|
1107
|
+
issues: result.issues,
|
|
1108
|
+
type: "schema"
|
|
1109
|
+
});
|
|
1110
|
+
}
|
|
1111
|
+
if (errors.length === 0) {
|
|
1112
|
+
const dirPath = dirname(filePath);
|
|
1113
|
+
const dbErrors = await this.validateWithDatabase(dirPath, [filePath]);
|
|
1114
|
+
errors.push(...dbErrors);
|
|
1115
|
+
}
|
|
1116
|
+
return {
|
|
1117
|
+
valid: errors.length === 0,
|
|
1118
|
+
errors,
|
|
1119
|
+
warnings: []
|
|
1120
|
+
};
|
|
1121
|
+
}
|
|
1122
|
+
};
|
|
1123
|
+
|
|
1073
1124
|
//#endregion
|
|
1074
1125
|
//#region src/error-formatter.ts
|
|
1075
1126
|
var ErrorFormatter = class {
|
|
@@ -1306,7 +1357,6 @@ program.command("migrate").description("Migrate data with transformation functio
|
|
|
1306
1357
|
process.exit(0);
|
|
1307
1358
|
}
|
|
1308
1359
|
const transformedRows = rowsToMigrate.map((row) => transform(row));
|
|
1309
|
-
console.log("\nApplying migration in transaction...\n");
|
|
1310
1360
|
try {
|
|
1311
1361
|
await db.transaction(async () => {
|
|
1312
1362
|
db.batchUpdate(tableName, transformedRows, { validate: true });
|
|
@@ -1351,6 +1401,7 @@ program.command("migrate").description("Migrate data with transformation functio
|
|
|
1351
1401
|
}
|
|
1352
1402
|
} else if (error instanceof Error) {
|
|
1353
1403
|
console.error(`\n ${error.message}`);
|
|
1404
|
+
if (options.verbose && error.stack) console.error(`\nStack trace:\n${error.stack}`);
|
|
1354
1405
|
if (error.message.includes("UNIQUE constraint failed") || error.message.includes("FOREIGN KEY constraint failed") || error.message.includes("NOT NULL constraint failed") || error.message.includes("CHECK constraint failed")) {
|
|
1355
1406
|
console.error("\n This is a SQLite constraint violation.");
|
|
1356
1407
|
console.error(" Please check your data and schema requirements.");
|