@toiroakr/lines-db 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -119,6 +119,54 @@ function sanitizeIdentifier(value) {
119
119
  return value.replace(/[^A-Za-z0-9_$]/g, "");
120
120
  }
121
121
 
122
+ //#endregion
123
+ //#region src/runtime.ts
124
/**
 * Identify the JavaScript runtime that is executing this module.
 *
 * @returns {"node" | "unknown"} "node" when a global `process` object exposing
 *   a truthy `process.versions.node` exists, otherwise "unknown".
 */
function detectRuntime() {
  const hasNodeVersion = typeof process !== "undefined" && Boolean(process.versions?.node);
  return hasNodeVersion ? "node" : "unknown";
}
/** Runtime name, computed once when this module is evaluated. */
const RUNTIME = detectRuntime();
129
+
130
+ //#endregion
131
+ //#region src/sqlite-adapter.ts
132
/**
 * Open a SQLite database for the detected runtime.
 *
 * @param {string} [path=":memory:"] Database location, handed unchanged to the
 *   runtime-specific factory.
 * @returns The adapter object produced by `createNodeDatabase`.
 * @throws {Error} "Unsupported runtime: <name>" when `RUNTIME` is not "node".
 */
function createDatabase(path = ":memory:") {
  // Only the Node.js backend exists; reject everything else up front.
  if (RUNTIME !== "node") throw new Error(`Unsupported runtime: ${RUNTIME}`);
  return createNodeDatabase(path);
}
139
/**
 * Open a database through `node:sqlite` and wrap it in a minimal adapter.
 *
 * The connection runs `PRAGMA foreign_keys = ON` before it is returned.
 *
 * @param {string} path Location passed to the `DatabaseSync` constructor.
 * @returns {{prepare: Function, exec: Function, close: Function}} Adapter where
 *   `prepare(sql)` yields an object with `run`, `get` and `all` (each forwarding
 *   its arguments to the underlying statement and returning its result), while
 *   `exec(sql)` and `close()` forward to the connection and return nothing.
 */
function createNodeDatabase(path) {
  const sqlite = __require("node:sqlite");
  const connection = new sqlite.DatabaseSync(path);
  connection.exec("PRAGMA foreign_keys = ON");
  // Expose only the three statement operations the rest of the code relies on.
  const wrapStatement = (statement) => ({
    run: (...params) => statement.run(...params),
    get: (...params) => statement.get(...params),
    all: (...params) => statement.all(...params)
  });
  return {
    prepare: (sql) => wrapStatement(connection.prepare(sql)),
    exec: (sql) => {
      connection.exec(sql);
    },
    close: () => {
      connection.close();
    }
  };
}
169
+
122
170
  //#endregion
123
171
  //#region src/jsonl-reader.ts
124
172
  var JsonlReader = class {
@@ -199,6 +247,31 @@ var JsonlReader = class {
199
247
  }
200
248
  };
201
249
 
250
+ //#endregion
251
+ //#region src/jsonl-writer.ts
252
var JsonlWriter = class {
  /**
   * Overwrite (or create) a JSONL file with one `JSON.stringify`-ed entry per
   * line, terminated by a newline. An empty `data` array yields a file that
   * contains a single newline.
   *
   * @param {string} filePath Destination file.
   * @param {unknown[]} data Rows to serialize.
   * @returns {Promise<void>}
   */
  static async write(filePath, data) {
    await writeFile(filePath, data.map((obj) => JSON.stringify(obj)).join("\n") + "\n", "utf-8");
  }
  /**
   * Append rows to a JSONL file by reading it, trimming surrounding whitespace
   * and rewriting it with the new rows at the end.
   *
   * - Missing file (ENOENT on read): delegates to `write`.
   * - Existing file, empty `data`: the file is left untouched.
   * - Existing file that is empty or whitespace-only: the new rows become the
   *   whole content, without a leading blank line.
   *
   * @param {string} filePath Destination file.
   * @param {unknown[]} data Rows to serialize and append.
   * @returns {Promise<void>}
   * @throws Any read error other than ENOENT, and any write error.
   */
  static async append(filePath, data) {
    const { readFile: readFile$1, writeFile: writeFile$1 } = await import("node:fs/promises");
    let existing;
    try {
      existing = await readFile$1(filePath, "utf-8");
    } catch (error) {
      // Only "file does not exist" is recoverable; the try is kept narrow so a
      // failure while writing is never mistaken for a missing file.
      if (error?.code !== "ENOENT") throw error;
      await this.write(filePath, data);
      return;
    }
    if (data.length === 0) return;
    const lines = data.map((obj) => JSON.stringify(obj)).join("\n");
    const kept = existing.trim();
    // Skip the separator when nothing is kept, otherwise the file would start
    // with an empty line.
    const prefix = kept === "" ? "" : `${kept}\n`;
    await writeFile$1(filePath, `${prefix}${lines}\n`, "utf-8");
  }
};
274
+
202
275
  //#endregion
203
276
  //#region src/schema-loader.ts
204
277
  var SchemaLoader = class {
@@ -248,79 +321,6 @@ var SchemaLoader = class {
248
321
  }
249
322
  };
250
323
 
251
- //#endregion
252
- //#region src/runtime.ts
253
- function detectRuntime() {
254
- if (typeof process !== "undefined" && process.versions && process.versions.node) return "node";
255
- return "unknown";
256
- }
257
- const RUNTIME = detectRuntime();
258
-
259
- //#endregion
260
- //#region src/sqlite-adapter.ts
261
- /**
262
- * Create a SQLite database instance for Node.js
263
- */
264
- function createDatabase(path = ":memory:") {
265
- if (RUNTIME === "node") return createNodeDatabase(path);
266
- else throw new Error(`Unsupported runtime: ${RUNTIME}`);
267
- }
268
- /**
269
- * Create a Node.js SQLite database
270
- */
271
- function createNodeDatabase(path) {
272
- const { DatabaseSync } = __require("node:sqlite");
273
- const db = new DatabaseSync(path);
274
- db.exec("PRAGMA foreign_keys = ON");
275
- return {
276
- prepare(sql) {
277
- const stmt = db.prepare(sql);
278
- return {
279
- run(...params) {
280
- return stmt.run(...params);
281
- },
282
- get(...params) {
283
- return stmt.get(...params);
284
- },
285
- all(...params) {
286
- return stmt.all(...params);
287
- }
288
- };
289
- },
290
- exec(sql) {
291
- db.exec(sql);
292
- },
293
- close() {
294
- db.close();
295
- }
296
- };
297
- }
298
-
299
- //#endregion
300
- //#region src/jsonl-writer.ts
301
- var JsonlWriter = class {
302
- /**
303
- * Write data to JSONL file
304
- */
305
- static async write(filePath, data) {
306
- await writeFile(filePath, data.map((obj) => JSON.stringify(obj)).join("\n") + "\n", "utf-8");
307
- }
308
- /**
309
- * Append data to JSONL file
310
- */
311
- static async append(filePath, data) {
312
- const { readFile: readFile$1, writeFile: writeFile$1 } = await import("node:fs/promises");
313
- try {
314
- const existing = await readFile$1(filePath, "utf-8");
315
- const lines = data.map((obj) => JSON.stringify(obj)).join("\n");
316
- await writeFile$1(filePath, existing.trim() + "\n" + lines + "\n", "utf-8");
317
- } catch (error) {
318
- if (error.code === "ENOENT") await this.write(filePath, data);
319
- else throw error;
320
- }
321
- }
322
- };
323
-
324
324
  //#endregion
325
325
  //#region src/directory-scanner.ts
326
326
  var DirectoryScanner = class {
@@ -373,27 +373,49 @@ var LinesDB = class LinesDB {
373
373
  return new LinesDB(config, dbPath);
374
374
  }
375
375
  /**
376
- * Initialize database by loading all JSONL files
376
+ * Initialize database by loading all JSONL files or a specific table
377
377
  * Uses dependency resolution to ensure foreign key references are loaded in correct order
378
+ * @param options Optional configuration for initialization
379
+ * @param options.tableName Optional table name to initialize. If not provided, initializes all tables
380
+ * @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
381
+ * @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
382
+ * @returns ValidationResult containing validation status, errors, and warnings
378
383
  */
379
- async initialize() {
384
+ async initialize(options) {
385
+ const allErrors = [];
386
+ const allWarnings = [];
387
+ const tableName = options?.tableName;
388
+ const detailedValidate = options?.detailedValidate ?? false;
389
+ const transform = options?.transform;
380
390
  this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
391
+ const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
392
+ for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
381
393
  const loadedTables = /* @__PURE__ */ new Set();
382
394
  const loadingTables = /* @__PURE__ */ new Set();
383
- for (const [tableName] of this.tables) if (!loadedTables.has(tableName)) try {
384
- await this.loadTableWithDependencies(tableName, loadedTables, loadingTables);
385
- } catch (error) {
386
- console.warn(`Warning: Failed to load table '${tableName}':`, error instanceof Error ? error.message : String(error));
387
- this.tables.delete(tableName);
388
- this.schemas.delete(tableName);
389
- this.validationSchemas.delete(tableName);
395
+ const attemptedTables = /* @__PURE__ */ new Set();
396
+ for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
397
+ const tableTransform = tableNameToLoad === tableName ? transform : void 0;
398
+ const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
399
+ allErrors.push(...errors);
400
+ allWarnings.push(...warnings);
390
401
  }
402
+ return {
403
+ valid: allErrors.length === 0,
404
+ errors: allErrors,
405
+ warnings: allWarnings
406
+ };
391
407
  }
392
408
  /**
393
409
  * Load a table and its dependencies recursively
394
410
  */
395
- async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
396
- if (loadedTables.has(tableName)) return;
411
+ async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
412
+ const errors = [];
413
+ const warnings = [];
414
+ if (attemptedTables.has(tableName)) return {
415
+ errors,
416
+ warnings
417
+ };
418
+ attemptedTables.add(tableName);
397
419
  if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
398
420
  const tableConfig = this.tables.get(tableName);
399
421
  if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
@@ -407,21 +429,35 @@ var LinesDB = class LinesDB {
407
429
  } catch {}
408
430
  if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
409
431
  const referencedTable = fk.references.table;
410
- if (!loadedTables.has(referencedTable)) if (this.tables.has(referencedTable)) await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables);
411
- else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
432
+ if (referencedTable === tableName) continue;
433
+ if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
434
+ const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
435
+ errors.push(...depResult.errors);
436
+ warnings.push(...depResult.warnings);
437
+ } else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
438
+ }
439
+ const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
440
+ errors.push(...loadErrors);
441
+ if (loaded) loadedTables.add(tableName);
442
+ else {
443
+ warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
444
+ this.tables.delete(tableName);
412
445
  }
413
- if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
414
- else this.tables.delete(tableName);
415
446
  } finally {
416
447
  loadingTables.delete(tableName);
417
448
  }
449
+ return {
450
+ errors,
451
+ warnings
452
+ };
418
453
  }
419
454
  /**
420
455
  * Load a single table from JSONL file
421
- * @returns true if table was loaded, false if skipped
456
+ * @returns Object with loaded status and validation errors
422
457
  */
423
- async loadTable(tableName, config) {
424
- const data = await JsonlReader.read(config.jsonlPath);
458
+ async loadTable(tableName, config, detailedValidate, transform) {
459
+ let data = await JsonlReader.read(config.jsonlPath);
460
+ if (transform) data = data.map((row) => transform(row));
425
461
  let validationSchema = config.validationSchema;
426
462
  const schemaMetadata = {};
427
463
  if (!validationSchema) try {
@@ -437,7 +473,15 @@ var LinesDB = class LinesDB {
437
473
  else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
438
474
  if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
439
475
  else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
440
- } catch (_error) {}
476
+ if (process.env.DEBUG_LINES_DB) {
477
+ console.log(`[lines-db] Schema metadata for ${tableName}:`);
478
+ console.log(` primaryKey: ${schemaMetadata.primaryKey}`);
479
+ console.log(` foreignKeys: ${JSON.stringify(schemaMetadata.foreignKeys)}`);
480
+ console.log(` indexes: ${JSON.stringify(schemaMetadata.indexes)}`);
481
+ }
482
+ } catch (_error) {
483
+ if (process.env.DEBUG_LINES_DB) console.warn(`[lines-db] Failed to load schema metadata for ${tableName}:`, _error instanceof Error ? _error.message : String(_error));
484
+ }
441
485
  this.validationSchemas.set(tableName, validationSchema);
442
486
  const validationErrors = [];
443
487
  const validatedData = [];
@@ -455,18 +499,32 @@ var LinesDB = class LinesDB {
455
499
  else throw error;
456
500
  }
457
501
  }
458
- if (validationErrors.length > 0) {
459
- const enhancedError = /* @__PURE__ */ new Error(`Validation failed for ${validationErrors.length} row(s) in table ${tableName}`);
460
- enhancedError.name = "ValidationError";
461
- enhancedError.validationErrors = validationErrors;
462
- enhancedError.issues = validationErrors[0].error.issues;
463
- throw enhancedError;
464
- }
502
+ const validationErrorDetails = validationErrors.map((ve) => ({
503
+ file: config.jsonlPath,
504
+ tableName,
505
+ rowIndex: ve.rowIndex,
506
+ issues: ve.error.issues,
507
+ type: "schema"
508
+ }));
509
+ if (validationErrors.length > 0) return {
510
+ loaded: false,
511
+ errors: validationErrorDetails
512
+ };
465
513
  let schema;
466
- if (config.schema) schema = config.schema;
467
- else if (config.autoInferSchema !== false) {
468
- if (validatedData.length === 0) return false;
469
- schema = JsonlReader.inferSchema(tableName, validatedData);
514
+ let inferredSchema;
515
+ if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
516
+ if (config.schema) {
517
+ schema = config.schema;
518
+ if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
519
+ const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
520
+ if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
521
+ }
522
+ } else if (config.autoInferSchema !== false) {
523
+ if (validatedData.length === 0) return {
524
+ loaded: false,
525
+ errors: []
526
+ };
527
+ schema = inferredSchema;
470
528
  } else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
471
529
  const biSchema = validationSchema;
472
530
  const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
@@ -480,23 +538,43 @@ var LinesDB = class LinesDB {
480
538
  if (idColumn) idColumn.primaryKey = true;
481
539
  }
482
540
  if (foreignKeys) schema.foreignKeys = foreignKeys;
483
- if (indexes) schema.indexes = indexes;
541
+ if (indexes) {
542
+ schema.indexes = indexes;
543
+ for (const index of indexes) if (index.unique && index.columns.length === 1) {
544
+ const col = schema.columns.find((c) => c.name === index.columns[0]);
545
+ if (col && !col.unique && !col.primaryKey) col.unique = true;
546
+ }
547
+ }
484
548
  this.schemas.set(tableName, schema);
485
549
  this.createTable(schema);
486
- this.insertData(tableName, schema, validatedData);
487
- return true;
550
+ if (detailedValidate) {
551
+ const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
552
+ if (insertErrors.length > 0) return {
553
+ loaded: false,
554
+ errors: insertErrors
555
+ };
556
+ } else this.insertData(tableName, schema, validatedData);
557
+ return {
558
+ loaded: true,
559
+ errors: []
560
+ };
488
561
  }
489
562
  /**
490
563
  * Create table in SQLite with constraints and indexes
491
564
  */
492
565
  createTable(schema) {
493
566
  const quotedTableName = this.quoteTableName(schema.name);
567
+ const uniqueColumns = /* @__PURE__ */ new Set();
568
+ for (const col of schema.columns) if (col.unique) uniqueColumns.add(col.name);
569
+ if (schema.indexes) {
570
+ for (const index of schema.indexes) if (index.unique && index.columns.length === 1) uniqueColumns.add(index.columns[0]);
571
+ }
494
572
  const columnDefs = schema.columns.map((col) => {
495
573
  const sqlType = col.type === "JSON" ? "TEXT" : col.type;
496
574
  const parts = [this.quoteIdentifier(col.name), sqlType];
497
575
  if (col.primaryKey) parts.push("PRIMARY KEY");
498
576
  if (col.notNull) parts.push("NOT NULL");
499
- if (col.unique) parts.push("UNIQUE");
577
+ if (uniqueColumns.has(col.name) && !col.primaryKey) parts.push("UNIQUE");
500
578
  return parts.join(" ");
501
579
  });
502
580
  const foreignKeyDefs = [];
@@ -529,18 +607,85 @@ var LinesDB = class LinesDB {
529
607
  return `"${identifier.replace(/"/g, "\"\"")}"`;
530
608
  }
531
609
  /**
532
- * Insert data into table
610
+ * Insert data into table using batch insert (multiple rows per SQL)
611
+ * SQLite has a parameter limit (default 999), so we batch rows accordingly
612
+ * Throws exception if any constraint violation occurs
533
613
  */
534
614
  insertData(tableName, schema, data) {
615
+ if (data.length === 0) return;
616
+ const columnNames = schema.columns.map((col) => col.name);
617
+ const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
618
+ const columnCount = columnNames.length;
619
+ const maxBatchSize = Math.floor(900 / columnCount);
620
+ const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
621
+ for (let i = 0; i < data.length; i += batchSize) {
622
+ const batch = data.slice(i, i + batchSize);
623
+ const rowPlaceholders = columnNames.map(() => "?").join(", ");
624
+ const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
625
+ const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
626
+ const values = [];
627
+ for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
628
+ this.db.prepare(sql).run(...values);
629
+ }
630
+ }
631
+ /**
632
+ * Insert data into table one row at a time with detailed error reporting
633
+ * This is used for validation to catch constraint violations
634
+ */
635
+ insertDataWithDetailedValidation(tableName, schema, data, filePath) {
636
+ const errors = [];
535
637
  const columnNames = schema.columns.map((col) => col.name);
536
638
  const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
537
639
  const placeholders = columnNames.map(() => "?").join(", ");
538
640
  const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
539
641
  const stmt = this.db.prepare(sql);
540
- for (const row of data) {
541
- const values = columnNames.map((col) => this.normalizeValue(row[col]));
542
- stmt.run(...values);
642
+ for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
643
+ const row = data[rowIndex];
644
+ try {
645
+ const values = columnNames.map((col) => this.normalizeValue(row[col]));
646
+ stmt.run(...values);
647
+ } catch (error) {
648
+ const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
649
+ if (constraintError) errors.push(constraintError);
650
+ }
543
651
  }
652
+ return errors;
653
+ }
654
+ /**
655
+ * Analyze constraint error and extract detailed information
656
+ */
657
+ analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
658
+ const errorMessage = error instanceof Error ? error.message : String(error);
659
+ if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
660
+ const fkValue = row[fk.column];
661
+ if (fkValue === null || fkValue === void 0) continue;
662
+ try {
663
+ const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
664
+ if (result.length > 0 && result[0].count === 0) return {
665
+ file,
666
+ tableName,
667
+ rowIndex,
668
+ issues: [],
669
+ type: "foreignKey",
670
+ foreignKeyError: {
671
+ column: fk.column,
672
+ value: fkValue,
673
+ referencedTable: fk.references.table,
674
+ referencedColumn: fk.references.column
675
+ }
676
+ };
677
+ } catch (_) {}
678
+ }
679
+ return {
680
+ file,
681
+ tableName,
682
+ rowIndex,
683
+ issues: [{
684
+ message: errorMessage,
685
+ path: []
686
+ }],
687
+ type: "schema"
688
+ };
544
689
  }
545
690
  /**
546
691
  * Execute a raw SQL query
@@ -952,9 +1097,13 @@ var LinesDB = class LinesDB {
952
1097
  /**
953
1098
  * Sync database changes back to JSONL files
954
1099
  * Uses backward transformation when available
1100
+ * @param tableName Optional table name to sync. If not provided, syncs all loaded tables
955
1101
  */
956
- async sync() {
957
- for (const [tableName] of this.tables) await this.syncTable(tableName);
1102
+ async sync(tableName) {
1103
+ if (tableName) {
1104
+ if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
1105
+ await this.syncTable(tableName);
1106
+ } else for (const [name] of this.schemas) await this.syncTable(name);
958
1107
  }
959
1108
  /**
960
1109
  * Execute a function within a transaction
@@ -992,241 +1141,6 @@ var LinesDB = class LinesDB {
992
1141
  }
993
1142
  };
994
1143
 
995
- //#endregion
996
- //#region src/validator.ts
997
- var Validator = class {
998
- path;
999
- projectRoot;
1000
- constructor(options) {
1001
- this.path = options.path;
1002
- this.projectRoot = options.projectRoot || process.cwd();
1003
- }
1004
- /**
1005
- * Validate JSONL file(s)
1006
- */
1007
- async validate() {
1008
- const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
1009
- const stats = await stat(fullPath);
1010
- if (stats.isDirectory()) return this.validateDirectory(fullPath);
1011
- else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
1012
- else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
1013
- }
1014
- /**
1015
- * Validate all JSONL files in a directory
1016
- */
1017
- async validateDirectory(dirPath) {
1018
- const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1019
- if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
1020
- const allErrors = [];
1021
- const allWarnings = [];
1022
- const filesWithSchema = [];
1023
- for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
1024
- else {
1025
- const tableName = basename(file, ".jsonl");
1026
- allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
1027
- }
1028
- for (const file of filesWithSchema) {
1029
- const result = await this.validateFile(file);
1030
- allErrors.push(...result.errors);
1031
- allWarnings.push(...result.warnings);
1032
- }
1033
- if (filesWithSchema.length > 0 && allErrors.length === 0) {
1034
- const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
1035
- allErrors.push(...dbErrors);
1036
- }
1037
- return {
1038
- valid: allErrors.length === 0,
1039
- errors: allErrors,
1040
- warnings: allWarnings
1041
- };
1042
- }
1043
- /**
1044
- * Validate by loading data into database one row at a time
1045
- * This catches constraint violations and extracts detailed error information
1046
- */
1047
- async validateWithDatabase(dirPath, jsonlFiles) {
1048
- const errors = [];
1049
- try {
1050
- const db = LinesDB.create({ dataDir: ":memory:" });
1051
- for (const file of jsonlFiles) {
1052
- const tableName = basename(file, ".jsonl");
1053
- const data = await JsonlReader.read(file);
1054
- let schema;
1055
- let foreignKeys = [];
1056
- let indexes = [];
1057
- let primaryKey;
1058
- try {
1059
- schema = await SchemaLoader.loadSchema(file);
1060
- const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
1061
- const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
1062
- const schemaExport = schemaModule.schema || schemaModule.default;
1063
- if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
1064
- if (schemaExport?.indexes) indexes = schemaExport.indexes;
1065
- if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
1066
- } catch (_error) {
1067
- continue;
1068
- }
1069
- try {
1070
- const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
1071
- this.createTableInDb(db, tableSchema);
1072
- for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
1073
- const row = data[rowIndex];
1074
- try {
1075
- this.insertRowIntoDb(db, tableName, tableSchema, row);
1076
- } catch (error) {
1077
- const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
1078
- if (constraintError) errors.push(constraintError);
1079
- }
1080
- }
1081
- } catch (_error) {
1082
- continue;
1083
- }
1084
- }
1085
- await db.close();
1086
- } catch (error) {
1087
- errors.push({
1088
- file: dirPath,
1089
- tableName: "database",
1090
- rowIndex: 0,
1091
- issues: [{
1092
- message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
1093
- path: []
1094
- }],
1095
- type: "schema"
1096
- });
1097
- }
1098
- return errors;
1099
- }
1100
- /**
1101
- * Create table schema from data and validation schema
1102
- */
1103
- createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
1104
- if (data.length === 0) throw new Error(`No data found in ${tableName}`);
1105
- const schema = JsonlReader.inferSchema(tableName, data);
1106
- if (primaryKey) {
1107
- const pkColumn = schema.columns.find((col) => col.name === primaryKey);
1108
- if (pkColumn) pkColumn.primaryKey = true;
1109
- } else if (!schema.columns.some((col) => col.primaryKey)) {
1110
- const idColumn = schema.columns.find((c) => c.name === "id");
1111
- if (idColumn) idColumn.primaryKey = true;
1112
- }
1113
- if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
1114
- if (indexes && indexes.length > 0) schema.indexes = indexes;
1115
- return schema;
1116
- }
1117
- /**
1118
- * Create table in database
1119
- */
1120
- createTableInDb(db, schema) {
1121
- const columns = schema.columns.map((col) => {
1122
- let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
1123
- if (col.primaryKey) colDef += " PRIMARY KEY";
1124
- return colDef;
1125
- });
1126
- if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
1127
- const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
1128
- db.execute(sql);
1129
- if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
1130
- const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
1131
- const uniqueKeyword = index.unique ? "UNIQUE" : "";
1132
- const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
1133
- const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
1134
- db.execute(indexSql);
1135
- }
1136
- }
1137
- /**
1138
- * Insert a row into database
1139
- */
1140
- insertRowIntoDb(db, tableName, schema, row) {
1141
- const columnNames = schema.columns.map((col) => col.name);
1142
- const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
1143
- const placeholders = columnNames.map(() => "?").join(", ");
1144
- const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
1145
- const values = columnNames.map((col) => {
1146
- const value = row[col];
1147
- if (value === null || value === void 0) return null;
1148
- if (typeof value === "object") return JSON.stringify(value);
1149
- if (typeof value === "boolean") return value ? 1 : 0;
1150
- return value;
1151
- });
1152
- db.execute(sql, values);
1153
- }
1154
- /**
1155
- * Analyze constraint error and extract detailed information
1156
- */
1157
- analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
1158
- const errorMessage = error instanceof Error ? error.message : String(error);
1159
- if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
1160
- const fkValue = row[fk.column];
1161
- if (fkValue === null || fkValue === void 0) continue;
1162
- try {
1163
- const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
1164
- if (result.length > 0 && result[0].count === 0) return {
1165
- file,
1166
- tableName,
1167
- rowIndex,
1168
- issues: [],
1169
- type: "foreignKey",
1170
- foreignKeyError: {
1171
- column: fk.column,
1172
- value: fkValue,
1173
- referencedTable: fk.references.table,
1174
- referencedColumn: fk.references.column
1175
- }
1176
- };
1177
- } catch (_) {}
1178
- }
1179
- return {
1180
- file,
1181
- tableName,
1182
- rowIndex,
1183
- issues: [{
1184
- message: errorMessage,
1185
- path: []
1186
- }],
1187
- type: "schema"
1188
- };
1189
- }
1190
- /**
1191
- * Quote SQL identifier
1192
- */
1193
- quoteIdentifier(name) {
1194
- return `"${name.replace(/"/g, "\"\"")}"`;
1195
- }
1196
- /**
1197
- * Validate a single JSONL file
1198
- */
1199
- async validateFile(filePath) {
1200
- const tableName = basename(filePath, ".jsonl");
1201
- const data = await JsonlReader.read(filePath);
1202
- const schema = await SchemaLoader.loadSchema(filePath);
1203
- const errors = [];
1204
- for (let i = 0; i < data.length; i++) {
1205
- const row = data[i];
1206
- const result = schema["~standard"].validate(row);
1207
- if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
1208
- if (result.issues && result.issues.length > 0) errors.push({
1209
- file: filePath,
1210
- tableName,
1211
- rowIndex: i,
1212
- issues: result.issues,
1213
- type: "schema"
1214
- });
1215
- }
1216
- if (errors.length === 0) {
1217
- const dirPath = dirname(filePath);
1218
- const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1219
- const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
1220
- errors.push(...dbErrors.filter((e) => e.file === filePath));
1221
- }
1222
- return {
1223
- valid: errors.length === 0,
1224
- errors,
1225
- warnings: []
1226
- };
1227
- }
1228
- };
1229
-
1230
1144
  //#endregion
1231
1145
  //#region src/error-formatter.ts
1232
1146
  var ErrorFormatter = class {
@@ -1374,7 +1288,24 @@ program.command("generate").description("Generate TypeScript type definitions fr
1374
1288
  });
1375
1289
  program.command("validate").description("Validate JSONL file(s) against schema").argument("<path>", "File or directory path to validate").option("-v, --verbose", "Show verbose error output", false).action(async (path, options) => {
1376
1290
  try {
1377
- const result = await new Validator({ path }).validate();
1291
+ const stats = await stat(path);
1292
+ let dataDir;
1293
+ let tableName;
1294
+ if (stats.isDirectory()) dataDir = path;
1295
+ else if (stats.isFile() && path.endsWith(".jsonl")) {
1296
+ dataDir = dirname(path);
1297
+ tableName = basename(path, ".jsonl");
1298
+ } else throw new Error(`Invalid path: ${path}. Must be a directory or .jsonl file.`);
1299
+ const db = LinesDB.create({ dataDir });
1300
+ let result;
1301
+ try {
1302
+ result = await db.initialize({
1303
+ tableName,
1304
+ detailedValidate: true
1305
+ });
1306
+ } finally {
1307
+ await db.close();
1308
+ }
1378
1309
  if (result.warnings.length > 0) {
1379
1310
  for (const warning of result.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
1380
1311
  console.log("");
@@ -1449,7 +1380,19 @@ async function migrateDirectory(dirPath, transformStr, options) {
1449
1380
  }
1450
1381
  console.log(`Found ${jsonlFiles.length} JSONL file(s) in directory`);
1451
1382
  const db = LinesDB.create({ dataDir: dirPath });
1452
- await db.initialize();
1383
+ const initResult = await db.initialize({ detailedValidate: true });
1384
+ if (initResult.warnings.length > 0) for (const warning of initResult.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
1385
+ if (!initResult.valid) {
1386
+ console.error(`Error: Failed to initialize database due to validation errors:`);
1387
+ const formatter = new ErrorFormatter({ verbose: options.verbose });
1388
+ for (const error of initResult.errors) console.error(formatter.formatValidationErrors([{
1389
+ file: error.file,
1390
+ rowIndex: error.rowIndex,
1391
+ issues: error.issues
1392
+ }]));
1393
+ await db.close();
1394
+ process.exit(1);
1395
+ }
1453
1396
  const tableNames = db.getTableNames();
1454
1397
  if (tableNames.length === 0) {
1455
1398
  console.error(`Error: No tables could be loaded from directory: ${dirPath}`);
@@ -1534,89 +1477,128 @@ async function migrateFile(filePath, transformStr, options) {
1534
1477
  }
1535
1478
  const lastSlashIndex = filePath.lastIndexOf("/");
1536
1479
  const dataDir = lastSlashIndex > 0 ? filePath.substring(0, lastSlashIndex) : ".";
1537
- const db = LinesDB.create({ dataDir });
1538
- await db.initialize();
1480
+ let transform;
1539
1481
  try {
1540
- const transform = runInSandbox(`(${transformStr})`);
1541
- if (typeof transform !== "function") {
1482
+ const parsedTransform = runInSandbox(`(${transformStr})`);
1483
+ if (typeof parsedTransform !== "function") {
1542
1484
  console.error("Error: Transform must be a function");
1543
1485
  process.exit(1);
1544
1486
  }
1487
+ transform = parsedTransform;
1488
+ } catch (error) {
1489
+ console.error("Error: Failed to parse transform function");
1490
+ console.error(` ${error instanceof Error ? error.message : String(error)}`);
1491
+ process.exit(1);
1492
+ }
1493
+ const db = LinesDB.create({ dataDir });
1494
+ const initResult = await db.initialize({
1495
+ tableName,
1496
+ transform,
1497
+ detailedValidate: true
1498
+ });
1499
+ if (initResult.warnings.length > 0) for (const warning of initResult.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
1500
+ if (!initResult.valid) {
1501
+ console.error(`Error: Failed to initialize database due to validation errors:`);
1502
+ const formatter = new ErrorFormatter({ verbose: options.verbose });
1503
+ for (const error of initResult.errors) console.error(formatter.formatValidationErrors([{
1504
+ file: error.file,
1505
+ rowIndex: error.rowIndex,
1506
+ issues: error.issues
1507
+ }]));
1508
+ await db.close();
1509
+ process.exit(1);
1510
+ }
1511
+ try {
1545
1512
  let filter = void 0;
1546
1513
  if (options.filter) try {
1547
1514
  filter = JSON.parse(options.filter);
1548
1515
  } catch {
1549
1516
  filter = runInSandbox(`(${options.filter})`);
1550
1517
  }
1551
- let rowsToMigrate;
1552
- try {
1553
- rowsToMigrate = filter ? db.find(tableName, filter) : db.find(tableName);
1554
- } catch (error) {
1555
- console.error(`Error: Failed to access table '${tableName}'`);
1556
- console.error(` ${error instanceof Error ? error.message : String(error)}`);
1557
- console.error(`\nThe table may have failed to load during initialization.`);
1558
- console.error(`Check the table's data and schema for any constraint violations.`);
1559
- await db.close();
1560
- process.exit(1);
1561
- }
1562
- console.log(`Found ${rowsToMigrate.length} row(s) to migrate in table '${tableName}'`);
1563
- if (rowsToMigrate.length === 0) {
1564
- console.log("No rows to migrate. Exiting.");
1565
- await db.close();
1566
- process.exit(0);
1567
- }
1568
- const transformedRows = rowsToMigrate.map((row) => transform(row));
1569
- try {
1570
- await db.transaction(async () => {
1571
- db.batchUpdate(tableName, transformedRows, { validate: true });
1572
- });
1518
+ if (filter) {
1519
+ let rowsToMigrate;
1520
+ try {
1521
+ rowsToMigrate = db.find(tableName, filter);
1522
+ } catch (error) {
1523
+ console.error(`Error: Failed to access table '${tableName}'`);
1524
+ console.error(` ${error instanceof Error ? error.message : String(error)}`);
1525
+ console.error(`\nThe table may have failed to load during initialization.`);
1526
+ console.error(`Check the table's data and schema for any constraint violations.`);
1527
+ await db.close();
1528
+ process.exit(1);
1529
+ }
1530
+ console.log(`Found ${rowsToMigrate.length} row(s) to migrate in table '${tableName}'`);
1531
+ if (rowsToMigrate.length === 0) {
1532
+ console.log("No rows to migrate. Exiting.");
1533
+ await db.close();
1534
+ process.exit(0);
1535
+ }
1536
+ const transformedRows = rowsToMigrate.map((row) => transform(row));
1537
+ try {
1538
+ await db.transaction(async () => {
1539
+ db.batchUpdate(tableName, transformedRows, { validate: true });
1540
+ });
1541
+ await db.close();
1542
+ console.log(`\nMigration completed successfully:`);
1543
+ console.log(` ✓ ${rowsToMigrate.length} row(s) updated`);
1544
+ process.exit(0);
1545
+ } catch (error) {
1546
+ await db.close();
1547
+ if (options.errorOutput) try {
1548
+ const jsonlContent = transformedRows.map((row) => JSON.stringify(row)).join("\n");
1549
+ await writeFile(options.errorOutput, jsonlContent, "utf-8");
1550
+ console.error(styleText("yellow", `\n⚠ Transformed data (${transformedRows.length} rows) written to: ${options.errorOutput}`));
1551
+ } catch (writeError) {
1552
+ console.error(styleText("red", `\n✗ Failed to write error output file: ${writeError instanceof Error ? writeError.message : String(writeError)}`));
1553
+ }
1554
+ const formatter = new ErrorFormatter({ verbose: options.verbose });
1555
+ console.error(formatter.formatMigrationFailureHeader());
1556
+ if (error instanceof Error && error.name === "ValidationError") {
1557
+ const validationError = error;
1558
+ if (validationError.validationErrors) {
1559
+ console.error(`\nFound ${validationError.validationErrors.length} validation error(s) in transformed data:\n`);
1560
+ const errorInfos = validationError.validationErrors.map(({ rowIndex, rowData, error: rowError }) => ({
1561
+ file: filePath,
1562
+ rowIndex,
1563
+ issues: rowError.issues,
1564
+ data: rowData,
1565
+ originalData: rowsToMigrate[rowIndex]
1566
+ }));
1567
+ const formatted = formatter.formatValidationErrors(errorInfos);
1568
+ console.error(formatted);
1569
+ } else {
1570
+ console.error("\nValidation error:\n");
1571
+ const errorInfo = {
1572
+ file: filePath,
1573
+ rowIndex: 0,
1574
+ issues: validationError.issues
1575
+ };
1576
+ const formatted = formatter.formatValidationErrors([errorInfo]);
1577
+ console.error(formatted);
1578
+ }
1579
+ } else if (error instanceof Error) {
1580
+ console.error(`\n ${error.message}`);
1581
+ if (options.verbose && error.stack) console.error(`\nStack trace:\n${error.stack}`);
1582
+ if (error.message.includes("UNIQUE constraint failed") || error.message.includes("FOREIGN KEY constraint failed") || error.message.includes("NOT NULL constraint failed") || error.message.includes("CHECK constraint failed")) {
1583
+ console.error("\n This is a SQLite constraint violation.");
1584
+ console.error(" Please check your data and schema requirements.");
1585
+ }
1586
+ } else console.error(`\n ${String(error)}`);
1587
+ console.error("");
1588
+ process.exit(1);
1589
+ }
1590
+ } else try {
1591
+ const allRows = db.find(tableName);
1592
+ console.log(`Migrated ${allRows.length} row(s) in table '${tableName}'`);
1593
+ await db.sync(tableName);
1573
1594
  await db.close();
1574
1595
  console.log(`\nMigration completed successfully:`);
1575
- console.log(` ✓ ${rowsToMigrate.length} row(s) updated`);
1596
+ console.log(` ✓ ${allRows.length} row(s) updated`);
1576
1597
  process.exit(0);
1577
1598
  } catch (error) {
1578
1599
  await db.close();
1579
- if (options.errorOutput) try {
1580
- const jsonlContent = transformedRows.map((row) => JSON.stringify(row)).join("\n");
1581
- await writeFile(options.errorOutput, jsonlContent, "utf-8");
1582
- console.error(styleText("yellow", `\n⚠ Transformed data (${transformedRows.length} rows) written to: ${options.errorOutput}`));
1583
- } catch (writeError) {
1584
- console.error(styleText("red", `\n✗ Failed to write error output file: ${writeError instanceof Error ? writeError.message : String(writeError)}`));
1585
- }
1586
- const formatter = new ErrorFormatter({ verbose: options.verbose });
1587
- console.error(formatter.formatMigrationFailureHeader());
1588
- if (error instanceof Error && error.name === "ValidationError") {
1589
- const validationError = error;
1590
- if (validationError.validationErrors) {
1591
- console.error(`\nFound ${validationError.validationErrors.length} validation error(s) in transformed data:\n`);
1592
- const errorInfos = validationError.validationErrors.map(({ rowIndex, rowData, error: rowError }) => ({
1593
- file: filePath,
1594
- rowIndex,
1595
- issues: rowError.issues,
1596
- data: rowData,
1597
- originalData: rowsToMigrate[rowIndex]
1598
- }));
1599
- const formatted = formatter.formatValidationErrors(errorInfos);
1600
- console.error(formatted);
1601
- } else {
1602
- console.error("\nValidation error:\n");
1603
- const errorInfo = {
1604
- file: filePath,
1605
- rowIndex: 0,
1606
- issues: validationError.issues
1607
- };
1608
- const formatted = formatter.formatValidationErrors([errorInfo]);
1609
- console.error(formatted);
1610
- }
1611
- } else if (error instanceof Error) {
1612
- console.error(`\n ${error.message}`);
1613
- if (options.verbose && error.stack) console.error(`\nStack trace:\n${error.stack}`);
1614
- if (error.message.includes("UNIQUE constraint failed") || error.message.includes("FOREIGN KEY constraint failed") || error.message.includes("NOT NULL constraint failed") || error.message.includes("CHECK constraint failed")) {
1615
- console.error("\n This is a SQLite constraint violation.");
1616
- console.error(" Please check your data and schema requirements.");
1617
- }
1618
- } else console.error(`\n ${String(error)}`);
1619
- console.error("");
1600
+ console.error("Error: Failed to sync changes to file");
1601
+ console.error(` ${error instanceof Error ? error.message : String(error)}`);
1620
1602
  process.exit(1);
1621
1603
  }
1622
1604
  } catch (error) {