@toiroakr/lines-db 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -119,6 +119,54 @@ function sanitizeIdentifier(value) {
119
119
  return value.replace(/[^A-Za-z0-9_$]/g, "");
120
120
  }
121
121
 
122
+ //#endregion
123
+ //#region src/runtime.ts
124
+ function detectRuntime() {
125
+ if (typeof process !== "undefined" && process.versions && process.versions.node) return "node";
126
+ return "unknown";
127
+ }
128
+ const RUNTIME = detectRuntime();
129
+
130
+ //#endregion
131
+ //#region src/sqlite-adapter.ts
132
+ /**
133
+ * Create a SQLite database instance for Node.js
134
+ */
135
+ function createDatabase(path = ":memory:") {
136
+ if (RUNTIME === "node") return createNodeDatabase(path);
137
+ else throw new Error(`Unsupported runtime: ${RUNTIME}`);
138
+ }
139
+ /**
140
+ * Create a Node.js SQLite database
141
+ */
142
+ function createNodeDatabase(path) {
143
+ const { DatabaseSync } = __require("node:sqlite");
144
+ const db = new DatabaseSync(path);
145
+ db.exec("PRAGMA foreign_keys = ON");
146
+ return {
147
+ prepare(sql) {
148
+ const stmt = db.prepare(sql);
149
+ return {
150
+ run(...params) {
151
+ return stmt.run(...params);
152
+ },
153
+ get(...params) {
154
+ return stmt.get(...params);
155
+ },
156
+ all(...params) {
157
+ return stmt.all(...params);
158
+ }
159
+ };
160
+ },
161
+ exec(sql) {
162
+ db.exec(sql);
163
+ },
164
+ close() {
165
+ db.close();
166
+ }
167
+ };
168
+ }
169
+
122
170
  //#endregion
123
171
  //#region src/jsonl-reader.ts
124
172
  var JsonlReader = class {
@@ -199,6 +247,31 @@ var JsonlReader = class {
199
247
  }
200
248
  };
201
249
 
250
+ //#endregion
251
+ //#region src/jsonl-writer.ts
252
+ var JsonlWriter = class {
253
+ /**
254
+ * Write data to JSONL file
255
+ */
256
+ static async write(filePath, data) {
257
+ await writeFile(filePath, data.map((obj) => JSON.stringify(obj)).join("\n") + "\n", "utf-8");
258
+ }
259
+ /**
260
+ * Append data to JSONL file
261
+ */
262
+ static async append(filePath, data) {
263
+ const { readFile: readFile$1, writeFile: writeFile$1 } = await import("node:fs/promises");
264
+ try {
265
+ const existing = await readFile$1(filePath, "utf-8");
266
+ const lines = data.map((obj) => JSON.stringify(obj)).join("\n");
267
+ await writeFile$1(filePath, existing.trim() + "\n" + lines + "\n", "utf-8");
268
+ } catch (error) {
269
+ if (error.code === "ENOENT") await this.write(filePath, data);
270
+ else throw error;
271
+ }
272
+ }
273
+ };
274
+
202
275
  //#endregion
203
276
  //#region src/schema-loader.ts
204
277
  var SchemaLoader = class {
@@ -248,79 +321,6 @@ var SchemaLoader = class {
248
321
  }
249
322
  };
250
323
 
251
- //#endregion
252
- //#region src/runtime.ts
253
- function detectRuntime() {
254
- if (typeof process !== "undefined" && process.versions && process.versions.node) return "node";
255
- return "unknown";
256
- }
257
- const RUNTIME = detectRuntime();
258
-
259
- //#endregion
260
- //#region src/sqlite-adapter.ts
261
- /**
262
- * Create a SQLite database instance for Node.js
263
- */
264
- function createDatabase(path = ":memory:") {
265
- if (RUNTIME === "node") return createNodeDatabase(path);
266
- else throw new Error(`Unsupported runtime: ${RUNTIME}`);
267
- }
268
- /**
269
- * Create a Node.js SQLite database
270
- */
271
- function createNodeDatabase(path) {
272
- const { DatabaseSync } = __require("node:sqlite");
273
- const db = new DatabaseSync(path);
274
- db.exec("PRAGMA foreign_keys = ON");
275
- return {
276
- prepare(sql) {
277
- const stmt = db.prepare(sql);
278
- return {
279
- run(...params) {
280
- return stmt.run(...params);
281
- },
282
- get(...params) {
283
- return stmt.get(...params);
284
- },
285
- all(...params) {
286
- return stmt.all(...params);
287
- }
288
- };
289
- },
290
- exec(sql) {
291
- db.exec(sql);
292
- },
293
- close() {
294
- db.close();
295
- }
296
- };
297
- }
298
-
299
- //#endregion
300
- //#region src/jsonl-writer.ts
301
- var JsonlWriter = class {
302
- /**
303
- * Write data to JSONL file
304
- */
305
- static async write(filePath, data) {
306
- await writeFile(filePath, data.map((obj) => JSON.stringify(obj)).join("\n") + "\n", "utf-8");
307
- }
308
- /**
309
- * Append data to JSONL file
310
- */
311
- static async append(filePath, data) {
312
- const { readFile: readFile$1, writeFile: writeFile$1 } = await import("node:fs/promises");
313
- try {
314
- const existing = await readFile$1(filePath, "utf-8");
315
- const lines = data.map((obj) => JSON.stringify(obj)).join("\n");
316
- await writeFile$1(filePath, existing.trim() + "\n" + lines + "\n", "utf-8");
317
- } catch (error) {
318
- if (error.code === "ENOENT") await this.write(filePath, data);
319
- else throw error;
320
- }
321
- }
322
- };
323
-
324
324
  //#endregion
325
325
  //#region src/directory-scanner.ts
326
326
  var DirectoryScanner = class {
@@ -373,27 +373,49 @@ var LinesDB = class LinesDB {
373
373
  return new LinesDB(config, dbPath);
374
374
  }
375
375
  /**
376
- * Initialize database by loading all JSONL files
376
+ * Initialize database by loading all JSONL files or a specific table
377
377
  * Uses dependency resolution to ensure foreign key references are loaded in correct order
378
+ * @param options Optional configuration for initialization
379
+ * @param options.tableName Optional table name to initialize. If not provided, initializes all tables
380
+ * @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
381
+ * @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
382
+ * @returns ValidationResult containing validation status, errors, and warnings
378
383
  */
379
- async initialize() {
384
+ async initialize(options) {
385
+ const allErrors = [];
386
+ const allWarnings = [];
387
+ const tableName = options?.tableName;
388
+ const detailedValidate = options?.detailedValidate ?? false;
389
+ const transform = options?.transform;
380
390
  this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
391
+ const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
392
+ for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
381
393
  const loadedTables = /* @__PURE__ */ new Set();
382
394
  const loadingTables = /* @__PURE__ */ new Set();
383
- for (const [tableName] of this.tables) if (!loadedTables.has(tableName)) try {
384
- await this.loadTableWithDependencies(tableName, loadedTables, loadingTables);
385
- } catch (error) {
386
- console.warn(`Warning: Failed to load table '${tableName}':`, error instanceof Error ? error.message : String(error));
387
- this.tables.delete(tableName);
388
- this.schemas.delete(tableName);
389
- this.validationSchemas.delete(tableName);
395
+ const attemptedTables = /* @__PURE__ */ new Set();
396
+ for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
397
+ const tableTransform = tableNameToLoad === tableName ? transform : void 0;
398
+ const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
399
+ allErrors.push(...errors);
400
+ allWarnings.push(...warnings);
390
401
  }
402
+ return {
403
+ valid: allErrors.length === 0,
404
+ errors: allErrors,
405
+ warnings: allWarnings
406
+ };
391
407
  }
392
408
  /**
393
409
  * Load a table and its dependencies recursively
394
410
  */
395
- async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
396
- if (loadedTables.has(tableName)) return;
411
+ async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
412
+ const errors = [];
413
+ const warnings = [];
414
+ if (attemptedTables.has(tableName)) return {
415
+ errors,
416
+ warnings
417
+ };
418
+ attemptedTables.add(tableName);
397
419
  if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
398
420
  const tableConfig = this.tables.get(tableName);
399
421
  if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
@@ -407,21 +429,35 @@ var LinesDB = class LinesDB {
407
429
  } catch {}
408
430
  if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
409
431
  const referencedTable = fk.references.table;
410
- if (!loadedTables.has(referencedTable)) if (this.tables.has(referencedTable)) await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables);
411
- else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
432
+ if (referencedTable === tableName) continue;
433
+ if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
434
+ const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
435
+ errors.push(...depResult.errors);
436
+ warnings.push(...depResult.warnings);
437
+ } else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
438
+ }
439
+ const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
440
+ errors.push(...loadErrors);
441
+ if (loaded) loadedTables.add(tableName);
442
+ else {
443
+ warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
444
+ this.tables.delete(tableName);
412
445
  }
413
- if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
414
- else this.tables.delete(tableName);
415
446
  } finally {
416
447
  loadingTables.delete(tableName);
417
448
  }
449
+ return {
450
+ errors,
451
+ warnings
452
+ };
418
453
  }
419
454
  /**
420
455
  * Load a single table from JSONL file
421
- * @returns true if table was loaded, false if skipped
456
+ * @returns Object with loaded status and validation errors
422
457
  */
423
- async loadTable(tableName, config) {
424
- const data = await JsonlReader.read(config.jsonlPath);
458
+ async loadTable(tableName, config, detailedValidate, transform) {
459
+ let data = await JsonlReader.read(config.jsonlPath);
460
+ if (transform) data = data.map((row) => transform(row));
425
461
  let validationSchema = config.validationSchema;
426
462
  const schemaMetadata = {};
427
463
  if (!validationSchema) try {
@@ -455,18 +491,32 @@ var LinesDB = class LinesDB {
455
491
  else throw error;
456
492
  }
457
493
  }
458
- if (validationErrors.length > 0) {
459
- const enhancedError = /* @__PURE__ */ new Error(`Validation failed for ${validationErrors.length} row(s) in table ${tableName}`);
460
- enhancedError.name = "ValidationError";
461
- enhancedError.validationErrors = validationErrors;
462
- enhancedError.issues = validationErrors[0].error.issues;
463
- throw enhancedError;
464
- }
494
+ const validationErrorDetails = validationErrors.map((ve) => ({
495
+ file: config.jsonlPath,
496
+ tableName,
497
+ rowIndex: ve.rowIndex,
498
+ issues: ve.error.issues,
499
+ type: "schema"
500
+ }));
501
+ if (validationErrors.length > 0) return {
502
+ loaded: false,
503
+ errors: validationErrorDetails
504
+ };
465
505
  let schema;
466
- if (config.schema) schema = config.schema;
467
- else if (config.autoInferSchema !== false) {
468
- if (validatedData.length === 0) return false;
469
- schema = JsonlReader.inferSchema(tableName, validatedData);
506
+ let inferredSchema;
507
+ if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
508
+ if (config.schema) {
509
+ schema = config.schema;
510
+ if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
511
+ const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
512
+ if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
513
+ }
514
+ } else if (config.autoInferSchema !== false) {
515
+ if (validatedData.length === 0) return {
516
+ loaded: false,
517
+ errors: []
518
+ };
519
+ schema = inferredSchema;
470
520
  } else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
471
521
  const biSchema = validationSchema;
472
522
  const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
@@ -483,8 +533,17 @@ var LinesDB = class LinesDB {
483
533
  if (indexes) schema.indexes = indexes;
484
534
  this.schemas.set(tableName, schema);
485
535
  this.createTable(schema);
486
- this.insertData(tableName, schema, validatedData);
487
- return true;
536
+ if (detailedValidate) {
537
+ const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
538
+ if (insertErrors.length > 0) return {
539
+ loaded: false,
540
+ errors: insertErrors
541
+ };
542
+ } else this.insertData(tableName, schema, validatedData);
543
+ return {
544
+ loaded: true,
545
+ errors: []
546
+ };
488
547
  }
489
548
  /**
490
549
  * Create table in SQLite with constraints and indexes
@@ -529,18 +588,85 @@ var LinesDB = class LinesDB {
529
588
  return `"${identifier.replace(/"/g, "\"\"")}"`;
530
589
  }
531
590
  /**
532
- * Insert data into table
591
+ * Insert data into table using batch insert (multiple rows per SQL)
592
+ * SQLite has a parameter limit (default 999), so we batch rows accordingly
593
+ * Throws exception if any constraint violation occurs
533
594
  */
534
595
  insertData(tableName, schema, data) {
596
+ if (data.length === 0) return;
597
+ const columnNames = schema.columns.map((col) => col.name);
598
+ const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
599
+ const columnCount = columnNames.length;
600
+ const maxBatchSize = Math.floor(900 / columnCount);
601
+ const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
602
+ for (let i = 0; i < data.length; i += batchSize) {
603
+ const batch = data.slice(i, i + batchSize);
604
+ const rowPlaceholders = columnNames.map(() => "?").join(", ");
605
+ const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
606
+ const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
607
+ const values = [];
608
+ for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
609
+ this.db.prepare(sql).run(...values);
610
+ }
611
+ }
612
+ /**
613
+ * Insert data into table one row at a time with detailed error reporting
614
+ * This is used for validation to catch constraint violations
615
+ */
616
+ insertDataWithDetailedValidation(tableName, schema, data, filePath) {
617
+ const errors = [];
535
618
  const columnNames = schema.columns.map((col) => col.name);
536
619
  const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
537
620
  const placeholders = columnNames.map(() => "?").join(", ");
538
621
  const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
539
622
  const stmt = this.db.prepare(sql);
540
- for (const row of data) {
541
- const values = columnNames.map((col) => this.normalizeValue(row[col]));
542
- stmt.run(...values);
623
+ for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
624
+ const row = data[rowIndex];
625
+ try {
626
+ const values = columnNames.map((col) => this.normalizeValue(row[col]));
627
+ stmt.run(...values);
628
+ } catch (error) {
629
+ const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
630
+ if (constraintError) errors.push(constraintError);
631
+ }
543
632
  }
633
+ return errors;
634
+ }
635
+ /**
636
+ * Analyze constraint error and extract detailed information
637
+ */
638
+ analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
639
+ const errorMessage = error instanceof Error ? error.message : String(error);
640
+ if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
641
+ const fkValue = row[fk.column];
642
+ if (fkValue === null || fkValue === void 0) continue;
643
+ try {
644
+ const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
645
+ if (result.length > 0 && result[0].count === 0) return {
646
+ file,
647
+ tableName,
648
+ rowIndex,
649
+ issues: [],
650
+ type: "foreignKey",
651
+ foreignKeyError: {
652
+ column: fk.column,
653
+ value: fkValue,
654
+ referencedTable: fk.references.table,
655
+ referencedColumn: fk.references.column
656
+ }
657
+ };
658
+ } catch (_) {}
659
+ }
660
+ return {
661
+ file,
662
+ tableName,
663
+ rowIndex,
664
+ issues: [{
665
+ message: errorMessage,
666
+ path: []
667
+ }],
668
+ type: "schema"
669
+ };
544
670
  }
545
671
  /**
546
672
  * Execute a raw SQL query
@@ -952,9 +1078,13 @@ var LinesDB = class LinesDB {
952
1078
  /**
953
1079
  * Sync database changes back to JSONL files
954
1080
  * Uses backward transformation when available
1081
+ * @param tableName Optional table name to sync. If not provided, syncs all loaded tables
955
1082
  */
956
- async sync() {
957
- for (const [tableName] of this.tables) await this.syncTable(tableName);
1083
+ async sync(tableName) {
1084
+ if (tableName) {
1085
+ if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
1086
+ await this.syncTable(tableName);
1087
+ } else for (const [name] of this.schemas) await this.syncTable(name);
958
1088
  }
959
1089
  /**
960
1090
  * Execute a function within a transaction
@@ -992,241 +1122,6 @@ var LinesDB = class LinesDB {
992
1122
  }
993
1123
  };
994
1124
 
995
- //#endregion
996
- //#region src/validator.ts
997
- var Validator = class {
998
- path;
999
- projectRoot;
1000
- constructor(options) {
1001
- this.path = options.path;
1002
- this.projectRoot = options.projectRoot || process.cwd();
1003
- }
1004
- /**
1005
- * Validate JSONL file(s)
1006
- */
1007
- async validate() {
1008
- const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
1009
- const stats = await stat(fullPath);
1010
- if (stats.isDirectory()) return this.validateDirectory(fullPath);
1011
- else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
1012
- else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
1013
- }
1014
- /**
1015
- * Validate all JSONL files in a directory
1016
- */
1017
- async validateDirectory(dirPath) {
1018
- const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1019
- if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
1020
- const allErrors = [];
1021
- const allWarnings = [];
1022
- const filesWithSchema = [];
1023
- for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
1024
- else {
1025
- const tableName = basename(file, ".jsonl");
1026
- allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
1027
- }
1028
- for (const file of filesWithSchema) {
1029
- const result = await this.validateFile(file);
1030
- allErrors.push(...result.errors);
1031
- allWarnings.push(...result.warnings);
1032
- }
1033
- if (filesWithSchema.length > 0 && allErrors.length === 0) {
1034
- const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
1035
- allErrors.push(...dbErrors);
1036
- }
1037
- return {
1038
- valid: allErrors.length === 0,
1039
- errors: allErrors,
1040
- warnings: allWarnings
1041
- };
1042
- }
1043
- /**
1044
- * Validate by loading data into database one row at a time
1045
- * This catches constraint violations and extracts detailed error information
1046
- */
1047
- async validateWithDatabase(dirPath, jsonlFiles) {
1048
- const errors = [];
1049
- try {
1050
- const db = LinesDB.create({ dataDir: ":memory:" });
1051
- for (const file of jsonlFiles) {
1052
- const tableName = basename(file, ".jsonl");
1053
- const data = await JsonlReader.read(file);
1054
- let schema;
1055
- let foreignKeys = [];
1056
- let indexes = [];
1057
- let primaryKey;
1058
- try {
1059
- schema = await SchemaLoader.loadSchema(file);
1060
- const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
1061
- const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
1062
- const schemaExport = schemaModule.schema || schemaModule.default;
1063
- if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
1064
- if (schemaExport?.indexes) indexes = schemaExport.indexes;
1065
- if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
1066
- } catch (_error) {
1067
- continue;
1068
- }
1069
- try {
1070
- const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
1071
- this.createTableInDb(db, tableSchema);
1072
- for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
1073
- const row = data[rowIndex];
1074
- try {
1075
- this.insertRowIntoDb(db, tableName, tableSchema, row);
1076
- } catch (error) {
1077
- const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
1078
- if (constraintError) errors.push(constraintError);
1079
- }
1080
- }
1081
- } catch (_error) {
1082
- continue;
1083
- }
1084
- }
1085
- await db.close();
1086
- } catch (error) {
1087
- errors.push({
1088
- file: dirPath,
1089
- tableName: "database",
1090
- rowIndex: 0,
1091
- issues: [{
1092
- message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
1093
- path: []
1094
- }],
1095
- type: "schema"
1096
- });
1097
- }
1098
- return errors;
1099
- }
1100
- /**
1101
- * Create table schema from data and validation schema
1102
- */
1103
- createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
1104
- if (data.length === 0) throw new Error(`No data found in ${tableName}`);
1105
- const schema = JsonlReader.inferSchema(tableName, data);
1106
- if (primaryKey) {
1107
- const pkColumn = schema.columns.find((col) => col.name === primaryKey);
1108
- if (pkColumn) pkColumn.primaryKey = true;
1109
- } else if (!schema.columns.some((col) => col.primaryKey)) {
1110
- const idColumn = schema.columns.find((c) => c.name === "id");
1111
- if (idColumn) idColumn.primaryKey = true;
1112
- }
1113
- if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
1114
- if (indexes && indexes.length > 0) schema.indexes = indexes;
1115
- return schema;
1116
- }
1117
- /**
1118
- * Create table in database
1119
- */
1120
- createTableInDb(db, schema) {
1121
- const columns = schema.columns.map((col) => {
1122
- let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
1123
- if (col.primaryKey) colDef += " PRIMARY KEY";
1124
- return colDef;
1125
- });
1126
- if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
1127
- const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
1128
- db.execute(sql);
1129
- if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
1130
- const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
1131
- const uniqueKeyword = index.unique ? "UNIQUE" : "";
1132
- const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
1133
- const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
1134
- db.execute(indexSql);
1135
- }
1136
- }
1137
- /**
1138
- * Insert a row into database
1139
- */
1140
- insertRowIntoDb(db, tableName, schema, row) {
1141
- const columnNames = schema.columns.map((col) => col.name);
1142
- const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
1143
- const placeholders = columnNames.map(() => "?").join(", ");
1144
- const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
1145
- const values = columnNames.map((col) => {
1146
- const value = row[col];
1147
- if (value === null || value === void 0) return null;
1148
- if (typeof value === "object") return JSON.stringify(value);
1149
- if (typeof value === "boolean") return value ? 1 : 0;
1150
- return value;
1151
- });
1152
- db.execute(sql, values);
1153
- }
1154
- /**
1155
- * Analyze constraint error and extract detailed information
1156
- */
1157
- analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
1158
- const errorMessage = error instanceof Error ? error.message : String(error);
1159
- if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
1160
- const fkValue = row[fk.column];
1161
- if (fkValue === null || fkValue === void 0) continue;
1162
- try {
1163
- const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
1164
- if (result.length > 0 && result[0].count === 0) return {
1165
- file,
1166
- tableName,
1167
- rowIndex,
1168
- issues: [],
1169
- type: "foreignKey",
1170
- foreignKeyError: {
1171
- column: fk.column,
1172
- value: fkValue,
1173
- referencedTable: fk.references.table,
1174
- referencedColumn: fk.references.column
1175
- }
1176
- };
1177
- } catch (_) {}
1178
- }
1179
- return {
1180
- file,
1181
- tableName,
1182
- rowIndex,
1183
- issues: [{
1184
- message: errorMessage,
1185
- path: []
1186
- }],
1187
- type: "schema"
1188
- };
1189
- }
1190
- /**
1191
- * Quote SQL identifier
1192
- */
1193
- quoteIdentifier(name) {
1194
- return `"${name.replace(/"/g, "\"\"")}"`;
1195
- }
1196
- /**
1197
- * Validate a single JSONL file
1198
- */
1199
- async validateFile(filePath) {
1200
- const tableName = basename(filePath, ".jsonl");
1201
- const data = await JsonlReader.read(filePath);
1202
- const schema = await SchemaLoader.loadSchema(filePath);
1203
- const errors = [];
1204
- for (let i = 0; i < data.length; i++) {
1205
- const row = data[i];
1206
- const result = schema["~standard"].validate(row);
1207
- if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
1208
- if (result.issues && result.issues.length > 0) errors.push({
1209
- file: filePath,
1210
- tableName,
1211
- rowIndex: i,
1212
- issues: result.issues,
1213
- type: "schema"
1214
- });
1215
- }
1216
- if (errors.length === 0) {
1217
- const dirPath = dirname(filePath);
1218
- const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1219
- const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
1220
- errors.push(...dbErrors.filter((e) => e.file === filePath));
1221
- }
1222
- return {
1223
- valid: errors.length === 0,
1224
- errors,
1225
- warnings: []
1226
- };
1227
- }
1228
- };
1229
-
1230
1125
  //#endregion
1231
1126
  //#region src/error-formatter.ts
1232
1127
  var ErrorFormatter = class {
@@ -1374,7 +1269,24 @@ program.command("generate").description("Generate TypeScript type definitions fr
1374
1269
  });
1375
1270
  program.command("validate").description("Validate JSONL file(s) against schema").argument("<path>", "File or directory path to validate").option("-v, --verbose", "Show verbose error output", false).action(async (path, options) => {
1376
1271
  try {
1377
- const result = await new Validator({ path }).validate();
1272
+ const stats = await stat(path);
1273
+ let dataDir;
1274
+ let tableName;
1275
+ if (stats.isDirectory()) dataDir = path;
1276
+ else if (stats.isFile() && path.endsWith(".jsonl")) {
1277
+ dataDir = dirname(path);
1278
+ tableName = basename(path, ".jsonl");
1279
+ } else throw new Error(`Invalid path: ${path}. Must be a directory or .jsonl file.`);
1280
+ const db = LinesDB.create({ dataDir });
1281
+ let result;
1282
+ try {
1283
+ result = await db.initialize({
1284
+ tableName,
1285
+ detailedValidate: true
1286
+ });
1287
+ } finally {
1288
+ await db.close();
1289
+ }
1378
1290
  if (result.warnings.length > 0) {
1379
1291
  for (const warning of result.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
1380
1292
  console.log("");
@@ -1423,20 +1335,57 @@ program.command("validate").description("Validate JSONL file(s) against schema")
1423
1335
  process.exit(1);
1424
1336
  }
1425
1337
  });
1426
- program.command("migrate").description("Migrate data with transformation function").argument("<file>", "JSONL file to migrate").argument("<transform>", "Transform function (e.g., \"(row) => ({ ...row, age: row.age + 1 })\")").option("-f, --filter <expr>", "Filter expression").option("-e, --errorOutput <path>", "Output file path for transformed data when migration fails").option("-v, --verbose", "Show verbose error output", false).action(async (filePath, transformStr, options) => {
1427
- const tableName = (filePath.split("/").pop() || "").replace(".jsonl", "");
1428
- if (!tableName) {
1429
- console.error("Error: Invalid file path. Must be a .jsonl file");
1338
+ program.command("migrate").description("Migrate data with transformation function").argument("<path>", "File or directory path to migrate").argument("<transform>", "Transform function (e.g., \"(row) => ({ ...row, age: row.age + 1 })\")").option("-f, --filter <expr>", "Filter expression").option("-e, --errorOutput <path>", "Output file path for transformed data when migration fails").option("-v, --verbose", "Show verbose error output", false).action(async (path, transformStr, options) => {
1339
+ try {
1340
+ const stats = await stat(path);
1341
+ if (stats.isDirectory()) await migrateDirectory(path, transformStr, options);
1342
+ else if (stats.isFile() && path.endsWith(".jsonl")) await migrateFile(path, transformStr, options);
1343
+ else {
1344
+ console.error(`Error: Invalid path: ${path}. Must be a directory or .jsonl file.`);
1345
+ process.exit(1);
1346
+ }
1347
+ } catch (error) {
1348
+ if (error instanceof Error && "code" in error && error.code === "ENOENT") console.error(`Error: Path not found: ${path}`);
1349
+ else console.error(`Error: ${String(error)}`);
1430
1350
  process.exit(1);
1431
1351
  }
1432
- const lastSlashIndex = filePath.lastIndexOf("/");
1433
- const dataDir = lastSlashIndex > 0 ? filePath.substring(0, lastSlashIndex) : ".";
1434
- const db = LinesDB.create({ dataDir });
1435
- await db.initialize();
1352
+ });
1353
+ /**
1354
+ * Migrate all JSONL files in a directory
1355
+ */
1356
+ async function migrateDirectory(dirPath, transformStr, options) {
1357
+ const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => entry.name);
1358
+ if (jsonlFiles.length === 0) {
1359
+ console.error(`Error: No JSONL files found in directory: ${dirPath}`);
1360
+ process.exit(1);
1361
+ }
1362
+ console.log(`Found ${jsonlFiles.length} JSONL file(s) in directory`);
1363
+ const db = LinesDB.create({ dataDir: dirPath });
1364
+ const initResult = await db.initialize({ detailedValidate: true });
1365
+ if (initResult.warnings.length > 0) for (const warning of initResult.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
1366
+ if (!initResult.valid) {
1367
+ console.error(`Error: Failed to initialize database due to validation errors:`);
1368
+ const formatter = new ErrorFormatter({ verbose: options.verbose });
1369
+ for (const error of initResult.errors) console.error(formatter.formatValidationErrors([{
1370
+ file: error.file,
1371
+ rowIndex: error.rowIndex,
1372
+ issues: error.issues
1373
+ }]));
1374
+ await db.close();
1375
+ process.exit(1);
1376
+ }
1377
+ const tableNames = db.getTableNames();
1378
+ if (tableNames.length === 0) {
1379
+ console.error(`Error: No tables could be loaded from directory: ${dirPath}`);
1380
+ await db.close();
1381
+ process.exit(1);
1382
+ }
1383
+ console.log(`Loaded ${tableNames.length} table(s): ${tableNames.join(", ")}\n`);
1436
1384
  try {
1437
1385
  const transform = runInSandbox(`(${transformStr})`);
1438
1386
  if (typeof transform !== "function") {
1439
1387
  console.error("Error: Transform must be a function");
1388
+ await db.close();
1440
1389
  process.exit(1);
1441
1390
  }
1442
1391
  let filter = void 0;
@@ -1445,49 +1394,33 @@ program.command("migrate").description("Migrate data with transformation functio
1445
1394
  } catch {
1446
1395
  filter = runInSandbox(`(${options.filter})`);
1447
1396
  }
1448
- let rowsToMigrate;
1449
- try {
1450
- rowsToMigrate = filter ? db.find(tableName, filter) : db.find(tableName);
1451
- } catch (error) {
1452
- console.error(`Error: Failed to access table '${tableName}'`);
1453
- console.error(` ${error instanceof Error ? error.message : String(error)}`);
1454
- console.error(`\nThe table may have failed to load during initialization.`);
1455
- console.error(`Check the table's data and schema for any constraint violations.`);
1456
- await db.close();
1457
- process.exit(1);
1458
- }
1459
- console.log(`Found ${rowsToMigrate.length} row(s) to migrate in table '${tableName}'`);
1460
- if (rowsToMigrate.length === 0) {
1461
- console.log("No rows to migrate. Exiting.");
1462
- await db.close();
1463
- process.exit(0);
1464
- }
1465
- const transformedRows = rowsToMigrate.map((row) => transform(row));
1466
- try {
1397
+ let totalRowsMigrated = 0;
1398
+ let hasErrors = false;
1399
+ for (const tableName of tableNames) try {
1400
+ console.log(`Processing table '${tableName}'...`);
1401
+ const rowsToMigrate = filter ? db.find(tableName, filter) : db.find(tableName);
1402
+ if (rowsToMigrate.length === 0) {
1403
+ console.log(` No rows to migrate`);
1404
+ continue;
1405
+ }
1406
+ console.log(` Found ${rowsToMigrate.length} row(s) to migrate`);
1407
+ const transformedRows = rowsToMigrate.map((row) => transform(row));
1467
1408
  await db.transaction(async () => {
1468
1409
  db.batchUpdate(tableName, transformedRows, { validate: true });
1469
1410
  });
1470
- await db.close();
1471
- console.log(`\nMigration completed successfully:`);
1472
- console.log(` ✓ ${rowsToMigrate.length} row(s) updated`);
1473
- process.exit(0);
1411
+ console.log(` ✓ ${rowsToMigrate.length} row(s) updated\n`);
1412
+ totalRowsMigrated += rowsToMigrate.length;
1474
1413
  } catch (error) {
1475
- await db.close();
1476
- if (options.errorOutput) try {
1477
- const jsonlContent = transformedRows.map((row) => JSON.stringify(row)).join("\n");
1478
- await writeFile(options.errorOutput, jsonlContent, "utf-8");
1479
- console.error(styleText("yellow", `\n⚠ Transformed data (${transformedRows.length} rows) written to: ${options.errorOutput}`));
1480
- } catch (writeError) {
1481
- console.error(styleText("red", `\n✗ Failed to write error output file: ${writeError instanceof Error ? writeError.message : String(writeError)}`));
1482
- }
1414
+ hasErrors = true;
1415
+ console.error(styleText("red", ` ✗ Failed to migrate table '${tableName}'`));
1483
1416
  const formatter = new ErrorFormatter({ verbose: options.verbose });
1484
- console.error(formatter.formatMigrationFailureHeader());
1485
1417
  if (error instanceof Error && error.name === "ValidationError") {
1486
1418
  const validationError = error;
1487
1419
  if (validationError.validationErrors) {
1488
- console.error(`\nFound ${validationError.validationErrors.length} validation error(s) in transformed data:\n`);
1420
+ console.error(` Found ${validationError.validationErrors.length} validation error(s):\n`);
1421
+ const rowsToMigrate = filter ? db.find(tableName, filter) : db.find(tableName);
1489
1422
  const errorInfos = validationError.validationErrors.map(({ rowIndex, rowData, error: rowError }) => ({
1490
- file: filePath,
1423
+ file: `${dirPath}/${tableName}.jsonl`,
1491
1424
  rowIndex,
1492
1425
  issues: rowError.issues,
1493
1426
  data: rowData,
@@ -1495,32 +1428,165 @@ program.command("migrate").description("Migrate data with transformation functio
1495
1428
  }));
1496
1429
  const formatted = formatter.formatValidationErrors(errorInfos);
1497
1430
  console.error(formatted);
1498
- } else {
1499
- console.error("\nValidation error:\n");
1500
- const errorInfo = {
1501
- file: filePath,
1502
- rowIndex: 0,
1503
- issues: validationError.issues
1504
- };
1505
- const formatted = formatter.formatValidationErrors([errorInfo]);
1506
- console.error(formatted);
1507
1431
  }
1508
- } else if (error instanceof Error) {
1509
- console.error(`\n ${error.message}`);
1510
- if (options.verbose && error.stack) console.error(`\nStack trace:\n${error.stack}`);
1511
- if (error.message.includes("UNIQUE constraint failed") || error.message.includes("FOREIGN KEY constraint failed") || error.message.includes("NOT NULL constraint failed") || error.message.includes("CHECK constraint failed")) {
1512
- console.error("\n This is a SQLite constraint violation.");
1513
- console.error(" Please check your data and schema requirements.");
1514
- }
1515
- } else console.error(`\n ${String(error)}`);
1432
+ } else if (error instanceof Error) console.error(` ${error.message}`);
1516
1433
  console.error("");
1434
+ }
1435
+ await db.close();
1436
+ if (hasErrors) {
1437
+ console.error(styleText("red", `\n✗ Migration completed with errors for some tables`));
1438
+ console.log(`Total rows migrated: ${totalRowsMigrated}`);
1517
1439
  process.exit(1);
1440
+ } else {
1441
+ console.log(styleText("green", `\n✓ Migration completed successfully for all tables`));
1442
+ console.log(`Total rows migrated: ${totalRowsMigrated}`);
1443
+ process.exit(0);
1518
1444
  }
1519
1445
  } catch (error) {
1520
1446
  await db.close();
1521
1447
  throw error;
1522
1448
  }
1523
- });
1449
+ }
1450
/**
 * Split a JSONL file path into the table name and the directory that holds it.
 *
 * Only a trailing ".jsonl" is stripped, so dots or ".jsonl" fragments earlier
 * in the file name are preserved. A file located directly under the filesystem
 * root resolves to "/" rather than to the current working directory.
 *
 * @param {string} filePath - "/"-separated path to a JSONL file.
 * @returns {{ tableName: string, dataDir: string }} `tableName` is empty when
 *   the path has no usable file name.
 */
function resolveJsonlTarget(filePath) {
	const extension = ".jsonl";
	const lastSlashIndex = filePath.lastIndexOf("/");
	const fileName = filePath.slice(lastSlashIndex + 1);
	const tableName = fileName.endsWith(extension) ? fileName.slice(0, -extension.length) : fileName;
	let dataDir = ".";
	if (lastSlashIndex === 0) dataDir = "/";
	else if (lastSlashIndex > 0) dataDir = filePath.substring(0, lastSlashIndex);
	return {
		tableName,
		dataDir
	};
}
/**
 * Print the details of a failed filtered migration to stderr.
 *
 * @param {unknown} error - Value thrown by the transaction.
 * @param {string} filePath - File reported alongside validation errors.
 * @param {Array<object>} rowsToMigrate - Rows as they were before the transform,
 *   indexed by the `rowIndex` reported in validation errors.
 * @param {{ verbose?: boolean }} options - CLI options; `verbose` adds the stack trace.
 */
function printMigrationFailure(error, filePath, rowsToMigrate, options) {
	const formatter = new ErrorFormatter({ verbose: options.verbose });
	console.error(formatter.formatMigrationFailureHeader());
	if (error instanceof Error && error.name === "ValidationError") {
		const validationError = error;
		if (validationError.validationErrors) {
			console.error(`\nFound ${validationError.validationErrors.length} validation error(s) in transformed data:\n`);
			const errorInfos = validationError.validationErrors.map(({ rowIndex, rowData, error: rowError }) => ({
				file: filePath,
				rowIndex,
				issues: rowError.issues,
				data: rowData,
				originalData: rowsToMigrate[rowIndex]
			}));
			console.error(formatter.formatValidationErrors(errorInfos));
		} else {
			console.error("\nValidation error:\n");
			const errorInfo = {
				file: filePath,
				rowIndex: 0,
				issues: validationError.issues
			};
			console.error(formatter.formatValidationErrors([errorInfo]));
		}
	} else if (error instanceof Error) {
		console.error(`\n ${error.message}`);
		if (options.verbose && error.stack) console.error(`\nStack trace:\n${error.stack}`);
		const constraintMarkers = [
			"UNIQUE constraint failed",
			"FOREIGN KEY constraint failed",
			"NOT NULL constraint failed",
			"CHECK constraint failed"
		];
		if (constraintMarkers.some((marker) => error.message.includes(marker))) {
			console.error("\n This is a SQLite constraint violation.");
			console.error(" Please check your data and schema requirements.");
		}
	} else console.error(`\n ${String(error)}`);
	console.error("");
}
/**
 * Migrate a single JSONL file.
 *
 * The transform source is evaluated with `runInSandbox` and handed to
 * `db.initialize()` together with the table name. Without a filter the table is
 * then written back with `db.sync()`. With a filter, only the matching rows are
 * transformed and stored through `db.batchUpdate()` inside a transaction.
 *
 * Every outcome terminates the process through `process.exit` (0 on success,
 * 1 on failure); the database handle is closed first.
 *
 * @param {string} filePath - Path to the `.jsonl` file to migrate.
 * @param {string} transformStr - Source text of the row transform function.
 * @param {{ filter?: string, errorOutput?: string, verbose?: boolean }} options -
 *   `filter` is JSON or a JavaScript expression, `errorOutput` is a file that
 *   receives the transformed rows when a filtered migration fails.
 * @returns {Promise<void>}
 */
async function migrateFile(filePath, transformStr, options) {
	const { tableName, dataDir } = resolveJsonlTarget(filePath);
	if (!tableName) {
		console.error("Error: Invalid file path. Must be a .jsonl file");
		process.exit(1);
	}
	let transform;
	try {
		const parsedTransform = runInSandbox(`(${transformStr})`);
		if (typeof parsedTransform !== "function") {
			console.error("Error: Transform must be a function");
			process.exit(1);
		}
		transform = parsedTransform;
	} catch (error) {
		console.error("Error: Failed to parse transform function");
		console.error(` ${error instanceof Error ? error.message : String(error)}`);
		process.exit(1);
	}
	const db = LinesDB.create({ dataDir });
	const initResult = await db.initialize({
		tableName,
		transform,
		detailedValidate: true
	});
	for (const warning of initResult.warnings) console.warn(styleText("yellow", `⚠ ${warning}`));
	if (!initResult.valid) {
		console.error(`Error: Failed to initialize database due to validation errors:`);
		const formatter = new ErrorFormatter({ verbose: options.verbose });
		for (const error of initResult.errors) console.error(formatter.formatValidationErrors([{
			file: error.file,
			rowIndex: error.rowIndex,
			issues: error.issues
		}]));
		await db.close();
		process.exit(1);
	}
	try {
		// The filter is tried as JSON first; anything that is not valid JSON is
		// evaluated as a JavaScript expression in the sandbox.
		let filter = void 0;
		if (options.filter) try {
			filter = JSON.parse(options.filter);
		} catch {
			filter = runInSandbox(`(${options.filter})`);
		}
		if (filter) {
			let rowsToMigrate;
			try {
				rowsToMigrate = db.find(tableName, filter);
			} catch (error) {
				console.error(`Error: Failed to access table '${tableName}'`);
				console.error(` ${error instanceof Error ? error.message : String(error)}`);
				console.error(`\nThe table may have failed to load during initialization.`);
				console.error(`Check the table's data and schema for any constraint violations.`);
				await db.close();
				process.exit(1);
			}
			console.log(`Found ${rowsToMigrate.length} row(s) to migrate in table '${tableName}'`);
			if (rowsToMigrate.length === 0) {
				console.log("No rows to migrate. Exiting.");
				await db.close();
				process.exit(0);
			}
			// NOTE(review): `transform` was also passed to initialize() above. If
			// initialize() already applies it to the loaded rows, the rows found
			// here are transformed a second time - confirm against LinesDB.initialize.
			const transformedRows = rowsToMigrate.map((row) => transform(row));
			try {
				await db.transaction(async () => {
					db.batchUpdate(tableName, transformedRows, { validate: true });
				});
				await db.close();
				console.log(`\nMigration completed successfully:`);
				console.log(` ✓ ${rowsToMigrate.length} row(s) updated`);
				process.exit(0);
			} catch (error) {
				await db.close();
				// Keep the transformed rows so a failed run can be inspected.
				if (options.errorOutput) try {
					const jsonlContent = transformedRows.map((row) => JSON.stringify(row)).join("\n");
					await writeFile(options.errorOutput, jsonlContent, "utf-8");
					console.error(styleText("yellow", `\n⚠ Transformed data (${transformedRows.length} rows) written to: ${options.errorOutput}`));
				} catch (writeError) {
					console.error(styleText("red", `\n✗ Failed to write error output file: ${writeError instanceof Error ? writeError.message : String(writeError)}`));
				}
				printMigrationFailure(error, filePath, rowsToMigrate, options);
				process.exit(1);
			}
		} else try {
			const allRows = db.find(tableName);
			console.log(`Migrated ${allRows.length} row(s) in table '${tableName}'`);
			await db.sync(tableName);
			await db.close();
			console.log(`\nMigration completed successfully:`);
			console.log(` ✓ ${allRows.length} row(s) updated`);
			process.exit(0);
		} catch (error) {
			await db.close();
			console.error("Error: Failed to sync changes to file");
			console.error(` ${error instanceof Error ? error.message : String(error)}`);
			process.exit(1);
		}
	} catch (error) {
		await db.close();
		throw error;
	}
}
1524
1590
  program.parse();
1525
1591
 
1526
1592
  //#endregion