@toiroakr/lines-db 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import { createRequire } from "node:module";
2
- import { access, mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
2
+ import { access, mkdir, readFile, readdir, writeFile } from "node:fs/promises";
3
3
  import { basename, dirname, extname, isAbsolute, join, normalize, relative } from "node:path";
4
4
  import { pathToFileURL } from "node:url";
5
5
 
@@ -240,8 +240,7 @@ var DirectoryScanner = class {
240
240
  * Define a bidirectional schema with optional backward transformation
241
241
  *
242
242
  * @param schema - Standard Schema for validation
243
- * @param optionsOrBackward - Optional SchemaOptions object or backward transformation function (Output → Input)
244
- * Required when schema performs transformations
243
+ * @param options - SchemaOptions object. When Input and Output types differ, backward transformation is required
245
244
  *
246
245
  * @example
247
246
  * // No transformation - backward not needed
@@ -250,10 +249,12 @@ var DirectoryScanner = class {
250
249
  * );
251
250
  *
252
251
  * @example
253
- * // With transformation - backward recommended (legacy)
252
+ * // With transformation - backward REQUIRED
254
253
  * const schema = defineSchema(
255
254
  * v.pipe(v.string(), v.transform(Number)),
256
- * (num) => String(num) // backward: number → string
255
+ * {
256
+ * backward: (num) => String(num) // backward: number → string (REQUIRED)
257
+ * }
257
258
  * );
258
259
  *
259
260
  * @example
@@ -268,14 +269,14 @@ var DirectoryScanner = class {
268
269
  * }
269
270
  * );
270
271
  */
271
- function defineSchema(schema, optionsOrBackward) {
272
+ function defineSchema(schema, ...args) {
273
+ const options = args[0];
272
274
  const bidirectionalSchema = Object.create(schema);
273
- if (optionsOrBackward) if (typeof optionsOrBackward === "function") bidirectionalSchema.backward = optionsOrBackward;
274
- else {
275
- if (optionsOrBackward.backward) bidirectionalSchema.backward = optionsOrBackward.backward;
276
- if (optionsOrBackward.primaryKey) bidirectionalSchema.primaryKey = optionsOrBackward.primaryKey;
277
- if (optionsOrBackward.foreignKeys) bidirectionalSchema.foreignKeys = optionsOrBackward.foreignKeys;
278
- if (optionsOrBackward.indexes) bidirectionalSchema.indexes = optionsOrBackward.indexes;
275
+ if (options) {
276
+ if (options.backward) bidirectionalSchema.backward = options.backward;
277
+ if (options.primaryKey) bidirectionalSchema.primaryKey = options.primaryKey;
278
+ if (options.foreignKeys) bidirectionalSchema.foreignKeys = options.foreignKeys;
279
+ if (options.indexes) bidirectionalSchema.indexes = options.indexes;
279
280
  }
280
281
  Object.defineProperty(bidirectionalSchema, "~standard", {
281
282
  value: schema["~standard"],
@@ -308,27 +309,49 @@ var LinesDB = class LinesDB {
308
309
  return new LinesDB(config, dbPath);
309
310
  }
310
311
  /**
311
- * Initialize database by loading all JSONL files
312
+ * Initialize database by loading all JSONL files or a specific table
312
313
  * Uses dependency resolution to ensure foreign key references are loaded in correct order
314
+ * @param options Optional configuration for initialization
315
+ * @param options.tableName Optional table name to initialize. If not provided, initializes all tables
316
+ * @param options.detailedValidate If true, performs detailed validation by inserting rows one by one to catch constraint violations
317
+ * @param options.transform Optional transform function to apply to rows before validation (only applied to the specified tableName)
318
+ * @returns ValidationResult containing validation status, errors, and warnings
313
319
  */
314
- async initialize() {
320
+ async initialize(options) {
321
+ const allErrors = [];
322
+ const allWarnings = [];
323
+ const tableName = options?.tableName;
324
+ const detailedValidate = options?.detailedValidate ?? false;
325
+ const transform = options?.transform;
315
326
  this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
327
+ const tablesToLoad = tableName ? [tableName] : Array.from(this.tables.keys());
328
+ for (const tableNameToLoad of tablesToLoad) if (!this.tables.has(tableNameToLoad)) throw new Error(`Table '${tableNameToLoad}' not found in directory '${this.config.dataDir}'`);
316
329
  const loadedTables = /* @__PURE__ */ new Set();
317
330
  const loadingTables = /* @__PURE__ */ new Set();
318
- for (const [tableName] of this.tables) if (!loadedTables.has(tableName)) try {
319
- await this.loadTableWithDependencies(tableName, loadedTables, loadingTables);
320
- } catch (error) {
321
- console.warn(`Warning: Failed to load table '${tableName}':`, error instanceof Error ? error.message : String(error));
322
- this.tables.delete(tableName);
323
- this.schemas.delete(tableName);
324
- this.validationSchemas.delete(tableName);
331
+ const attemptedTables = /* @__PURE__ */ new Set();
332
+ for (const tableNameToLoad of tablesToLoad) if (!attemptedTables.has(tableNameToLoad)) {
333
+ const tableTransform = tableNameToLoad === tableName ? transform : void 0;
334
+ const { errors, warnings } = await this.loadTableWithDependencies(tableNameToLoad, loadedTables, loadingTables, attemptedTables, detailedValidate, tableTransform);
335
+ allErrors.push(...errors);
336
+ allWarnings.push(...warnings);
325
337
  }
338
+ return {
339
+ valid: allErrors.length === 0,
340
+ errors: allErrors,
341
+ warnings: allWarnings
342
+ };
326
343
  }
327
344
  /**
328
345
  * Load a table and its dependencies recursively
329
346
  */
330
- async loadTableWithDependencies(tableName, loadedTables, loadingTables) {
331
- if (loadedTables.has(tableName)) return;
347
+ async loadTableWithDependencies(tableName, loadedTables, loadingTables, attemptedTables, detailedValidate, transform) {
348
+ const errors = [];
349
+ const warnings = [];
350
+ if (attemptedTables.has(tableName)) return {
351
+ errors,
352
+ warnings
353
+ };
354
+ attemptedTables.add(tableName);
332
355
  if (loadingTables.has(tableName)) throw new Error(`Circular dependency detected for table '${tableName}'`);
333
356
  const tableConfig = this.tables.get(tableName);
334
357
  if (!tableConfig) throw new Error(`Table configuration not found for '${tableName}'`);
@@ -342,21 +365,35 @@ var LinesDB = class LinesDB {
342
365
  } catch {}
343
366
  if (foreignKeys && foreignKeys.length > 0) for (const fk of foreignKeys) {
344
367
  const referencedTable = fk.references.table;
345
- if (!loadedTables.has(referencedTable)) if (this.tables.has(referencedTable)) await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables);
346
- else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
368
+ if (referencedTable === tableName) continue;
369
+ if (!attemptedTables.has(referencedTable)) if (this.tables.has(referencedTable)) {
370
+ const depResult = await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables, attemptedTables, detailedValidate, void 0);
371
+ errors.push(...depResult.errors);
372
+ warnings.push(...depResult.warnings);
373
+ } else throw new Error(`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`);
374
+ }
375
+ const { loaded, errors: loadErrors } = await this.loadTable(tableName, tableConfig, detailedValidate, transform);
376
+ errors.push(...loadErrors);
377
+ if (loaded) loadedTables.add(tableName);
378
+ else {
379
+ warnings.push(`Table '${tableName}' was not loaded (no data or skipped)`);
380
+ this.tables.delete(tableName);
347
381
  }
348
- if (await this.loadTable(tableName, tableConfig)) loadedTables.add(tableName);
349
- else this.tables.delete(tableName);
350
382
  } finally {
351
383
  loadingTables.delete(tableName);
352
384
  }
385
+ return {
386
+ errors,
387
+ warnings
388
+ };
353
389
  }
354
390
  /**
355
391
  * Load a single table from JSONL file
356
- * @returns true if table was loaded, false if skipped
392
+ * @returns Object with loaded status and validation errors
357
393
  */
358
- async loadTable(tableName, config) {
359
- const data = await JsonlReader.read(config.jsonlPath);
394
+ async loadTable(tableName, config, detailedValidate, transform) {
395
+ let data = await JsonlReader.read(config.jsonlPath);
396
+ if (transform) data = data.map((row) => transform(row));
360
397
  let validationSchema = config.validationSchema;
361
398
  const schemaMetadata = {};
362
399
  if (!validationSchema) try {
@@ -372,7 +409,15 @@ var LinesDB = class LinesDB {
372
409
  else if (schemaModule.foreignKeys) schemaMetadata.foreignKeys = schemaModule.foreignKeys;
373
410
  if (schemaExport?.indexes) schemaMetadata.indexes = schemaExport.indexes;
374
411
  else if (schemaModule.indexes) schemaMetadata.indexes = schemaModule.indexes;
375
- } catch (_error) {}
412
+ if (process.env.DEBUG_LINES_DB) {
413
+ console.log(`[lines-db] Schema metadata for ${tableName}:`);
414
+ console.log(` primaryKey: ${schemaMetadata.primaryKey}`);
415
+ console.log(` foreignKeys: ${JSON.stringify(schemaMetadata.foreignKeys)}`);
416
+ console.log(` indexes: ${JSON.stringify(schemaMetadata.indexes)}`);
417
+ }
418
+ } catch (_error) {
419
+ if (process.env.DEBUG_LINES_DB) console.warn(`[lines-db] Failed to load schema metadata for ${tableName}:`, _error instanceof Error ? _error.message : String(_error));
420
+ }
376
421
  this.validationSchemas.set(tableName, validationSchema);
377
422
  const validationErrors = [];
378
423
  const validatedData = [];
@@ -390,18 +435,32 @@ var LinesDB = class LinesDB {
390
435
  else throw error;
391
436
  }
392
437
  }
393
- if (validationErrors.length > 0) {
394
- const enhancedError = /* @__PURE__ */ new Error(`Validation failed for ${validationErrors.length} row(s) in table ${tableName}`);
395
- enhancedError.name = "ValidationError";
396
- enhancedError.validationErrors = validationErrors;
397
- enhancedError.issues = validationErrors[0].error.issues;
398
- throw enhancedError;
399
- }
438
+ const validationErrorDetails = validationErrors.map((ve) => ({
439
+ file: config.jsonlPath,
440
+ tableName,
441
+ rowIndex: ve.rowIndex,
442
+ issues: ve.error.issues,
443
+ type: "schema"
444
+ }));
445
+ if (validationErrors.length > 0) return {
446
+ loaded: false,
447
+ errors: validationErrorDetails
448
+ };
400
449
  let schema;
401
- if (config.schema) schema = config.schema;
402
- else if (config.autoInferSchema !== false) {
403
- if (validatedData.length === 0) return false;
404
- schema = JsonlReader.inferSchema(tableName, validatedData);
450
+ let inferredSchema;
451
+ if (validatedData.length > 0) inferredSchema = JsonlReader.inferSchema(tableName, validatedData);
452
+ if (config.schema) {
453
+ schema = config.schema;
454
+ if (inferredSchema) for (const inferredCol of inferredSchema.columns) {
455
+ const schemaCol = schema.columns.find((c) => c.name === inferredCol.name);
456
+ if (schemaCol && inferredCol.valueType && !schemaCol.valueType) schemaCol.valueType = inferredCol.valueType;
457
+ }
458
+ } else if (config.autoInferSchema !== false) {
459
+ if (validatedData.length === 0) return {
460
+ loaded: false,
461
+ errors: []
462
+ };
463
+ schema = inferredSchema;
405
464
  } else throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
406
465
  const biSchema = validationSchema;
407
466
  const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
@@ -415,23 +474,43 @@ var LinesDB = class LinesDB {
415
474
  if (idColumn) idColumn.primaryKey = true;
416
475
  }
417
476
  if (foreignKeys) schema.foreignKeys = foreignKeys;
418
- if (indexes) schema.indexes = indexes;
477
+ if (indexes) {
478
+ schema.indexes = indexes;
479
+ for (const index of indexes) if (index.unique && index.columns.length === 1) {
480
+ const col = schema.columns.find((c) => c.name === index.columns[0]);
481
+ if (col && !col.unique && !col.primaryKey) col.unique = true;
482
+ }
483
+ }
419
484
  this.schemas.set(tableName, schema);
420
485
  this.createTable(schema);
421
- this.insertData(tableName, schema, validatedData);
422
- return true;
486
+ if (detailedValidate) {
487
+ const insertErrors = this.insertDataWithDetailedValidation(tableName, schema, validatedData, config.jsonlPath);
488
+ if (insertErrors.length > 0) return {
489
+ loaded: false,
490
+ errors: insertErrors
491
+ };
492
+ } else this.insertData(tableName, schema, validatedData);
493
+ return {
494
+ loaded: true,
495
+ errors: []
496
+ };
423
497
  }
424
498
  /**
425
499
  * Create table in SQLite with constraints and indexes
426
500
  */
427
501
  createTable(schema) {
428
502
  const quotedTableName = this.quoteTableName(schema.name);
503
+ const uniqueColumns = /* @__PURE__ */ new Set();
504
+ for (const col of schema.columns) if (col.unique) uniqueColumns.add(col.name);
505
+ if (schema.indexes) {
506
+ for (const index of schema.indexes) if (index.unique && index.columns.length === 1) uniqueColumns.add(index.columns[0]);
507
+ }
429
508
  const columnDefs = schema.columns.map((col) => {
430
509
  const sqlType = col.type === "JSON" ? "TEXT" : col.type;
431
510
  const parts = [this.quoteIdentifier(col.name), sqlType];
432
511
  if (col.primaryKey) parts.push("PRIMARY KEY");
433
512
  if (col.notNull) parts.push("NOT NULL");
434
- if (col.unique) parts.push("UNIQUE");
513
+ if (uniqueColumns.has(col.name) && !col.primaryKey) parts.push("UNIQUE");
435
514
  return parts.join(" ");
436
515
  });
437
516
  const foreignKeyDefs = [];
@@ -464,18 +543,85 @@ var LinesDB = class LinesDB {
464
543
  return `"${identifier.replace(/"/g, "\"\"")}"`;
465
544
  }
466
545
  /**
467
- * Insert data into table
546
+ * Insert data into table using batch insert (multiple rows per SQL)
547
+ * SQLite has a parameter limit (default 999), so we batch rows accordingly
548
+ * Throws exception if any constraint violation occurs
468
549
  */
469
550
  insertData(tableName, schema, data) {
551
+ if (data.length === 0) return;
552
+ const columnNames = schema.columns.map((col) => col.name);
553
+ const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
554
+ const columnCount = columnNames.length;
555
+ const maxBatchSize = Math.floor(900 / columnCount);
556
+ const batchSize = Math.max(1, Math.min(maxBatchSize, 100));
557
+ for (let i = 0; i < data.length; i += batchSize) {
558
+ const batch = data.slice(i, i + batchSize);
559
+ const rowPlaceholders = columnNames.map(() => "?").join(", ");
560
+ const valuesPlaceholders = batch.map(() => `(${rowPlaceholders})`).join(", ");
561
+ const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES ${valuesPlaceholders}`;
562
+ const values = [];
563
+ for (const row of batch) for (const col of columnNames) values.push(this.normalizeValue(row[col]));
564
+ this.db.prepare(sql).run(...values);
565
+ }
566
+ }
567
+ /**
568
+ * Insert data into table one row at a time with detailed error reporting
569
+ * This is used for validation to catch constraint violations
570
+ */
571
+ insertDataWithDetailedValidation(tableName, schema, data, filePath) {
572
+ const errors = [];
470
573
  const columnNames = schema.columns.map((col) => col.name);
471
574
  const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
472
575
  const placeholders = columnNames.map(() => "?").join(", ");
473
576
  const sql = `INSERT INTO ${this.quoteTableName(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
474
577
  const stmt = this.db.prepare(sql);
475
- for (const row of data) {
476
- const values = columnNames.map((col) => this.normalizeValue(row[col]));
477
- stmt.run(...values);
578
+ for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
579
+ const row = data[rowIndex];
580
+ try {
581
+ const values = columnNames.map((col) => this.normalizeValue(row[col]));
582
+ stmt.run(...values);
583
+ } catch (error) {
584
+ const constraintError = this.analyzeConstraintError(error, filePath, tableName, rowIndex, row, schema.foreignKeys || []);
585
+ if (constraintError) errors.push(constraintError);
586
+ }
478
587
  }
588
+ return errors;
589
+ }
590
+ /**
591
+ * Analyze constraint error and extract detailed information
592
+ */
593
+ analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys) {
594
+ const errorMessage = error instanceof Error ? error.message : String(error);
595
+ if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
596
+ const fkValue = row[fk.column];
597
+ if (fkValue === null || fkValue === void 0) continue;
598
+ try {
599
+ const result = this.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [this.normalizeValue(fkValue)]);
600
+ if (result.length > 0 && result[0].count === 0) return {
601
+ file,
602
+ tableName,
603
+ rowIndex,
604
+ issues: [],
605
+ type: "foreignKey",
606
+ foreignKeyError: {
607
+ column: fk.column,
608
+ value: fkValue,
609
+ referencedTable: fk.references.table,
610
+ referencedColumn: fk.references.column
611
+ }
612
+ };
613
+ } catch (_) {}
614
+ }
615
+ return {
616
+ file,
617
+ tableName,
618
+ rowIndex,
619
+ issues: [{
620
+ message: errorMessage,
621
+ path: []
622
+ }],
623
+ type: "schema"
624
+ };
479
625
  }
480
626
  /**
481
627
  * Execute a raw SQL query
@@ -887,9 +1033,13 @@ var LinesDB = class LinesDB {
887
1033
  /**
888
1034
  * Sync database changes back to JSONL files
889
1035
  * Uses backward transformation when available
1036
+ * @param tableName Optional table name to sync. If not provided, syncs all loaded tables
890
1037
  */
891
- async sync() {
892
- for (const [tableName] of this.tables) await this.syncTable(tableName);
1038
+ async sync(tableName) {
1039
+ if (tableName) {
1040
+ if (!this.schemas.has(tableName)) throw new Error(`Table '${tableName}' is not loaded`);
1041
+ await this.syncTable(tableName);
1042
+ } else for (const [name] of this.schemas) await this.syncTable(name);
893
1043
  }
894
1044
  /**
895
1045
  * Execute a function within a transaction
@@ -1036,295 +1186,33 @@ function sanitizeIdentifier(value) {
1036
1186
  return value.replace(/[^A-Za-z0-9_$]/g, "");
1037
1187
  }
1038
1188
 
1039
- //#endregion
1040
- //#region src/validator.ts
1041
- var Validator = class {
1042
- path;
1043
- projectRoot;
1044
- constructor(options) {
1045
- this.path = options.path;
1046
- this.projectRoot = options.projectRoot || process.cwd();
1047
- }
1048
- /**
1049
- * Validate JSONL file(s)
1050
- */
1051
- async validate() {
1052
- const fullPath = this.path.startsWith("/") ? this.path : join(this.projectRoot, this.path);
1053
- const stats = await stat(fullPath);
1054
- if (stats.isDirectory()) return this.validateDirectory(fullPath);
1055
- else if (stats.isFile() && fullPath.endsWith(".jsonl")) return this.validateFile(fullPath);
1056
- else throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
1057
- }
1058
- /**
1059
- * Validate all JSONL files in a directory
1060
- */
1061
- async validateDirectory(dirPath) {
1062
- const jsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1063
- if (jsonlFiles.length === 0) throw new Error(`No JSONL files found in directory: ${dirPath}`);
1064
- const allErrors = [];
1065
- const allWarnings = [];
1066
- const filesWithSchema = [];
1067
- for (const file of jsonlFiles) if (await SchemaLoader.hasSchema(file)) filesWithSchema.push(file);
1068
- else {
1069
- const tableName = basename(file, ".jsonl");
1070
- allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
1071
- }
1072
- for (const file of filesWithSchema) {
1073
- const result = await this.validateFile(file);
1074
- allErrors.push(...result.errors);
1075
- allWarnings.push(...result.warnings);
1076
- }
1077
- if (filesWithSchema.length > 0 && allErrors.length === 0) {
1078
- const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
1079
- allErrors.push(...dbErrors);
1080
- }
1081
- return {
1082
- valid: allErrors.length === 0,
1083
- errors: allErrors,
1084
- warnings: allWarnings
1085
- };
1086
- }
1087
- /**
1088
- * Validate by loading data into database one row at a time
1089
- * This catches constraint violations and extracts detailed error information
1090
- */
1091
- async validateWithDatabase(dirPath, jsonlFiles) {
1092
- const errors = [];
1093
- try {
1094
- const db = LinesDB.create({ dataDir: ":memory:" });
1095
- for (const file of jsonlFiles) {
1096
- const tableName = basename(file, ".jsonl");
1097
- const data = await JsonlReader.read(file);
1098
- let schema;
1099
- let foreignKeys = [];
1100
- let indexes = [];
1101
- let primaryKey;
1102
- try {
1103
- schema = await SchemaLoader.loadSchema(file);
1104
- const { pathToFileURL: pathToFileURL$1 } = await import("node:url");
1105
- const schemaModule = await import(`${pathToFileURL$1(file.replace(".jsonl", ".schema.ts")).href}?t=${Date.now()}`);
1106
- const schemaExport = schemaModule.schema || schemaModule.default;
1107
- if (schemaExport?.foreignKeys) foreignKeys = schemaExport.foreignKeys;
1108
- if (schemaExport?.indexes) indexes = schemaExport.indexes;
1109
- if (schemaExport?.primaryKey) primaryKey = schemaExport.primaryKey;
1110
- } catch (_error) {
1111
- continue;
1112
- }
1113
- try {
1114
- const tableSchema = this.createTableSchema(tableName, data, schema, foreignKeys, indexes, primaryKey);
1115
- this.createTableInDb(db, tableSchema);
1116
- for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
1117
- const row = data[rowIndex];
1118
- try {
1119
- this.insertRowIntoDb(db, tableName, tableSchema, row);
1120
- } catch (error) {
1121
- const constraintError = this.analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db);
1122
- if (constraintError) errors.push(constraintError);
1123
- }
1124
- }
1125
- } catch (_error) {
1126
- continue;
1127
- }
1128
- }
1129
- await db.close();
1130
- } catch (error) {
1131
- errors.push({
1132
- file: dirPath,
1133
- tableName: "database",
1134
- rowIndex: 0,
1135
- issues: [{
1136
- message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
1137
- path: []
1138
- }],
1139
- type: "schema"
1140
- });
1141
- }
1142
- return errors;
1143
- }
1144
- /**
1145
- * Create table schema from data and validation schema
1146
- */
1147
- createTableSchema(tableName, data, validationSchema, foreignKeys, indexes, primaryKey) {
1148
- if (data.length === 0) throw new Error(`No data found in ${tableName}`);
1149
- const schema = JsonlReader.inferSchema(tableName, data);
1150
- if (primaryKey) {
1151
- const pkColumn = schema.columns.find((col) => col.name === primaryKey);
1152
- if (pkColumn) pkColumn.primaryKey = true;
1153
- } else if (!schema.columns.some((col) => col.primaryKey)) {
1154
- const idColumn = schema.columns.find((c) => c.name === "id");
1155
- if (idColumn) idColumn.primaryKey = true;
1156
- }
1157
- if (foreignKeys && foreignKeys.length > 0) schema.foreignKeys = foreignKeys;
1158
- if (indexes && indexes.length > 0) schema.indexes = indexes;
1159
- return schema;
1160
- }
1161
- /**
1162
- * Create table in database
1163
- */
1164
- createTableInDb(db, schema) {
1165
- const columns = schema.columns.map((col) => {
1166
- let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
1167
- if (col.primaryKey) colDef += " PRIMARY KEY";
1168
- return colDef;
1169
- });
1170
- if (schema.foreignKeys && schema.foreignKeys.length > 0) for (const fk of schema.foreignKeys) columns.push(`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`);
1171
- const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(", ")})`;
1172
- db.execute(sql);
1173
- if (schema.indexes && schema.indexes.length > 0) for (const index of schema.indexes) {
1174
- const indexName = index.name || `idx_${schema.name}_${index.columns.join("_")}`;
1175
- const uniqueKeyword = index.unique ? "UNIQUE" : "";
1176
- const indexColumns = index.columns.map((col) => this.quoteIdentifier(col)).join(", ");
1177
- const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
1178
- db.execute(indexSql);
1179
- }
1180
- }
1181
- /**
1182
- * Insert a row into database
1183
- */
1184
- insertRowIntoDb(db, tableName, schema, row) {
1185
- const columnNames = schema.columns.map((col) => col.name);
1186
- const quotedColumns = columnNames.map((name) => this.quoteIdentifier(name));
1187
- const placeholders = columnNames.map(() => "?").join(", ");
1188
- const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(", ")}) VALUES (${placeholders})`;
1189
- const values = columnNames.map((col) => {
1190
- const value = row[col];
1191
- if (value === null || value === void 0) return null;
1192
- if (typeof value === "object") return JSON.stringify(value);
1193
- if (typeof value === "boolean") return value ? 1 : 0;
1194
- return value;
1195
- });
1196
- db.execute(sql, values);
1197
- }
1198
- /**
1199
- * Analyze constraint error and extract detailed information
1200
- */
1201
- analyzeConstraintError(error, file, tableName, rowIndex, row, foreignKeys, db) {
1202
- const errorMessage = error instanceof Error ? error.message : String(error);
1203
- if (errorMessage.includes("FOREIGN KEY constraint failed")) for (const fk of foreignKeys) {
1204
- const fkValue = row[fk.column];
1205
- if (fkValue === null || fkValue === void 0) continue;
1206
- try {
1207
- const result = db.query(`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`, [fkValue]);
1208
- if (result.length > 0 && result[0].count === 0) return {
1209
- file,
1210
- tableName,
1211
- rowIndex,
1212
- issues: [],
1213
- type: "foreignKey",
1214
- foreignKeyError: {
1215
- column: fk.column,
1216
- value: fkValue,
1217
- referencedTable: fk.references.table,
1218
- referencedColumn: fk.references.column
1219
- }
1220
- };
1221
- } catch (_) {}
1222
- }
1223
- return {
1224
- file,
1225
- tableName,
1226
- rowIndex,
1227
- issues: [{
1228
- message: errorMessage,
1229
- path: []
1230
- }],
1231
- type: "schema"
1232
- };
1233
- }
1234
- /**
1235
- * Quote SQL identifier
1236
- */
1237
- quoteIdentifier(name) {
1238
- return `"${name.replace(/"/g, "\"\"")}"`;
1239
- }
1240
- /**
1241
- * Validate a single JSONL file
1242
- */
1243
- async validateFile(filePath) {
1244
- const tableName = basename(filePath, ".jsonl");
1245
- const data = await JsonlReader.read(filePath);
1246
- const schema = await SchemaLoader.loadSchema(filePath);
1247
- const errors = [];
1248
- for (let i = 0; i < data.length; i++) {
1249
- const row = data[i];
1250
- const result = schema["~standard"].validate(row);
1251
- if (result instanceof Promise) throw new Error("Asynchronous validation is not supported.");
1252
- if (result.issues && result.issues.length > 0) errors.push({
1253
- file: filePath,
1254
- tableName,
1255
- rowIndex: i,
1256
- issues: result.issues,
1257
- type: "schema"
1258
- });
1259
- }
1260
- if (errors.length === 0) {
1261
- const dirPath = dirname(filePath);
1262
- const allJsonlFiles = (await readdir(dirPath, { withFileTypes: true })).filter((entry) => entry.isFile() && entry.name.endsWith(".jsonl")).map((entry) => join(dirPath, entry.name));
1263
- const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
1264
- errors.push(...dbErrors.filter((e) => e.file === filePath));
1265
- }
1266
- return {
1267
- valid: errors.length === 0,
1268
- errors,
1269
- warnings: []
1270
- };
1271
- }
1272
- };
1273
-
1274
1189
  //#endregion
1275
1190
  //#region src/jsonl-migration.ts
1276
1191
  /**
1277
1192
  * Validate a table by temporarily supplying in-memory rows while reusing the existing LinesDB validation pipeline.
1278
- * If validation fails, the underlying LinesDB error is rethrown so callers can inspect validation details.
1193
+ * If validation fails, throws an error with validation details.
1279
1194
  */
1280
1195
  async function ensureTableRowsValid(options) {
1281
- console.log("[ensureTableRowsValid] START");
1282
- console.log("[ensureTableRowsValid] dataDir:", options.dataDir);
1283
- console.log("[ensureTableRowsValid] tableName:", options.tableName);
1284
- console.log("[ensureTableRowsValid] rows count:", options.rows.length);
1285
1196
  const tablePath = join(options.dataDir, `${options.tableName}.jsonl`);
1286
1197
  const overrides = new Map([[tablePath, options.rows]]);
1287
- console.log("[ensureTableRowsValid] tablePath:", tablePath);
1288
- let capturedError = null;
1289
- const originalWarn = console.warn;
1290
- const warnMessages = [];
1291
- console.warn = (...args) => {
1292
- const message = args.join(" ");
1293
- console.log("[ensureTableRowsValid] Captured warn:", message);
1294
- warnMessages.push(message);
1295
- if (message.includes(`Failed to load table '${options.tableName}'`) && message.includes("Validation failed")) {
1296
- capturedError = new Error(message);
1297
- console.log("[ensureTableRowsValid] Captured validation error!");
1298
- }
1299
- };
1300
- try {
1301
- console.log("[ensureTableRowsValid] Calling JsonlReader.withOverrides");
1302
- await JsonlReader.withOverrides(overrides, async () => {
1303
- console.log("[ensureTableRowsValid] Inside withOverrides callback");
1304
- const db = LinesDB.create({ dataDir: options.dataDir });
1305
- console.log("[ensureTableRowsValid] LinesDB created");
1306
- try {
1307
- console.log("[ensureTableRowsValid] Calling db.initialize()");
1308
- await db.initialize();
1309
- console.log("[ensureTableRowsValid] db.initialize() completed");
1310
- } finally {
1311
- console.log("[ensureTableRowsValid] Calling db.close()");
1312
- await db.close();
1198
+ await JsonlReader.withOverrides(overrides, async () => {
1199
+ const db = LinesDB.create({ dataDir: options.dataDir });
1200
+ try {
1201
+ const result = await db.initialize({ tableName: options.tableName });
1202
+ if (!result.valid) {
1203
+ const errorCount = result.errors.length;
1204
+ const errorDetails = result.errors.map((e) => {
1205
+ const issueMessages = e.issues.map((issue) => issue.message).join(", ");
1206
+ return ` Row ${e.rowIndex}: ${issueMessages}`;
1207
+ }).join("\n");
1208
+ throw new Error(`Validation failed for table '${options.tableName}' (${errorCount} error(s)):\n${errorDetails}`);
1313
1209
  }
1314
- });
1315
- console.log("[ensureTableRowsValid] withOverrides completed");
1316
- } finally {
1317
- console.warn = originalWarn;
1318
- }
1319
- console.log("[ensureTableRowsValid] Warnings captured:", warnMessages.length);
1320
- console.log("[ensureTableRowsValid] capturedError:", capturedError ? "YES" : "NO");
1321
- if (capturedError) {
1322
- console.log("[ensureTableRowsValid] Throwing captured error");
1323
- throw capturedError;
1324
- }
1325
- console.log("[ensureTableRowsValid] END (success)");
1210
+ } finally {
1211
+ await db.close();
1212
+ }
1213
+ });
1326
1214
  }
1327
1215
 
1328
1216
  //#endregion
1329
- export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator, Validator, defineSchema, detectRuntime, ensureTableRowsValid, hasBackward };
1217
+ export { DirectoryScanner, JsonlReader, JsonlWriter, LinesDB, RUNTIME, SchemaLoader, TypeGenerator, defineSchema, detectRuntime, ensureTableRowsValid, hasBackward };
1330
1218
  //# sourceMappingURL=index.js.map