@toiroakr/lines-db 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -364,4 +364,144 @@ describe('Validator', () => {
364
364
  );
365
365
  });
366
366
  });
367
+
368
+ describe('constraint validation', () => {
369
+ it('should detect primary key constraint violations', async () => {
370
+ const jsonlPath = join(testDir, 'users.jsonl');
371
+ const schemaPath = join(testDir, 'users.schema.ts');
372
+
373
+ // Write data with duplicate id
374
+ await writeFile(
375
+ jsonlPath,
376
+ '{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"1","name":"Bob","email":"bob@example.com"}\n',
377
+ );
378
+ await writeFile(
379
+ schemaPath,
380
+ `
381
+ export const schema = {
382
+ '~standard': {
383
+ version: 1,
384
+ vendor: 'test',
385
+ validate: (data) => ({ value: data, issues: [] })
386
+ },
387
+ primaryKey: 'id'
388
+ };
389
+ `,
390
+ );
391
+
392
+ const validator = new Validator({ path: jsonlPath });
393
+ const result = await validator.validate();
394
+
395
+ expect(result.valid).toBe(false);
396
+ expect(result.errors).toHaveLength(1);
397
+ expect(result.errors[0].rowIndex).toBe(1);
398
+ expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
399
+ expect(result.errors[0].issues[0].message).toContain('id');
400
+ });
401
+
402
+ it('should detect unique index constraint violations', async () => {
403
+ const jsonlPath = join(testDir, 'users.jsonl');
404
+ const schemaPath = join(testDir, 'users.schema.ts');
405
+
406
+ // Write data with duplicate email
407
+ await writeFile(
408
+ jsonlPath,
409
+ '{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"2","name":"Bob","email":"alice@example.com"}\n',
410
+ );
411
+ await writeFile(
412
+ schemaPath,
413
+ `
414
+ export const schema = {
415
+ '~standard': {
416
+ version: 1,
417
+ vendor: 'test',
418
+ validate: (data) => ({ value: data, issues: [] })
419
+ },
420
+ primaryKey: 'id',
421
+ indexes: [
422
+ { name: 'users_email_unique', columns: ['email'], unique: true }
423
+ ]
424
+ };
425
+ `,
426
+ );
427
+
428
+ const validator = new Validator({ path: jsonlPath });
429
+ const result = await validator.validate();
430
+
431
+ expect(result.valid).toBe(false);
432
+ expect(result.errors).toHaveLength(1);
433
+ expect(result.errors[0].rowIndex).toBe(1);
434
+ expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
435
+ expect(result.errors[0].issues[0].message).toContain('email');
436
+ });
437
+
438
+ it('should use id column as primary key when primaryKey is not specified', async () => {
439
+ const jsonlPath = join(testDir, 'users.jsonl');
440
+ const schemaPath = join(testDir, 'users.schema.ts');
441
+
442
+ // Write data with duplicate id (no primaryKey specified in schema)
443
+ await writeFile(jsonlPath, '{"id":"1","name":"Alice"}\n{"id":"1","name":"Bob"}\n');
444
+ await writeFile(
445
+ schemaPath,
446
+ `
447
+ export const schema = {
448
+ '~standard': {
449
+ version: 1,
450
+ vendor: 'test',
451
+ validate: (data) => ({ value: data, issues: [] })
452
+ }
453
+ // Note: no primaryKey specified
454
+ };
455
+ `,
456
+ );
457
+
458
+ const validator = new Validator({ path: jsonlPath });
459
+ const result = await validator.validate();
460
+
461
+ expect(result.valid).toBe(false);
462
+ expect(result.errors).toHaveLength(1);
463
+ expect(result.errors[0].rowIndex).toBe(1);
464
+ expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
465
+ expect(result.errors[0].issues[0].message).toContain('id');
466
+ });
467
+
468
+ it('should detect multiple constraint violations in single file', async () => {
469
+ const jsonlPath = join(testDir, 'users.jsonl');
470
+ const schemaPath = join(testDir, 'users.schema.ts');
471
+
472
+ // Write data with both duplicate id and duplicate email
473
+ await writeFile(
474
+ jsonlPath,
475
+ '{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"1","name":"Bob","email":"bob@example.com"}\n{"id":"2","name":"Charlie","email":"alice@example.com"}\n',
476
+ );
477
+ await writeFile(
478
+ schemaPath,
479
+ `
480
+ export const schema = {
481
+ '~standard': {
482
+ version: 1,
483
+ vendor: 'test',
484
+ validate: (data) => ({ value: data, issues: [] })
485
+ },
486
+ primaryKey: 'id',
487
+ indexes: [
488
+ { name: 'users_email_unique', columns: ['email'], unique: true }
489
+ ]
490
+ };
491
+ `,
492
+ );
493
+
494
+ const validator = new Validator({ path: jsonlPath });
495
+ const result = await validator.validate();
496
+
497
+ expect(result.valid).toBe(false);
498
+ expect(result.errors).toHaveLength(2);
499
+ // First error: duplicate id
500
+ expect(result.errors[0].rowIndex).toBe(1);
501
+ expect(result.errors[0].issues[0].message).toContain('id');
502
+ // Second error: duplicate email
503
+ expect(result.errors[1].rowIndex).toBe(2);
504
+ expect(result.errors[1].issues[0].message).toContain('email');
505
+ });
506
+ });
367
507
  });
package/src/validator.ts CHANGED
@@ -91,7 +91,7 @@ export class Validator {
91
91
  allWarnings.push(...result.warnings);
92
92
  }
93
93
 
94
- // Then, validate by actually loading into database
94
+ // Validate by loading into database with detailed error tracking
95
95
  if (filesWithSchema.length > 0 && allErrors.length === 0) {
96
96
  const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
97
97
  allErrors.push(...dbErrors);
@@ -105,8 +105,8 @@ export class Validator {
105
105
  }
106
106
 
107
107
  /**
108
- * Validate by loading data into an actual database
109
- * This catches constraint violations (unique, primary key, foreign key, etc.)
108
+ * Validate by loading data into database one row at a time
109
+ * This catches constraint violations and extracts detailed error information
110
110
  */
111
111
  private async validateWithDatabase(
112
112
  dirPath: string,
@@ -114,47 +114,84 @@ export class Validator {
114
114
  ): Promise<ValidationErrorDetail[]> {
115
115
  const errors: ValidationErrorDetail[] = [];
116
116
 
117
- // Capture console.warn messages
118
- const warnMessages: string[] = [];
119
- const originalWarn = console.warn;
120
- console.warn = (...args: unknown[]) => {
121
- const message = args.map((arg) => String(arg)).join(' ');
122
- warnMessages.push(message);
123
- // Still output to console for debugging
124
- originalWarn(...args);
125
- };
126
-
127
117
  try {
128
- // Try to initialize database with the data directory
129
- const db = LinesDB.create({ dataDir: dirPath });
130
- await db.initialize();
131
- await db.close();
118
+ const db = LinesDB.create({ dataDir: ':memory:' });
132
119
 
133
- // Check if there were any loading errors
134
- for (const message of warnMessages) {
135
- if (message.includes('Failed to load table')) {
136
- // Extract table name from message
137
- const tableNameMatch = message.match(/Failed to load table '([^']+)'/);
138
- const tableName = tableNameMatch ? tableNameMatch[1] : 'unknown';
120
+ // Load all tables one by one, checking each row
121
+ for (const file of jsonlFiles) {
122
+ const tableName = basename(file, '.jsonl');
123
+ const data = await JsonlReader.read(file);
139
124
 
140
- const file = jsonlFiles.find((f) => basename(f, '.jsonl') === tableName);
125
+ // Load schema and metadata
126
+ let schema: any;
127
+ let foreignKeys: any[] = [];
128
+ let indexes: any[] = [];
129
+ let primaryKey: string | undefined;
130
+ try {
131
+ schema = await SchemaLoader.loadSchema(file);
132
+ const { pathToFileURL } = await import('node:url');
133
+ const schemaPath = file.replace('.jsonl', '.schema.ts');
134
+ const schemaUrl = pathToFileURL(schemaPath).href;
135
+ const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
136
+ const schemaExport = schemaModule.schema || schemaModule.default;
137
+ if (schemaExport?.foreignKeys) {
138
+ foreignKeys = schemaExport.foreignKeys;
139
+ }
140
+ if (schemaExport?.indexes) {
141
+ indexes = schemaExport.indexes;
142
+ }
143
+ if (schemaExport?.primaryKey) {
144
+ primaryKey = schemaExport.primaryKey;
145
+ }
146
+ } catch (_error) {
147
+ // Schema not found or failed to load
148
+ continue;
149
+ }
141
150
 
142
- errors.push({
143
- file: file || `${dirPath}/${tableName}.jsonl`,
151
+ // Create table schema
152
+ try {
153
+ const tableSchema = this.createTableSchema(
144
154
  tableName,
145
- rowIndex: 0,
146
- issues: [
147
- {
148
- message: message.replace(/^Warning:\s*/, ''),
149
- path: [],
150
- },
151
- ],
152
- type: 'schema',
153
- });
155
+ data,
156
+ schema,
157
+ foreignKeys,
158
+ indexes,
159
+ primaryKey,
160
+ );
161
+
162
+ // Create the table in the database
163
+ this.createTableInDb(db, tableSchema);
164
+
165
+ // Insert rows one by one to catch constraint violations
166
+ for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
167
+ const row = data[rowIndex];
168
+ try {
169
+ this.insertRowIntoDb(db, tableName, tableSchema, row);
170
+ } catch (error) {
171
+ // Constraint violation occurred
172
+ const constraintError = this.analyzeConstraintError(
173
+ error,
174
+ file,
175
+ tableName,
176
+ rowIndex,
177
+ row,
178
+ foreignKeys,
179
+ db,
180
+ );
181
+ if (constraintError) {
182
+ errors.push(constraintError);
183
+ }
184
+ }
185
+ }
186
+ } catch (_error) {
187
+ // Skip this table and continue if table creation fails
188
+ continue;
154
189
  }
155
190
  }
191
+
192
+ await db.close();
156
193
  } catch (error) {
157
- // If initialization itself fails, report it
194
+ // Database initialization failed
158
195
  errors.push({
159
196
  file: dirPath,
160
197
  tableName: 'database',
@@ -167,14 +204,185 @@ export class Validator {
167
204
  ],
168
205
  type: 'schema',
169
206
  });
170
- } finally {
171
- // Restore console.warn
172
- console.warn = originalWarn;
173
207
  }
174
208
 
175
209
  return errors;
176
210
  }
177
211
 
212
+ /**
213
+ * Create table schema from data and validation schema
214
+ */
215
+ private createTableSchema(
216
+ tableName: string,
217
+ data: any[],
218
+ validationSchema: any,
219
+ foreignKeys: any[],
220
+ indexes: any[],
221
+ primaryKey?: string,
222
+ ): any {
223
+ if (data.length === 0) {
224
+ throw new Error(`No data found in ${tableName}`);
225
+ }
226
+
227
+ // Infer schema from data
228
+ const schema = JsonlReader.inferSchema(tableName, data);
229
+
230
+ // Set primary key if specified
231
+ if (primaryKey) {
232
+ const pkColumn = schema.columns.find((col: any) => col.name === primaryKey);
233
+ if (pkColumn) {
234
+ pkColumn.primaryKey = true;
235
+ }
236
+ } else if (!schema.columns.some((col: any) => col.primaryKey)) {
237
+ // If no primary key is defined, use 'id' column as primary key if it exists
238
+ // This matches the behavior of database.ts
239
+ const idColumn = schema.columns.find((c: any) => c.name === 'id');
240
+ if (idColumn) {
241
+ idColumn.primaryKey = true;
242
+ }
243
+ }
244
+
245
+ // Add foreign keys
246
+ if (foreignKeys && foreignKeys.length > 0) {
247
+ schema.foreignKeys = foreignKeys;
248
+ }
249
+
250
+ // Add indexes
251
+ if (indexes && indexes.length > 0) {
252
+ schema.indexes = indexes;
253
+ }
254
+
255
+ return schema;
256
+ }
257
+
258
+ /**
259
+ * Create table in database
260
+ */
261
+ private createTableInDb(db: LinesDB<any>, schema: any): void {
262
+ const columns = schema.columns.map((col: any) => {
263
+ let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
264
+ if (col.primaryKey) {
265
+ colDef += ' PRIMARY KEY';
266
+ }
267
+ return colDef;
268
+ });
269
+
270
+ // Add foreign key constraints
271
+ if (schema.foreignKeys && schema.foreignKeys.length > 0) {
272
+ for (const fk of schema.foreignKeys) {
273
+ columns.push(
274
+ `FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
275
+ );
276
+ }
277
+ }
278
+
279
+ const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(', ')})`;
280
+ db.execute(sql);
281
+
282
+ // Create indexes
283
+ if (schema.indexes && schema.indexes.length > 0) {
284
+ for (const index of schema.indexes) {
285
+ const indexName = index.name || `idx_${schema.name}_${index.columns.join('_')}`;
286
+ const uniqueKeyword = index.unique ? 'UNIQUE' : '';
287
+ const indexColumns = index.columns
288
+ .map((col: string) => this.quoteIdentifier(col))
289
+ .join(', ');
290
+ const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
291
+ db.execute(indexSql);
292
+ }
293
+ }
294
+ }
295
+
296
+ /**
297
+ * Insert a row into database
298
+ */
299
+ private insertRowIntoDb(db: LinesDB<any>, tableName: string, schema: any, row: any): void {
300
+ const columnNames = schema.columns.map((col: any) => col.name);
301
+ const quotedColumns = columnNames.map((name: string) => this.quoteIdentifier(name));
302
+ const placeholders = columnNames.map(() => '?').join(', ');
303
+ const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(', ')}) VALUES (${placeholders})`;
304
+
305
+ const values = columnNames.map((col: string) => {
306
+ const value = row[col];
307
+ if (value === null || value === undefined) return null;
308
+ if (typeof value === 'object') return JSON.stringify(value);
309
+ if (typeof value === 'boolean') return value ? 1 : 0;
310
+ return value;
311
+ });
312
+
313
+ db.execute(sql, values);
314
+ }
315
+
316
+ /**
317
+ * Analyze constraint error and extract detailed information
318
+ */
319
+ private analyzeConstraintError(
320
+ error: any,
321
+ file: string,
322
+ tableName: string,
323
+ rowIndex: number,
324
+ row: any,
325
+ foreignKeys: any[],
326
+ db: LinesDB<any>,
327
+ ): ValidationErrorDetail | null {
328
+ const errorMessage = error instanceof Error ? error.message : String(error);
329
+
330
+ // Foreign key constraint
331
+ if (errorMessage.includes('FOREIGN KEY constraint failed')) {
332
+ // Find which foreign key failed
333
+ for (const fk of foreignKeys) {
334
+ const fkValue = row[fk.column];
335
+ if (fkValue === null || fkValue === undefined) continue;
336
+
337
+ // Check if referenced value exists
338
+ try {
339
+ const result = db.query(
340
+ `SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`,
341
+ [fkValue],
342
+ );
343
+ if (result.length > 0 && (result[0] as any).count === 0) {
344
+ return {
345
+ file,
346
+ tableName,
347
+ rowIndex,
348
+ issues: [],
349
+ type: 'foreignKey',
350
+ foreignKeyError: {
351
+ column: fk.column,
352
+ value: fkValue,
353
+ referencedTable: fk.references.table,
354
+ referencedColumn: fk.references.column,
355
+ },
356
+ };
357
+ }
358
+ } catch (_) {
359
+ // Referenced table doesn't exist yet
360
+ }
361
+ }
362
+ }
363
+
364
+ // Other constraint errors (primary key, unique, etc.)
365
+ return {
366
+ file,
367
+ tableName,
368
+ rowIndex,
369
+ issues: [
370
+ {
371
+ message: errorMessage,
372
+ path: [],
373
+ },
374
+ ],
375
+ type: 'schema',
376
+ };
377
+ }
378
+
379
+ /**
380
+ * Quote SQL identifier
381
+ */
382
+ private quoteIdentifier(name: string): string {
383
+ return `"${name.replace(/"/g, '""')}"`;
384
+ }
385
+
178
386
  /**
179
387
  * Validate a single JSONL file
180
388
  */
@@ -208,11 +416,20 @@ export class Validator {
208
416
  }
209
417
  }
210
418
 
211
- // If schema validation passed, validate with database
419
+ // If schema validation passed, validate database constraints
212
420
  if (errors.length === 0) {
213
421
  const dirPath = dirname(filePath);
214
- const dbErrors = await this.validateWithDatabase(dirPath, [filePath]);
215
- errors.push(...dbErrors);
422
+
423
+ // Get all JSONL files in the directory
424
+ const entries = await readdir(dirPath, { withFileTypes: true });
425
+ const allJsonlFiles = entries
426
+ .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
427
+ .map((entry) => join(dirPath, entry.name));
428
+
429
+ // Validate database constraints (including foreign keys)
430
+ const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
431
+ // Only include errors for the current file
432
+ errors.push(...dbErrors.filter((e) => e.file === filePath));
216
433
  }
217
434
 
218
435
  return {