@toiroakr/lines-db 0.3.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/bin/cli.js +250 -134
- package/dist/index.cjs +169 -53
- package/dist/index.d.cts +31 -10
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +31 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +169 -53
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/database.ts +32 -15
- package/src/schema.ts +6 -6
- package/src/types.ts +2 -2
- package/src/validator.test.ts +140 -0
- package/src/validator.ts +272 -57
package/src/database.ts
CHANGED
|
@@ -107,7 +107,10 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
107
107
|
const schemaPath = tableConfig.jsonlPath.replace('.jsonl', '.schema.ts');
|
|
108
108
|
const schemaUrl = pathToFileURL(schemaPath).href;
|
|
109
109
|
const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
|
|
110
|
-
|
|
110
|
+
|
|
111
|
+
// Try to get foreign keys from exported 'schema' or directly from module
|
|
112
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
113
|
+
foreignKeys = schemaExport?.foreignKeys || schemaModule.foreignKeys;
|
|
111
114
|
} catch {
|
|
112
115
|
// Schema file not found - will continue without validation
|
|
113
116
|
}
|
|
@@ -154,7 +157,7 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
154
157
|
// Load validation schema if provided or try to auto-load
|
|
155
158
|
let validationSchema = config.validationSchema;
|
|
156
159
|
const schemaMetadata: {
|
|
157
|
-
primaryKey?:
|
|
160
|
+
primaryKey?: string;
|
|
158
161
|
foreignKeys?: BiDirectionalSchema['foreignKeys'];
|
|
159
162
|
indexes?: BiDirectionalSchema['indexes'];
|
|
160
163
|
} = {};
|
|
@@ -177,13 +180,24 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
177
180
|
const schemaUrl = pathToFileURL(schemaPath).href;
|
|
178
181
|
const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
|
|
179
182
|
|
|
180
|
-
|
|
183
|
+
// Try to get metadata from exported 'schema' or directly from module
|
|
184
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
185
|
+
|
|
186
|
+
if (schemaExport?.primaryKey) {
|
|
187
|
+
schemaMetadata.primaryKey = schemaExport.primaryKey;
|
|
188
|
+
} else if (schemaModule.primaryKey) {
|
|
181
189
|
schemaMetadata.primaryKey = schemaModule.primaryKey;
|
|
182
190
|
}
|
|
183
|
-
|
|
191
|
+
|
|
192
|
+
if (schemaExport?.foreignKeys) {
|
|
193
|
+
schemaMetadata.foreignKeys = schemaExport.foreignKeys;
|
|
194
|
+
} else if (schemaModule.foreignKeys) {
|
|
184
195
|
schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
185
196
|
}
|
|
186
|
-
|
|
197
|
+
|
|
198
|
+
if (schemaExport?.indexes) {
|
|
199
|
+
schemaMetadata.indexes = schemaExport.indexes;
|
|
200
|
+
} else if (schemaModule.indexes) {
|
|
187
201
|
schemaMetadata.indexes = schemaModule.indexes;
|
|
188
202
|
}
|
|
189
203
|
} catch (_error) {
|
|
@@ -253,12 +267,17 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
253
267
|
const indexes = biSchema?.indexes || schemaMetadata.indexes;
|
|
254
268
|
|
|
255
269
|
if (primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
256
|
-
// Add primary key constraint to
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
270
|
+
// Add primary key constraint to column
|
|
271
|
+
const col = schema.columns.find((c) => c.name === primaryKey);
|
|
272
|
+
if (col) {
|
|
273
|
+
col.primaryKey = true;
|
|
274
|
+
}
|
|
275
|
+
} else if (!primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
276
|
+
// If no primary key is defined, use 'id' column as primary key if it exists
|
|
277
|
+
// This matches the behavior of JsonlReader.inferSchema()
|
|
278
|
+
const idColumn = schema.columns.find((c) => c.name === 'id');
|
|
279
|
+
if (idColumn) {
|
|
280
|
+
idColumn.primaryKey = true;
|
|
262
281
|
}
|
|
263
282
|
}
|
|
264
283
|
if (foreignKeys) {
|
|
@@ -304,10 +323,8 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
304
323
|
if (schema.foreignKeys && schema.foreignKeys.length > 0) {
|
|
305
324
|
for (const fk of schema.foreignKeys) {
|
|
306
325
|
const fkParts = [
|
|
307
|
-
`FOREIGN KEY (${
|
|
308
|
-
`REFERENCES ${this.quoteTableName(fk.references.table)}(${fk.references.
|
|
309
|
-
.map((col) => this.quoteIdentifier(col))
|
|
310
|
-
.join(', ')})`,
|
|
326
|
+
`FOREIGN KEY (${this.quoteIdentifier(fk.column)})`,
|
|
327
|
+
`REFERENCES ${this.quoteTableName(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
|
|
311
328
|
];
|
|
312
329
|
if (fk.onDelete) {
|
|
313
330
|
fkParts.push(`ON DELETE ${fk.onDelete}`);
|
package/src/schema.ts
CHANGED
|
@@ -5,9 +5,9 @@ import type { StandardSchema, Table, ForeignKeyDefinition, IndexDefinition } fro
|
|
|
5
5
|
*/
|
|
6
6
|
export interface SchemaOptions {
|
|
7
7
|
/**
|
|
8
|
-
* Primary key
|
|
8
|
+
* Primary key column
|
|
9
9
|
*/
|
|
10
|
-
primaryKey?: string
|
|
10
|
+
primaryKey?: string;
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* Foreign key constraints
|
|
@@ -39,9 +39,9 @@ export interface BiDirectionalSchema<Input extends Table = Table, Output extends
|
|
|
39
39
|
backward?: (output: Output) => Input;
|
|
40
40
|
|
|
41
41
|
/**
|
|
42
|
-
* Primary key
|
|
42
|
+
* Primary key column
|
|
43
43
|
*/
|
|
44
|
-
primaryKey?: string
|
|
44
|
+
primaryKey?: string;
|
|
45
45
|
|
|
46
46
|
/**
|
|
47
47
|
* Foreign key constraints
|
|
@@ -79,9 +79,9 @@ export interface BiDirectionalSchema<Input extends Table = Table, Output extends
|
|
|
79
79
|
* const schema = defineSchema(
|
|
80
80
|
* v.object({ id: v.number(), customerId: v.number() }),
|
|
81
81
|
* {
|
|
82
|
-
* primaryKey:
|
|
82
|
+
* primaryKey: 'id',
|
|
83
83
|
* foreignKeys: [
|
|
84
|
-
* {
|
|
84
|
+
* { column: 'customerId', references: { table: 'users', column: 'id' } }
|
|
85
85
|
* ]
|
|
86
86
|
* }
|
|
87
87
|
* );
|
package/src/types.ts
CHANGED
|
@@ -18,10 +18,10 @@ export type InferInput<T> = T extends StandardSchemaV1<infer I, unknown> ? I : n
|
|
|
18
18
|
export type InferOutput<T> = T extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
19
19
|
|
|
20
20
|
export interface ForeignKeyDefinition {
|
|
21
|
-
|
|
21
|
+
column: string;
|
|
22
22
|
references: {
|
|
23
23
|
table: string;
|
|
24
|
-
|
|
24
|
+
column: string;
|
|
25
25
|
};
|
|
26
26
|
onDelete?: 'CASCADE' | 'SET NULL' | 'RESTRICT' | 'NO ACTION';
|
|
27
27
|
onUpdate?: 'CASCADE' | 'SET NULL' | 'RESTRICT' | 'NO ACTION';
|
package/src/validator.test.ts
CHANGED
|
@@ -364,4 +364,144 @@ describe('Validator', () => {
|
|
|
364
364
|
);
|
|
365
365
|
});
|
|
366
366
|
});
|
|
367
|
+
|
|
368
|
+
describe('constraint validation', () => {
|
|
369
|
+
it('should detect primary key constraint violations', async () => {
|
|
370
|
+
const jsonlPath = join(testDir, 'users.jsonl');
|
|
371
|
+
const schemaPath = join(testDir, 'users.schema.ts');
|
|
372
|
+
|
|
373
|
+
// Write data with duplicate id
|
|
374
|
+
await writeFile(
|
|
375
|
+
jsonlPath,
|
|
376
|
+
'{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"1","name":"Bob","email":"bob@example.com"}\n',
|
|
377
|
+
);
|
|
378
|
+
await writeFile(
|
|
379
|
+
schemaPath,
|
|
380
|
+
`
|
|
381
|
+
export const schema = {
|
|
382
|
+
'~standard': {
|
|
383
|
+
version: 1,
|
|
384
|
+
vendor: 'test',
|
|
385
|
+
validate: (data) => ({ value: data, issues: [] })
|
|
386
|
+
},
|
|
387
|
+
primaryKey: 'id'
|
|
388
|
+
};
|
|
389
|
+
`,
|
|
390
|
+
);
|
|
391
|
+
|
|
392
|
+
const validator = new Validator({ path: jsonlPath });
|
|
393
|
+
const result = await validator.validate();
|
|
394
|
+
|
|
395
|
+
expect(result.valid).toBe(false);
|
|
396
|
+
expect(result.errors).toHaveLength(1);
|
|
397
|
+
expect(result.errors[0].rowIndex).toBe(1);
|
|
398
|
+
expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
|
|
399
|
+
expect(result.errors[0].issues[0].message).toContain('id');
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
it('should detect unique index constraint violations', async () => {
|
|
403
|
+
const jsonlPath = join(testDir, 'users.jsonl');
|
|
404
|
+
const schemaPath = join(testDir, 'users.schema.ts');
|
|
405
|
+
|
|
406
|
+
// Write data with duplicate email
|
|
407
|
+
await writeFile(
|
|
408
|
+
jsonlPath,
|
|
409
|
+
'{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"2","name":"Bob","email":"alice@example.com"}\n',
|
|
410
|
+
);
|
|
411
|
+
await writeFile(
|
|
412
|
+
schemaPath,
|
|
413
|
+
`
|
|
414
|
+
export const schema = {
|
|
415
|
+
'~standard': {
|
|
416
|
+
version: 1,
|
|
417
|
+
vendor: 'test',
|
|
418
|
+
validate: (data) => ({ value: data, issues: [] })
|
|
419
|
+
},
|
|
420
|
+
primaryKey: 'id',
|
|
421
|
+
indexes: [
|
|
422
|
+
{ name: 'users_email_unique', columns: ['email'], unique: true }
|
|
423
|
+
]
|
|
424
|
+
};
|
|
425
|
+
`,
|
|
426
|
+
);
|
|
427
|
+
|
|
428
|
+
const validator = new Validator({ path: jsonlPath });
|
|
429
|
+
const result = await validator.validate();
|
|
430
|
+
|
|
431
|
+
expect(result.valid).toBe(false);
|
|
432
|
+
expect(result.errors).toHaveLength(1);
|
|
433
|
+
expect(result.errors[0].rowIndex).toBe(1);
|
|
434
|
+
expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
|
|
435
|
+
expect(result.errors[0].issues[0].message).toContain('email');
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
it('should use id column as primary key when primaryKey is not specified', async () => {
|
|
439
|
+
const jsonlPath = join(testDir, 'users.jsonl');
|
|
440
|
+
const schemaPath = join(testDir, 'users.schema.ts');
|
|
441
|
+
|
|
442
|
+
// Write data with duplicate id (no primaryKey specified in schema)
|
|
443
|
+
await writeFile(jsonlPath, '{"id":"1","name":"Alice"}\n{"id":"1","name":"Bob"}\n');
|
|
444
|
+
await writeFile(
|
|
445
|
+
schemaPath,
|
|
446
|
+
`
|
|
447
|
+
export const schema = {
|
|
448
|
+
'~standard': {
|
|
449
|
+
version: 1,
|
|
450
|
+
vendor: 'test',
|
|
451
|
+
validate: (data) => ({ value: data, issues: [] })
|
|
452
|
+
}
|
|
453
|
+
// Note: no primaryKey specified
|
|
454
|
+
};
|
|
455
|
+
`,
|
|
456
|
+
);
|
|
457
|
+
|
|
458
|
+
const validator = new Validator({ path: jsonlPath });
|
|
459
|
+
const result = await validator.validate();
|
|
460
|
+
|
|
461
|
+
expect(result.valid).toBe(false);
|
|
462
|
+
expect(result.errors).toHaveLength(1);
|
|
463
|
+
expect(result.errors[0].rowIndex).toBe(1);
|
|
464
|
+
expect(result.errors[0].issues[0].message).toContain('UNIQUE constraint failed');
|
|
465
|
+
expect(result.errors[0].issues[0].message).toContain('id');
|
|
466
|
+
});
|
|
467
|
+
|
|
468
|
+
it('should detect multiple constraint violations in single file', async () => {
|
|
469
|
+
const jsonlPath = join(testDir, 'users.jsonl');
|
|
470
|
+
const schemaPath = join(testDir, 'users.schema.ts');
|
|
471
|
+
|
|
472
|
+
// Write data with both duplicate id and duplicate email
|
|
473
|
+
await writeFile(
|
|
474
|
+
jsonlPath,
|
|
475
|
+
'{"id":"1","name":"Alice","email":"alice@example.com"}\n{"id":"1","name":"Bob","email":"bob@example.com"}\n{"id":"2","name":"Charlie","email":"alice@example.com"}\n',
|
|
476
|
+
);
|
|
477
|
+
await writeFile(
|
|
478
|
+
schemaPath,
|
|
479
|
+
`
|
|
480
|
+
export const schema = {
|
|
481
|
+
'~standard': {
|
|
482
|
+
version: 1,
|
|
483
|
+
vendor: 'test',
|
|
484
|
+
validate: (data) => ({ value: data, issues: [] })
|
|
485
|
+
},
|
|
486
|
+
primaryKey: 'id',
|
|
487
|
+
indexes: [
|
|
488
|
+
{ name: 'users_email_unique', columns: ['email'], unique: true }
|
|
489
|
+
]
|
|
490
|
+
};
|
|
491
|
+
`,
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
const validator = new Validator({ path: jsonlPath });
|
|
495
|
+
const result = await validator.validate();
|
|
496
|
+
|
|
497
|
+
expect(result.valid).toBe(false);
|
|
498
|
+
expect(result.errors).toHaveLength(2);
|
|
499
|
+
// First error: duplicate id
|
|
500
|
+
expect(result.errors[0].rowIndex).toBe(1);
|
|
501
|
+
expect(result.errors[0].issues[0].message).toContain('id');
|
|
502
|
+
// Second error: duplicate email
|
|
503
|
+
expect(result.errors[1].rowIndex).toBe(2);
|
|
504
|
+
expect(result.errors[1].issues[0].message).toContain('email');
|
|
505
|
+
});
|
|
506
|
+
});
|
|
367
507
|
});
|
package/src/validator.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { readdir, stat } from 'node:fs/promises';
|
|
2
|
-
import { join, basename } from 'node:path';
|
|
2
|
+
import { join, basename, dirname } from 'node:path';
|
|
3
3
|
import { JsonlReader } from './jsonl-reader.js';
|
|
4
4
|
import { SchemaLoader } from './schema-loader.js';
|
|
5
|
-
import
|
|
6
|
-
import type {
|
|
5
|
+
import { LinesDB } from './database.js';
|
|
6
|
+
import type { StandardSchemaIssue } from './types.js';
|
|
7
7
|
|
|
8
8
|
export interface ValidationResult {
|
|
9
9
|
valid: boolean;
|
|
@@ -91,10 +91,10 @@ export class Validator {
|
|
|
91
91
|
allWarnings.push(...result.warnings);
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
//
|
|
95
|
-
if (filesWithSchema.length > 0) {
|
|
96
|
-
const
|
|
97
|
-
allErrors.push(...
|
|
94
|
+
// Validate by loading into database with detailed error tracking
|
|
95
|
+
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
96
|
+
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
97
|
+
allErrors.push(...dbErrors);
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
return {
|
|
@@ -105,83 +105,282 @@ export class Validator {
|
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
/**
|
|
108
|
-
* Validate
|
|
108
|
+
* Validate by loading data into database one row at a time
|
|
109
|
+
* This catches constraint violations and extracts detailed error information
|
|
109
110
|
*/
|
|
110
|
-
private async
|
|
111
|
+
private async validateWithDatabase(
|
|
111
112
|
dirPath: string,
|
|
112
113
|
jsonlFiles: string[],
|
|
113
114
|
): Promise<ValidationErrorDetail[]> {
|
|
114
115
|
const errors: ValidationErrorDetail[] = [];
|
|
115
116
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
const tableSchemas = new Map<string, BiDirectionalSchema>();
|
|
117
|
+
try {
|
|
118
|
+
const db = LinesDB.create({ dataDir: ':memory:' });
|
|
119
119
|
|
|
120
|
-
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
120
|
+
// Load all tables one by one, checking each row
|
|
121
|
+
for (const file of jsonlFiles) {
|
|
122
|
+
const tableName = basename(file, '.jsonl');
|
|
123
|
+
const data = await JsonlReader.read(file);
|
|
124
|
+
|
|
125
|
+
// Load schema and metadata
|
|
126
|
+
let schema: any;
|
|
127
|
+
let foreignKeys: any[] = [];
|
|
128
|
+
let indexes: any[] = [];
|
|
129
|
+
let primaryKey: string | undefined;
|
|
130
|
+
try {
|
|
131
|
+
schema = await SchemaLoader.loadSchema(file);
|
|
132
|
+
const { pathToFileURL } = await import('node:url');
|
|
133
|
+
const schemaPath = file.replace('.jsonl', '.schema.ts');
|
|
134
|
+
const schemaUrl = pathToFileURL(schemaPath).href;
|
|
135
|
+
const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
|
|
136
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
137
|
+
if (schemaExport?.foreignKeys) {
|
|
138
|
+
foreignKeys = schemaExport.foreignKeys;
|
|
139
|
+
}
|
|
140
|
+
if (schemaExport?.indexes) {
|
|
141
|
+
indexes = schemaExport.indexes;
|
|
142
|
+
}
|
|
143
|
+
if (schemaExport?.primaryKey) {
|
|
144
|
+
primaryKey = schemaExport.primaryKey;
|
|
145
|
+
}
|
|
146
|
+
} catch (_error) {
|
|
147
|
+
// Schema not found or failed to load
|
|
148
|
+
continue;
|
|
149
|
+
}
|
|
124
150
|
|
|
125
|
-
|
|
126
|
-
|
|
151
|
+
// Create table schema
|
|
152
|
+
try {
|
|
153
|
+
const tableSchema = this.createTableSchema(
|
|
154
|
+
tableName,
|
|
155
|
+
data,
|
|
156
|
+
schema,
|
|
157
|
+
foreignKeys,
|
|
158
|
+
indexes,
|
|
159
|
+
primaryKey,
|
|
160
|
+
);
|
|
161
|
+
|
|
162
|
+
// Create the table in the database
|
|
163
|
+
this.createTableInDb(db, tableSchema);
|
|
164
|
+
|
|
165
|
+
// Insert rows one by one to catch constraint violations
|
|
166
|
+
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
167
|
+
const row = data[rowIndex];
|
|
168
|
+
try {
|
|
169
|
+
this.insertRowIntoDb(db, tableName, tableSchema, row);
|
|
170
|
+
} catch (error) {
|
|
171
|
+
// Constraint violation occurred
|
|
172
|
+
const constraintError = this.analyzeConstraintError(
|
|
173
|
+
error,
|
|
174
|
+
file,
|
|
175
|
+
tableName,
|
|
176
|
+
rowIndex,
|
|
177
|
+
row,
|
|
178
|
+
foreignKeys,
|
|
179
|
+
db,
|
|
180
|
+
);
|
|
181
|
+
if (constraintError) {
|
|
182
|
+
errors.push(constraintError);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
} catch (_error) {
|
|
187
|
+
// Skip this table and continue if table creation fails
|
|
188
|
+
continue;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
await db.close();
|
|
193
|
+
} catch (error) {
|
|
194
|
+
// Database initialization failed
|
|
195
|
+
errors.push({
|
|
196
|
+
file: dirPath,
|
|
197
|
+
tableName: 'database',
|
|
198
|
+
rowIndex: 0,
|
|
199
|
+
issues: [
|
|
200
|
+
{
|
|
201
|
+
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
202
|
+
path: [],
|
|
203
|
+
},
|
|
204
|
+
],
|
|
205
|
+
type: 'schema',
|
|
206
|
+
});
|
|
127
207
|
}
|
|
128
208
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const tableName = basename(file, '.jsonl');
|
|
132
|
-
const schema = tableSchemas.get(tableName);
|
|
133
|
-
const data = tableData.get(tableName);
|
|
209
|
+
return errors;
|
|
210
|
+
}
|
|
134
211
|
|
|
135
|
-
|
|
136
|
-
|
|
212
|
+
/**
|
|
213
|
+
* Create table schema from data and validation schema
|
|
214
|
+
*/
|
|
215
|
+
private createTableSchema(
|
|
216
|
+
tableName: string,
|
|
217
|
+
data: any[],
|
|
218
|
+
validationSchema: any,
|
|
219
|
+
foreignKeys: any[],
|
|
220
|
+
indexes: any[],
|
|
221
|
+
primaryKey?: string,
|
|
222
|
+
): any {
|
|
223
|
+
if (data.length === 0) {
|
|
224
|
+
throw new Error(`No data found in ${tableName}`);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Infer schema from data
|
|
228
|
+
const schema = JsonlReader.inferSchema(tableName, data);
|
|
229
|
+
|
|
230
|
+
// Set primary key if specified
|
|
231
|
+
if (primaryKey) {
|
|
232
|
+
const pkColumn = schema.columns.find((col: any) => col.name === primaryKey);
|
|
233
|
+
if (pkColumn) {
|
|
234
|
+
pkColumn.primaryKey = true;
|
|
235
|
+
}
|
|
236
|
+
} else if (!schema.columns.some((col: any) => col.primaryKey)) {
|
|
237
|
+
// If no primary key is defined, use 'id' column as primary key if it exists
|
|
238
|
+
// This matches the behavior of database.ts
|
|
239
|
+
const idColumn = schema.columns.find((c: any) => c.name === 'id');
|
|
240
|
+
if (idColumn) {
|
|
241
|
+
idColumn.primaryKey = true;
|
|
137
242
|
}
|
|
243
|
+
}
|
|
138
244
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
245
|
+
// Add foreign keys
|
|
246
|
+
if (foreignKeys && foreignKeys.length > 0) {
|
|
247
|
+
schema.foreignKeys = foreignKeys;
|
|
248
|
+
}
|
|
143
249
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
250
|
+
// Add indexes
|
|
251
|
+
if (indexes && indexes.length > 0) {
|
|
252
|
+
schema.indexes = indexes;
|
|
253
|
+
}
|
|
148
254
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
for (const refRow of referencedData) {
|
|
152
|
-
// Build composite key from referenced columns
|
|
153
|
-
const keyValues = fk.references.columns.map((col) => refRow[col]);
|
|
154
|
-
const compositeKey = JSON.stringify(keyValues);
|
|
155
|
-
referencedValues.add(compositeKey);
|
|
156
|
-
}
|
|
255
|
+
return schema;
|
|
256
|
+
}
|
|
157
257
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
258
|
+
/**
|
|
259
|
+
* Create table in database
|
|
260
|
+
*/
|
|
261
|
+
private createTableInDb(db: LinesDB<any>, schema: any): void {
|
|
262
|
+
const columns = schema.columns.map((col: any) => {
|
|
263
|
+
let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
|
|
264
|
+
if (col.primaryKey) {
|
|
265
|
+
colDef += ' PRIMARY KEY';
|
|
266
|
+
}
|
|
267
|
+
return colDef;
|
|
268
|
+
});
|
|
269
|
+
|
|
270
|
+
// Add foreign key constraints
|
|
271
|
+
if (schema.foreignKeys && schema.foreignKeys.length > 0) {
|
|
272
|
+
for (const fk of schema.foreignKeys) {
|
|
273
|
+
columns.push(
|
|
274
|
+
`FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(', ')})`;
|
|
280
|
+
db.execute(sql);
|
|
281
|
+
|
|
282
|
+
// Create indexes
|
|
283
|
+
if (schema.indexes && schema.indexes.length > 0) {
|
|
284
|
+
for (const index of schema.indexes) {
|
|
285
|
+
const indexName = index.name || `idx_${schema.name}_${index.columns.join('_')}`;
|
|
286
|
+
const uniqueKeyword = index.unique ? 'UNIQUE' : '';
|
|
287
|
+
const indexColumns = index.columns
|
|
288
|
+
.map((col: string) => this.quoteIdentifier(col))
|
|
289
|
+
.join(', ');
|
|
290
|
+
const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
|
|
291
|
+
db.execute(indexSql);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
/**
|
|
297
|
+
* Insert a row into database
|
|
298
|
+
*/
|
|
299
|
+
private insertRowIntoDb(db: LinesDB<any>, tableName: string, schema: any, row: any): void {
|
|
300
|
+
const columnNames = schema.columns.map((col: any) => col.name);
|
|
301
|
+
const quotedColumns = columnNames.map((name: string) => this.quoteIdentifier(name));
|
|
302
|
+
const placeholders = columnNames.map(() => '?').join(', ');
|
|
303
|
+
const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(', ')}) VALUES (${placeholders})`;
|
|
304
|
+
|
|
305
|
+
const values = columnNames.map((col: string) => {
|
|
306
|
+
const value = row[col];
|
|
307
|
+
if (value === null || value === undefined) return null;
|
|
308
|
+
if (typeof value === 'object') return JSON.stringify(value);
|
|
309
|
+
if (typeof value === 'boolean') return value ? 1 : 0;
|
|
310
|
+
return value;
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
db.execute(sql, values);
|
|
314
|
+
}
|
|
163
315
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
316
|
+
/**
|
|
317
|
+
* Analyze constraint error and extract detailed information
|
|
318
|
+
*/
|
|
319
|
+
private analyzeConstraintError(
|
|
320
|
+
error: any,
|
|
321
|
+
file: string,
|
|
322
|
+
tableName: string,
|
|
323
|
+
rowIndex: number,
|
|
324
|
+
row: any,
|
|
325
|
+
foreignKeys: any[],
|
|
326
|
+
db: LinesDB<any>,
|
|
327
|
+
): ValidationErrorDetail | null {
|
|
328
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
329
|
+
|
|
330
|
+
// Foreign key constraint
|
|
331
|
+
if (errorMessage.includes('FOREIGN KEY constraint failed')) {
|
|
332
|
+
// Find which foreign key failed
|
|
333
|
+
for (const fk of foreignKeys) {
|
|
334
|
+
const fkValue = row[fk.column];
|
|
335
|
+
if (fkValue === null || fkValue === undefined) continue;
|
|
336
|
+
|
|
337
|
+
// Check if referenced value exists
|
|
338
|
+
try {
|
|
339
|
+
const result = db.query(
|
|
340
|
+
`SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`,
|
|
341
|
+
[fkValue],
|
|
342
|
+
);
|
|
343
|
+
if (result.length > 0 && (result[0] as any).count === 0) {
|
|
344
|
+
return {
|
|
167
345
|
file,
|
|
168
346
|
tableName,
|
|
169
|
-
rowIndex
|
|
347
|
+
rowIndex,
|
|
170
348
|
issues: [],
|
|
171
349
|
type: 'foreignKey',
|
|
172
350
|
foreignKeyError: {
|
|
173
|
-
column: fk.
|
|
174
|
-
value:
|
|
175
|
-
referencedTable:
|
|
176
|
-
referencedColumn: fk.references.
|
|
351
|
+
column: fk.column,
|
|
352
|
+
value: fkValue,
|
|
353
|
+
referencedTable: fk.references.table,
|
|
354
|
+
referencedColumn: fk.references.column,
|
|
177
355
|
},
|
|
178
|
-
}
|
|
356
|
+
};
|
|
179
357
|
}
|
|
358
|
+
} catch (_) {
|
|
359
|
+
// Referenced table doesn't exist yet
|
|
180
360
|
}
|
|
181
361
|
}
|
|
182
362
|
}
|
|
183
363
|
|
|
184
|
-
|
|
364
|
+
// Other constraint errors (primary key, unique, etc.)
|
|
365
|
+
return {
|
|
366
|
+
file,
|
|
367
|
+
tableName,
|
|
368
|
+
rowIndex,
|
|
369
|
+
issues: [
|
|
370
|
+
{
|
|
371
|
+
message: errorMessage,
|
|
372
|
+
path: [],
|
|
373
|
+
},
|
|
374
|
+
],
|
|
375
|
+
type: 'schema',
|
|
376
|
+
};
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
/**
|
|
380
|
+
* Quote SQL identifier
|
|
381
|
+
*/
|
|
382
|
+
private quoteIdentifier(name: string): string {
|
|
383
|
+
return `"${name.replace(/"/g, '""')}"`;
|
|
185
384
|
}
|
|
186
385
|
|
|
187
386
|
/**
|
|
@@ -196,7 +395,7 @@ export class Validator {
|
|
|
196
395
|
|
|
197
396
|
const errors: ValidationErrorDetail[] = [];
|
|
198
397
|
|
|
199
|
-
// Validate each row
|
|
398
|
+
// Validate each row with schema
|
|
200
399
|
for (let i = 0; i < data.length; i++) {
|
|
201
400
|
const row = data[i];
|
|
202
401
|
const result = schema['~standard'].validate(row);
|
|
@@ -217,6 +416,22 @@ export class Validator {
|
|
|
217
416
|
}
|
|
218
417
|
}
|
|
219
418
|
|
|
419
|
+
// If schema validation passed, validate database constraints
|
|
420
|
+
if (errors.length === 0) {
|
|
421
|
+
const dirPath = dirname(filePath);
|
|
422
|
+
|
|
423
|
+
// Get all JSONL files in the directory
|
|
424
|
+
const entries = await readdir(dirPath, { withFileTypes: true });
|
|
425
|
+
const allJsonlFiles = entries
|
|
426
|
+
.filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
|
|
427
|
+
.map((entry) => join(dirPath, entry.name));
|
|
428
|
+
|
|
429
|
+
// Validate database constraints (including foreign keys)
|
|
430
|
+
const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
|
|
431
|
+
// Only include errors for the current file
|
|
432
|
+
errors.push(...dbErrors.filter((e) => e.file === filePath));
|
|
433
|
+
}
|
|
434
|
+
|
|
220
435
|
return {
|
|
221
436
|
valid: errors.length === 0,
|
|
222
437
|
errors,
|