@toiroakr/lines-db 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/bin/cli.js +222 -171
- package/dist/index.cjs +143 -92
- package/dist/index.d.cts +23 -11
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +23 -11
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +143 -92
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/cli.ts +5 -2
- package/src/database.ts +221 -77
- package/src/jsonl-reader.ts +1 -1
- package/src/schema.ts +6 -6
- package/src/sqlite-adapter.ts +4 -0
- package/src/types.ts +2 -2
- package/src/validator.ts +70 -72
package/src/database.ts
CHANGED
|
@@ -40,108 +40,186 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
40
40
|
|
|
41
41
|
/**
|
|
42
42
|
* Initialize database by loading all JSONL files
|
|
43
|
+
* Uses dependency resolution to ensure foreign key references are loaded in correct order
|
|
43
44
|
*/
|
|
44
45
|
async initialize(): Promise<void> {
|
|
45
46
|
// Scan directory for JSONL files
|
|
46
47
|
this.tables = await DirectoryScanner.scanDirectory(this.config.dataDir);
|
|
47
48
|
|
|
48
|
-
//
|
|
49
|
-
|
|
49
|
+
// Track loaded tables and tables currently being loaded (for circular dependency detection)
|
|
50
|
+
const loadedTables = new Set<string>();
|
|
51
|
+
const loadingTables = new Set<string>();
|
|
52
|
+
|
|
53
|
+
// Load all tables with dependency resolution
|
|
54
|
+
for (const [tableName] of this.tables) {
|
|
55
|
+
if (!loadedTables.has(tableName)) {
|
|
56
|
+
try {
|
|
57
|
+
await this.loadTableWithDependencies(tableName, loadedTables, loadingTables);
|
|
58
|
+
} catch (error) {
|
|
59
|
+
// Log error but continue loading other tables
|
|
60
|
+
console.warn(
|
|
61
|
+
`Warning: Failed to load table '${tableName}':`,
|
|
62
|
+
error instanceof Error ? error.message : String(error),
|
|
63
|
+
);
|
|
64
|
+
// Remove the failed table from the tables map
|
|
65
|
+
this.tables.delete(tableName);
|
|
66
|
+
this.schemas.delete(tableName);
|
|
67
|
+
this.validationSchemas.delete(tableName);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
/**
|
|
74
|
+
* Load a table and its dependencies recursively
|
|
75
|
+
*/
|
|
76
|
+
private async loadTableWithDependencies(
|
|
77
|
+
tableName: string,
|
|
78
|
+
loadedTables: Set<string>,
|
|
79
|
+
loadingTables: Set<string>,
|
|
80
|
+
): Promise<void> {
|
|
81
|
+
// Skip if already loaded
|
|
82
|
+
if (loadedTables.has(tableName)) {
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
// Check for circular dependencies
|
|
87
|
+
if (loadingTables.has(tableName)) {
|
|
88
|
+
throw new Error(`Circular dependency detected for table '${tableName}'`);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// Get table config
|
|
92
|
+
const tableConfig = this.tables.get(tableName);
|
|
93
|
+
if (!tableConfig) {
|
|
94
|
+
throw new Error(`Table configuration not found for '${tableName}'`);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Mark as currently loading
|
|
98
|
+
loadingTables.add(tableName);
|
|
99
|
+
|
|
100
|
+
try {
|
|
101
|
+
// Load schema module to check for foreign key dependencies
|
|
102
|
+
// We need to load the entire module to access foreignKeys export
|
|
103
|
+
let foreignKeys: BiDirectionalSchema['foreignKeys'];
|
|
104
|
+
|
|
50
105
|
try {
|
|
51
|
-
await
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
106
|
+
const { pathToFileURL } = await import('node:url');
|
|
107
|
+
const schemaPath = tableConfig.jsonlPath.replace('.jsonl', '.schema.ts');
|
|
108
|
+
const schemaUrl = pathToFileURL(schemaPath).href;
|
|
109
|
+
const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
|
|
110
|
+
|
|
111
|
+
// Try to get foreign keys from exported 'schema' or directly from module
|
|
112
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
113
|
+
foreignKeys = schemaExport?.foreignKeys || schemaModule.foreignKeys;
|
|
114
|
+
} catch {
|
|
115
|
+
// Schema file not found - will continue without validation
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// If there are foreign key dependencies, load them first
|
|
119
|
+
if (foreignKeys && foreignKeys.length > 0) {
|
|
120
|
+
for (const fk of foreignKeys) {
|
|
121
|
+
const referencedTable = fk.references.table;
|
|
122
|
+
if (!loadedTables.has(referencedTable)) {
|
|
123
|
+
// Check if referenced table exists in our tables map
|
|
124
|
+
if (this.tables.has(referencedTable)) {
|
|
125
|
+
await this.loadTableWithDependencies(referencedTable, loadedTables, loadingTables);
|
|
126
|
+
} else {
|
|
127
|
+
throw new Error(
|
|
128
|
+
`Foreign key reference to non-existent table '${referencedTable}' in table '${tableName}'`,
|
|
129
|
+
);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Now load this table
|
|
136
|
+
const wasLoaded = await this.loadTable(tableName, tableConfig);
|
|
137
|
+
if (wasLoaded) {
|
|
138
|
+
loadedTables.add(tableName);
|
|
139
|
+
} else {
|
|
140
|
+
// Table was not loaded (e.g., empty data)
|
|
59
141
|
this.tables.delete(tableName);
|
|
60
142
|
}
|
|
143
|
+
} finally {
|
|
144
|
+
// Remove from loading set
|
|
145
|
+
loadingTables.delete(tableName);
|
|
61
146
|
}
|
|
62
147
|
}
|
|
63
148
|
|
|
64
149
|
/**
|
|
65
150
|
* Load a single table from JSONL file
|
|
151
|
+
* @returns true if table was loaded, false if skipped
|
|
66
152
|
*/
|
|
67
|
-
private async loadTable(tableName: string, config: TableConfig): Promise<
|
|
153
|
+
private async loadTable(tableName: string, config: TableConfig): Promise<boolean> {
|
|
68
154
|
// Read JSONL file
|
|
69
155
|
const data = await JsonlReader.read(config.jsonlPath);
|
|
70
156
|
|
|
71
|
-
if (data.length === 0) {
|
|
72
|
-
console.warn(`Warning: Table ${tableName} has no data`);
|
|
73
|
-
return;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
157
|
// Load validation schema if provided or try to auto-load
|
|
77
158
|
let validationSchema = config.validationSchema;
|
|
159
|
+
const schemaMetadata: {
|
|
160
|
+
primaryKey?: string;
|
|
161
|
+
foreignKeys?: BiDirectionalSchema['foreignKeys'];
|
|
162
|
+
indexes?: BiDirectionalSchema['indexes'];
|
|
163
|
+
} = {};
|
|
164
|
+
|
|
78
165
|
if (!validationSchema) {
|
|
79
166
|
try {
|
|
80
167
|
validationSchema = await SchemaLoader.loadSchema(config.jsonlPath);
|
|
81
|
-
} catch (
|
|
168
|
+
} catch (_error) {
|
|
82
169
|
// Schema file not found or failed to load - this is OK, table can still be used without validation
|
|
83
|
-
console.log(
|
|
84
|
-
`[LinesDB] No validation schema for table '${tableName}':`,
|
|
85
|
-
error instanceof Error ? error.message : String(error),
|
|
86
|
-
);
|
|
87
170
|
}
|
|
88
171
|
}
|
|
89
|
-
console.log(
|
|
90
|
-
`[LinesDB] Loaded validation schema for table '${tableName}':`,
|
|
91
|
-
validationSchema ? 'FOUND' : 'NOT FOUND',
|
|
92
|
-
);
|
|
93
|
-
if (validationSchema) {
|
|
94
|
-
console.log(`[LinesDB] Schema type:`, typeof validationSchema);
|
|
95
|
-
console.log(`[LinesDB] Schema has '~standard':`, '~standard' in validationSchema);
|
|
96
|
-
}
|
|
97
|
-
this.validationSchemas.set(tableName, validationSchema);
|
|
98
172
|
|
|
99
|
-
//
|
|
100
|
-
|
|
101
|
-
if (config.
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
173
|
+
// Load schema metadata (foreignKeys, primaryKey, indexes) from schema module
|
|
174
|
+
// SchemaLoader.loadSchema() only returns the validation schema object, not metadata
|
|
175
|
+
if (!config.validationSchema) {
|
|
176
|
+
// Only load if not already provided via config
|
|
177
|
+
try {
|
|
178
|
+
const { pathToFileURL } = await import('node:url');
|
|
179
|
+
const schemaPath = config.jsonlPath.replace('.jsonl', '.schema.ts');
|
|
180
|
+
const schemaUrl = pathToFileURL(schemaPath).href;
|
|
181
|
+
const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
|
|
182
|
+
|
|
183
|
+
// Try to get metadata from exported 'schema' or directly from module
|
|
184
|
+
const schemaExport = schemaModule.schema || schemaModule.default;
|
|
185
|
+
|
|
186
|
+
if (schemaExport?.primaryKey) {
|
|
187
|
+
schemaMetadata.primaryKey = schemaExport.primaryKey;
|
|
188
|
+
} else if (schemaModule.primaryKey) {
|
|
189
|
+
schemaMetadata.primaryKey = schemaModule.primaryKey;
|
|
190
|
+
}
|
|
108
191
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
// Add primary key constraint to columns
|
|
114
|
-
for (const pkColumn of biSchema.primaryKey) {
|
|
115
|
-
const col = schema.columns.find((c) => c.name === pkColumn);
|
|
116
|
-
if (col) {
|
|
117
|
-
col.primaryKey = true;
|
|
118
|
-
}
|
|
192
|
+
if (schemaExport?.foreignKeys) {
|
|
193
|
+
schemaMetadata.foreignKeys = schemaExport.foreignKeys;
|
|
194
|
+
} else if (schemaModule.foreignKeys) {
|
|
195
|
+
schemaMetadata.foreignKeys = schemaModule.foreignKeys;
|
|
119
196
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
197
|
+
|
|
198
|
+
if (schemaExport?.indexes) {
|
|
199
|
+
schemaMetadata.indexes = schemaExport.indexes;
|
|
200
|
+
} else if (schemaModule.indexes) {
|
|
201
|
+
schemaMetadata.indexes = schemaModule.indexes;
|
|
202
|
+
}
|
|
203
|
+
} catch (_error) {
|
|
204
|
+
// Schema file not found - this is OK
|
|
126
205
|
}
|
|
127
206
|
}
|
|
128
207
|
|
|
129
|
-
this.
|
|
130
|
-
|
|
131
|
-
// Create table
|
|
132
|
-
this.createTable(schema);
|
|
208
|
+
this.validationSchemas.set(tableName, validationSchema);
|
|
133
209
|
|
|
134
|
-
// Validate data
|
|
210
|
+
// Validate data first and collect validated (transformed) data
|
|
135
211
|
const validationErrors: Array<{
|
|
136
212
|
rowIndex: number;
|
|
137
213
|
rowData: JsonObject;
|
|
138
214
|
error: ValidationError;
|
|
139
215
|
}> = [];
|
|
216
|
+
const validatedData: JsonObject[] = [];
|
|
140
217
|
|
|
141
218
|
for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
|
|
142
219
|
const row = data[rowIndex];
|
|
143
220
|
try {
|
|
144
|
-
this.
|
|
221
|
+
const validatedRow = this.validateAndTransform(tableName, row);
|
|
222
|
+
validatedData.push(validatedRow);
|
|
145
223
|
} catch (error) {
|
|
146
224
|
if (error instanceof Error && error.name === 'ValidationError') {
|
|
147
225
|
validationErrors.push({
|
|
@@ -167,15 +245,65 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
167
245
|
throw enhancedError;
|
|
168
246
|
}
|
|
169
247
|
|
|
170
|
-
|
|
248
|
+
// Determine schema - infer from validated data if auto-inference is enabled
|
|
249
|
+
let schema: TableSchema;
|
|
250
|
+
if (config.schema) {
|
|
251
|
+
schema = config.schema;
|
|
252
|
+
} else if (config.autoInferSchema !== false) {
|
|
253
|
+
if (validatedData.length === 0) {
|
|
254
|
+
return false;
|
|
255
|
+
}
|
|
256
|
+
// Infer schema from validated data (which may have additional fields added by validation)
|
|
257
|
+
schema = JsonlReader.inferSchema(tableName, validatedData);
|
|
258
|
+
} else {
|
|
259
|
+
throw new Error(`No schema provided for table ${tableName} and autoInferSchema is disabled`);
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// Enhance schema with constraints from validation schema and schema metadata
|
|
263
|
+
// Priority: config.validationSchema (as BiDirectionalSchema) > schemaMetadata
|
|
264
|
+
const biSchema = validationSchema as BiDirectionalSchema;
|
|
265
|
+
const primaryKey = biSchema?.primaryKey || schemaMetadata.primaryKey;
|
|
266
|
+
const foreignKeys = biSchema?.foreignKeys || schemaMetadata.foreignKeys;
|
|
267
|
+
const indexes = biSchema?.indexes || schemaMetadata.indexes;
|
|
268
|
+
|
|
269
|
+
if (primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
270
|
+
// Add primary key constraint to column
|
|
271
|
+
const col = schema.columns.find((c) => c.name === primaryKey);
|
|
272
|
+
if (col) {
|
|
273
|
+
col.primaryKey = true;
|
|
274
|
+
}
|
|
275
|
+
} else if (!primaryKey && !schema.columns.some((col) => col.primaryKey)) {
|
|
276
|
+
// If no primary key is defined, use 'id' column as primary key if it exists
|
|
277
|
+
// This matches the behavior of JsonlReader.inferSchema()
|
|
278
|
+
const idColumn = schema.columns.find((c) => c.name === 'id');
|
|
279
|
+
if (idColumn) {
|
|
280
|
+
idColumn.primaryKey = true;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
if (foreignKeys) {
|
|
284
|
+
schema.foreignKeys = foreignKeys;
|
|
285
|
+
}
|
|
286
|
+
if (indexes) {
|
|
287
|
+
schema.indexes = indexes;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
this.schemas.set(tableName, schema);
|
|
291
|
+
|
|
292
|
+
// Create table
|
|
293
|
+
this.createTable(schema);
|
|
294
|
+
|
|
295
|
+
// Insert validated data
|
|
296
|
+
this.insertData(tableName, schema, validatedData);
|
|
297
|
+
|
|
298
|
+
return true;
|
|
171
299
|
}
|
|
172
300
|
|
|
173
301
|
/**
|
|
174
302
|
* Create table in SQLite with constraints and indexes
|
|
175
303
|
*/
|
|
176
304
|
private createTable(schema: TableSchema): void {
|
|
177
|
-
//
|
|
178
|
-
|
|
305
|
+
// Note: Foreign key constraints are enabled at database connection level (see sqlite-adapter.ts)
|
|
306
|
+
// No need to enable them here for each table
|
|
179
307
|
|
|
180
308
|
// Quote table name to handle special characters
|
|
181
309
|
const quotedTableName = this.quoteTableName(schema.name);
|
|
@@ -195,10 +323,8 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
195
323
|
if (schema.foreignKeys && schema.foreignKeys.length > 0) {
|
|
196
324
|
for (const fk of schema.foreignKeys) {
|
|
197
325
|
const fkParts = [
|
|
198
|
-
`FOREIGN KEY (${
|
|
199
|
-
`REFERENCES ${this.quoteTableName(fk.references.table)}(${fk.references.
|
|
200
|
-
.map((col) => this.quoteIdentifier(col))
|
|
201
|
-
.join(', ')})`,
|
|
326
|
+
`FOREIGN KEY (${this.quoteIdentifier(fk.column)})`,
|
|
327
|
+
`REFERENCES ${this.quoteTableName(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
|
|
202
328
|
];
|
|
203
329
|
if (fk.onDelete) {
|
|
204
330
|
fkParts.push(`ON DELETE ${fk.onDelete}`);
|
|
@@ -402,19 +528,14 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
402
528
|
}
|
|
403
529
|
|
|
404
530
|
/**
|
|
405
|
-
* Validate data using StandardSchema
|
|
531
|
+
* Validate data using StandardSchema and return the transformed value
|
|
406
532
|
* Note: Only synchronous validation is supported
|
|
407
533
|
*/
|
|
408
|
-
private
|
|
534
|
+
private validateAndTransform(tableName: string, data: unknown): JsonObject {
|
|
409
535
|
const schema = this.validationSchemas.get(tableName);
|
|
410
|
-
console.log(`[LinesDB] validateData called for table '${tableName}', schema exists:`, !!schema);
|
|
411
536
|
if (!schema) {
|
|
412
|
-
|
|
413
|
-
`[LinesDB] No validation schema found for table '${tableName}', skipping validation`,
|
|
414
|
-
);
|
|
415
|
-
return;
|
|
537
|
+
return data as JsonObject;
|
|
416
538
|
}
|
|
417
|
-
console.log(`[LinesDB] Validating data:`, JSON.stringify(data));
|
|
418
539
|
|
|
419
540
|
const result = schema['~standard'].validate(data);
|
|
420
541
|
|
|
@@ -452,6 +573,27 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
452
573
|
error.issues = result.issues;
|
|
453
574
|
throw error;
|
|
454
575
|
}
|
|
576
|
+
|
|
577
|
+
// Return the transformed value from validation
|
|
578
|
+
// When there are no issues, result.value should be present
|
|
579
|
+
const transformedValue = ('value' in result ? result.value : data) as JsonObject;
|
|
580
|
+
|
|
581
|
+
// Convert undefined values to null for JSON compatibility
|
|
582
|
+
const normalizedValue: JsonObject = {};
|
|
583
|
+
for (const [key, value] of Object.entries(transformedValue)) {
|
|
584
|
+
normalizedValue[key] = value === undefined ? null : value;
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
return normalizedValue;
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
/**
|
|
591
|
+
* Validate data using StandardSchema (without returning transformed value)
|
|
592
|
+
* Note: Only synchronous validation is supported
|
|
593
|
+
*/
|
|
594
|
+
private validateData(tableName: string, data: unknown): void {
|
|
595
|
+
// Use validateAndTransform but discard the result
|
|
596
|
+
this.validateAndTransform(tableName, data);
|
|
455
597
|
}
|
|
456
598
|
|
|
457
599
|
/**
|
|
@@ -999,7 +1141,9 @@ export class LinesDB<Tables extends TableDefs> {
|
|
|
999
1141
|
|
|
1000
1142
|
return result;
|
|
1001
1143
|
} catch (error) {
|
|
1002
|
-
this.
|
|
1144
|
+
if (this.inTransaction) {
|
|
1145
|
+
this.db.exec('ROLLBACK');
|
|
1146
|
+
}
|
|
1003
1147
|
this.inTransaction = false;
|
|
1004
1148
|
throw error;
|
|
1005
1149
|
}
|
package/src/jsonl-reader.ts
CHANGED
|
@@ -123,7 +123,7 @@ export class JsonlReader {
|
|
|
123
123
|
}
|
|
124
124
|
|
|
125
125
|
private static inferType(value: unknown): string {
|
|
126
|
-
if (value === null) return 'NULL';
|
|
126
|
+
if (value === null || value === undefined) return 'NULL';
|
|
127
127
|
if (typeof value === 'number') {
|
|
128
128
|
return Number.isInteger(value) ? 'INTEGER' : 'REAL';
|
|
129
129
|
}
|
package/src/schema.ts
CHANGED
|
@@ -5,9 +5,9 @@ import type { StandardSchema, Table, ForeignKeyDefinition, IndexDefinition } fro
|
|
|
5
5
|
*/
|
|
6
6
|
export interface SchemaOptions {
|
|
7
7
|
/**
|
|
8
|
-
* Primary key
|
|
8
|
+
* Primary key column
|
|
9
9
|
*/
|
|
10
|
-
primaryKey?: string
|
|
10
|
+
primaryKey?: string;
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
13
|
* Foreign key constraints
|
|
@@ -39,9 +39,9 @@ export interface BiDirectionalSchema<Input extends Table = Table, Output extends
|
|
|
39
39
|
backward?: (output: Output) => Input;
|
|
40
40
|
|
|
41
41
|
/**
|
|
42
|
-
* Primary key
|
|
42
|
+
* Primary key column
|
|
43
43
|
*/
|
|
44
|
-
primaryKey?: string
|
|
44
|
+
primaryKey?: string;
|
|
45
45
|
|
|
46
46
|
/**
|
|
47
47
|
* Foreign key constraints
|
|
@@ -79,9 +79,9 @@ export interface BiDirectionalSchema<Input extends Table = Table, Output extends
|
|
|
79
79
|
* const schema = defineSchema(
|
|
80
80
|
* v.object({ id: v.number(), customerId: v.number() }),
|
|
81
81
|
* {
|
|
82
|
-
* primaryKey:
|
|
82
|
+
* primaryKey: 'id',
|
|
83
83
|
* foreignKeys: [
|
|
84
|
-
* {
|
|
84
|
+
* { column: 'customerId', references: { table: 'users', column: 'id' } }
|
|
85
85
|
* ]
|
|
86
86
|
* }
|
|
87
87
|
* );
|
package/src/sqlite-adapter.ts
CHANGED
|
@@ -39,6 +39,10 @@ function createNodeDatabase(path: string): SQLiteDatabase {
|
|
|
39
39
|
const { DatabaseSync } = require('node:sqlite');
|
|
40
40
|
const db = new DatabaseSync(path);
|
|
41
41
|
|
|
42
|
+
// CRITICAL: Enable foreign key constraints
|
|
43
|
+
// SQLite disables foreign keys by default, which is a major data integrity issue
|
|
44
|
+
db.exec('PRAGMA foreign_keys = ON');
|
|
45
|
+
|
|
42
46
|
return {
|
|
43
47
|
prepare(sql: string): SQLiteStatement {
|
|
44
48
|
const stmt = db.prepare(sql);
|
package/src/types.ts
CHANGED
|
@@ -18,10 +18,10 @@ export type InferInput<T> = T extends StandardSchemaV1<infer I, unknown> ? I : n
|
|
|
18
18
|
export type InferOutput<T> = T extends StandardSchemaV1<unknown, infer O> ? O : never;
|
|
19
19
|
|
|
20
20
|
export interface ForeignKeyDefinition {
|
|
21
|
-
|
|
21
|
+
column: string;
|
|
22
22
|
references: {
|
|
23
23
|
table: string;
|
|
24
|
-
|
|
24
|
+
column: string;
|
|
25
25
|
};
|
|
26
26
|
onDelete?: 'CASCADE' | 'SET NULL' | 'RESTRICT' | 'NO ACTION';
|
|
27
27
|
onUpdate?: 'CASCADE' | 'SET NULL' | 'RESTRICT' | 'NO ACTION';
|
package/src/validator.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { readdir, stat } from 'node:fs/promises';
|
|
2
|
-
import { join, basename } from 'node:path';
|
|
2
|
+
import { join, basename, dirname } from 'node:path';
|
|
3
3
|
import { JsonlReader } from './jsonl-reader.js';
|
|
4
4
|
import { SchemaLoader } from './schema-loader.js';
|
|
5
|
-
import
|
|
6
|
-
import type {
|
|
5
|
+
import { LinesDB } from './database.js';
|
|
6
|
+
import type { StandardSchemaIssue } from './types.js';
|
|
7
7
|
|
|
8
8
|
export interface ValidationResult {
|
|
9
9
|
valid: boolean;
|
|
@@ -91,10 +91,10 @@ export class Validator {
|
|
|
91
91
|
allWarnings.push(...result.warnings);
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
-
// Then, validate
|
|
95
|
-
if (filesWithSchema.length > 0) {
|
|
96
|
-
const
|
|
97
|
-
allErrors.push(...
|
|
94
|
+
// Then, validate by actually loading into database
|
|
95
|
+
if (filesWithSchema.length > 0 && allErrors.length === 0) {
|
|
96
|
+
const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
|
|
97
|
+
allErrors.push(...dbErrors);
|
|
98
98
|
}
|
|
99
99
|
|
|
100
100
|
return {
|
|
@@ -105,80 +105,71 @@ export class Validator {
|
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
/**
|
|
108
|
-
* Validate
|
|
108
|
+
* Validate by loading data into an actual database
|
|
109
|
+
* This catches constraint violations (unique, primary key, foreign key, etc.)
|
|
109
110
|
*/
|
|
110
|
-
private async
|
|
111
|
+
private async validateWithDatabase(
|
|
111
112
|
dirPath: string,
|
|
112
113
|
jsonlFiles: string[],
|
|
113
114
|
): Promise<ValidationErrorDetail[]> {
|
|
114
115
|
const errors: ValidationErrorDetail[] = [];
|
|
115
116
|
|
|
116
|
-
//
|
|
117
|
-
const
|
|
118
|
-
const
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
tableData.set(tableName, data);
|
|
126
|
-
tableSchemas.set(tableName, schema as BiDirectionalSchema);
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
// Check foreign keys for each table
|
|
130
|
-
for (const file of jsonlFiles) {
|
|
131
|
-
const tableName = basename(file, '.jsonl');
|
|
132
|
-
const schema = tableSchemas.get(tableName);
|
|
133
|
-
const data = tableData.get(tableName);
|
|
134
|
-
|
|
135
|
-
if (!schema || !data || !schema.foreignKeys) {
|
|
136
|
-
continue;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
// Check each foreign key constraint
|
|
140
|
-
for (const fk of schema.foreignKeys) {
|
|
141
|
-
const referencedTable = fk.references.table;
|
|
142
|
-
const referencedData = tableData.get(referencedTable);
|
|
143
|
-
|
|
144
|
-
if (!referencedData) {
|
|
145
|
-
// Referenced table not found - skip validation
|
|
146
|
-
continue;
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Build index of referenced values for fast lookup
|
|
150
|
-
const referencedValues = new Set<string>();
|
|
151
|
-
for (const refRow of referencedData) {
|
|
152
|
-
// Build composite key from referenced columns
|
|
153
|
-
const keyValues = fk.references.columns.map((col) => refRow[col]);
|
|
154
|
-
const compositeKey = JSON.stringify(keyValues);
|
|
155
|
-
referencedValues.add(compositeKey);
|
|
156
|
-
}
|
|
117
|
+
// Capture console.warn messages
|
|
118
|
+
const warnMessages: string[] = [];
|
|
119
|
+
const originalWarn = console.warn;
|
|
120
|
+
console.warn = (...args: unknown[]) => {
|
|
121
|
+
const message = args.map((arg) => String(arg)).join(' ');
|
|
122
|
+
warnMessages.push(message);
|
|
123
|
+
// Still output to console for debugging
|
|
124
|
+
originalWarn(...args);
|
|
125
|
+
};
|
|
157
126
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
127
|
+
try {
|
|
128
|
+
// Try to initialize database with the data directory
|
|
129
|
+
const db = LinesDB.create({ dataDir: dirPath });
|
|
130
|
+
await db.initialize();
|
|
131
|
+
await db.close();
|
|
132
|
+
|
|
133
|
+
// Check if there were any loading errors
|
|
134
|
+
for (const message of warnMessages) {
|
|
135
|
+
if (message.includes('Failed to load table')) {
|
|
136
|
+
// Extract table name from message
|
|
137
|
+
const tableNameMatch = message.match(/Failed to load table '([^']+)'/);
|
|
138
|
+
const tableName = tableNameMatch ? tableNameMatch[1] : 'unknown';
|
|
139
|
+
|
|
140
|
+
const file = jsonlFiles.find((f) => basename(f, '.jsonl') === tableName);
|
|
141
|
+
|
|
142
|
+
errors.push({
|
|
143
|
+
file: file || `${dirPath}/${tableName}.jsonl`,
|
|
144
|
+
tableName,
|
|
145
|
+
rowIndex: 0,
|
|
146
|
+
issues: [
|
|
147
|
+
{
|
|
148
|
+
message: message.replace(/^Warning:\s*/, ''),
|
|
149
|
+
path: [],
|
|
177
150
|
},
|
|
178
|
-
|
|
179
|
-
|
|
151
|
+
],
|
|
152
|
+
type: 'schema',
|
|
153
|
+
});
|
|
180
154
|
}
|
|
181
155
|
}
|
|
156
|
+
} catch (error) {
|
|
157
|
+
// If initialization itself fails, report it
|
|
158
|
+
errors.push({
|
|
159
|
+
file: dirPath,
|
|
160
|
+
tableName: 'database',
|
|
161
|
+
rowIndex: 0,
|
|
162
|
+
issues: [
|
|
163
|
+
{
|
|
164
|
+
message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
|
|
165
|
+
path: [],
|
|
166
|
+
},
|
|
167
|
+
],
|
|
168
|
+
type: 'schema',
|
|
169
|
+
});
|
|
170
|
+
} finally {
|
|
171
|
+
// Restore console.warn
|
|
172
|
+
console.warn = originalWarn;
|
|
182
173
|
}
|
|
183
174
|
|
|
184
175
|
return errors;
|
|
@@ -196,7 +187,7 @@ export class Validator {
|
|
|
196
187
|
|
|
197
188
|
const errors: ValidationErrorDetail[] = [];
|
|
198
189
|
|
|
199
|
-
// Validate each row
|
|
190
|
+
// Validate each row with schema
|
|
200
191
|
for (let i = 0; i < data.length; i++) {
|
|
201
192
|
const row = data[i];
|
|
202
193
|
const result = schema['~standard'].validate(row);
|
|
@@ -217,6 +208,13 @@ export class Validator {
|
|
|
217
208
|
}
|
|
218
209
|
}
|
|
219
210
|
|
|
211
|
+
// If schema validation passed, validate with database
|
|
212
|
+
if (errors.length === 0) {
|
|
213
|
+
const dirPath = dirname(filePath);
|
|
214
|
+
const dbErrors = await this.validateWithDatabase(dirPath, [filePath]);
|
|
215
|
+
errors.push(...dbErrors);
|
|
216
|
+
}
|
|
217
|
+
|
|
220
218
|
return {
|
|
221
219
|
valid: errors.length === 0,
|
|
222
220
|
errors,
|