@toiroakr/lines-db 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/cli.js +378 -415
- package/dist/index.cjs +195 -327
- package/dist/index.d.cts +64 -84
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.ts +64 -84
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +197 -328
- package/dist/index.js.map +1 -1
- package/package.json +2 -1
- package/src/cli.ts +226 -126
- package/src/database.ts +296 -52
- package/src/index.ts +2 -2
- package/src/jsonl-migration.ts +24 -56
- package/src/schema.ts +37 -32
- package/src/types.ts +21 -0
- package/src/validator.test.ts +0 -507
- package/src/validator.ts +0 -441
package/src/validator.ts
DELETED
|
@@ -1,441 +0,0 @@
|
|
|
1
|
-
import { readdir, stat } from 'node:fs/promises';
|
|
2
|
-
import { basename, dirname, isAbsolute, join } from 'node:path';
|
|
3
|
-
import { JsonlReader } from './jsonl-reader.js';
|
|
4
|
-
import { SchemaLoader } from './schema-loader.js';
|
|
5
|
-
import { LinesDB } from './database.js';
|
|
6
|
-
import type { StandardSchemaIssue } from './types.js';
|
|
7
|
-
|
|
8
|
-
/**
 * Outcome of a validation run over one JSONL file or a directory of them.
 */
export interface ValidationResult {
  /** `true` exactly when {@link ValidationResult.errors} is empty. */
  valid: boolean;
  /** Every problem found, one entry per offending row (or per failed database setup). */
  errors: ValidationErrorDetail[];
  /** Non-fatal notices, e.g. files that were skipped because no schema was found. */
  warnings: string[];
}
|
|
13
|
-
|
|
14
|
-
/**
 * A single validation failure, tied to the file, table and row that produced it.
 */
export interface ValidationErrorDetail {
  /** Path of the JSONL file (or directory, for database-level failures) the error belongs to. */
  file: string;
  /** Table name; the validator derives it from the file name without the `.jsonl` suffix. */
  tableName: string;
  /** Zero-based index of the offending row within the file. */
  rowIndex: number;
  /** Issues reported for the row; empty for foreign-key errors, which use `foreignKeyError`. */
  issues: readonly StandardSchemaIssue[];
  /** Which kind of check produced this error. */
  type?: 'schema' | 'foreignKey';
  /** Present when `type` is `'foreignKey'`: the dangling reference that was detected. */
  foreignKeyError?: {
    /** Referencing column in the failing row. */
    column: string;
    /** Value that had no match in the referenced table. */
    value: unknown;
    /** Table the foreign key points at. */
    referencedTable: string;
    /** Column the foreign key points at. */
    referencedColumn: string;
  };
}
|
|
27
|
-
|
|
28
|
-
/**
 * Construction options for the validator.
 */
export interface ValidatorOptions {
  /** File or directory path to validate; relative paths are resolved against `projectRoot`. */
  path: string;
  /** Base directory for relative paths. The validator falls back to `process.cwd()` when omitted. */
  projectRoot?: string;
}
|
|
32
|
-
|
|
33
|
-
/** Schema-module metadata needed to build a table for constraint checking. */
interface TableMetadata {
  schema: any;
  foreignKeys: any[];
  indexes: any[];
  primaryKey: string | undefined;
}

/**
 * Validates JSONL data files against their companion schema modules.
 *
 * Validation happens in two stages:
 *  1. every row is checked with the schema's `~standard.validate` function;
 *  2. if that passes, the rows are inserted one at a time into an in-memory
 *     `LinesDB` so that primary-key, unique-index and foreign-key violations
 *     surface as per-row errors.
 */
export class Validator {
  private readonly path: string;
  private readonly projectRoot: string;

  /**
   * @param options - Target path and optional project root.
   */
  constructor(options: ValidatorOptions) {
    this.path = options.path;
    // `||` (not `??`) is deliberate: an empty-string projectRoot also falls back to cwd.
    this.projectRoot = options.projectRoot || process.cwd();
  }

  /**
   * Validate JSONL file(s).
   *
   * @returns The combined result for the file, or for every JSONL file in the directory.
   * @throws Error when the path is neither a directory nor a `.jsonl` file
   *   (errors raised by `stat` for a missing path also propagate).
   */
  async validate(): Promise<ValidationResult> {
    const fullPath = this.resolveTargetPath();
    const stats = await stat(fullPath);

    if (stats.isDirectory()) {
      return this.validateDirectory(fullPath);
    }
    if (stats.isFile() && fullPath.endsWith('.jsonl')) {
      return this.validateFile(fullPath);
    }
    throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
  }

  /**
   * Resolve the configured path: absolute paths are used as-is, relative ones
   * are joined onto the project root. `isAbsolute` is used instead of a
   * leading-slash test so Windows paths such as `C:\data` are recognised too.
   */
  private resolveTargetPath(): string {
    return isAbsolute(this.path) ? this.path : join(this.projectRoot, this.path);
  }

  /**
   * Derive the schema module path that sits next to a JSONL file.
   * Only the trailing `.jsonl` extension is rewritten, so directory names that
   * happen to contain ".jsonl" are left untouched.
   */
  private schemaModulePath(jsonlPath: string): string {
    return jsonlPath.replace(/\.jsonl$/, '.schema.ts');
  }

  /**
   * Validate all JSONL files in a directory.
   *
   * Files without a schema are skipped with a warning.
   *
   * @throws Error when the directory contains no `.jsonl` files.
   */
  private async validateDirectory(dirPath: string): Promise<ValidationResult> {
    const entries = await readdir(dirPath, { withFileTypes: true });
    const jsonlFiles = entries
      .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
      .map((entry) => join(dirPath, entry.name));

    if (jsonlFiles.length === 0) {
      throw new Error(`No JSONL files found in directory: ${dirPath}`);
    }

    const allErrors: ValidationErrorDetail[] = [];
    const allWarnings: string[] = [];
    const filesWithSchema: string[] = [];

    // Partition: files with a schema get validated, the rest only produce a warning.
    for (const file of jsonlFiles) {
      if (await SchemaLoader.hasSchema(file)) {
        filesWithSchema.push(file);
      } else {
        const tableName = basename(file, '.jsonl');
        allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
      }
    }

    // NOTE(review): validateFile() itself runs a full-directory database pass for
    // every file whose rows pass schema validation, so the directory is reloaded
    // once per file here. Kept as-is because collapsing it would change which
    // errors are reported when only some files fail schema validation.
    for (const file of filesWithSchema) {
      const result = await this.validateFile(file);
      allErrors.push(...result.errors);
      allWarnings.push(...result.warnings);
    }

    // Final database pass; unlike the per-file passes it also keeps
    // directory-level failures (whose `file` is the directory path).
    if (filesWithSchema.length > 0 && allErrors.length === 0) {
      const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
      allErrors.push(...dbErrors);
    }

    return {
      valid: allErrors.length === 0,
      errors: allErrors,
      warnings: allWarnings,
    };
  }

  /**
   * Load the validation schema plus the optional `foreignKeys`, `indexes` and
   * `primaryKey` metadata exported by the schema module next to `file`.
   *
   * @throws Whatever `SchemaLoader.loadSchema` or the dynamic import throws;
   *   the caller treats any failure as "skip this table".
   */
  private async loadTableMetadata(file: string): Promise<TableMetadata> {
    const schema = await SchemaLoader.loadSchema(file);
    const { pathToFileURL } = await import('node:url');
    const schemaUrl = pathToFileURL(this.schemaModulePath(file)).href;
    // The timestamp query string makes each import URL unique.
    const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
    const schemaExport = schemaModule.schema || schemaModule.default;

    return {
      schema,
      foreignKeys: schemaExport?.foreignKeys || [],
      indexes: schemaExport?.indexes || [],
      primaryKey: schemaExport?.primaryKey || undefined,
    };
  }

  /**
   * Validate by loading data into database one row at a time.
   * This catches constraint violations and extracts detailed error information.
   *
   * @param dirPath - Directory being validated; used as `file` for database-level failures.
   * @param jsonlFiles - Files to load, in the order given.
   * @returns Constraint errors found; never rejects for database failures,
   *   which are reported as a single `'database'` error instead.
   */
  private async validateWithDatabase(
    dirPath: string,
    jsonlFiles: string[],
  ): Promise<ValidationErrorDetail[]> {
    const errors: ValidationErrorDetail[] = [];

    try {
      const db = LinesDB.create({ dataDir: ':memory:' });

      try {
        // Load all tables one by one, checking each row.
        for (const file of jsonlFiles) {
          const tableName = basename(file, '.jsonl');
          const data = await JsonlReader.read(file);

          let metadata: TableMetadata;
          try {
            metadata = await this.loadTableMetadata(file);
          } catch (_error) {
            // Schema not found or failed to load: nothing to check for this table.
            continue;
          }
          const { schema, foreignKeys, indexes, primaryKey } = metadata;

          try {
            const tableSchema = this.createTableSchema(
              tableName,
              data,
              schema,
              foreignKeys,
              indexes,
              primaryKey,
            );
            this.createTableInDb(db, tableSchema);

            // Insert rows one by one so a violation can be pinned to its row.
            for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
              const row = data[rowIndex];
              try {
                this.insertRowIntoDb(db, tableName, tableSchema, row);
              } catch (error) {
                const constraintError = this.analyzeConstraintError(
                  error,
                  file,
                  tableName,
                  rowIndex,
                  row,
                  foreignKeys,
                  db,
                );
                if (constraintError) {
                  errors.push(constraintError);
                }
              }
            }
          } catch (_error) {
            // Best effort: skip this table (e.g. empty file or failed CREATE) and continue.
            continue;
          }
        }
      } finally {
        // Always release the database, including when reading a file threw above.
        await db.close();
      }
    } catch (error) {
      // Database could not be created/closed, or a file could not be read.
      errors.push({
        file: dirPath,
        tableName: 'database',
        rowIndex: 0,
        issues: [
          {
            message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
            path: [],
          },
        ],
        type: 'schema',
      });
    }

    return errors;
  }

  /**
   * Create table schema from data and validation schema.
   *
   * Columns are inferred from the data via `JsonlReader.inferSchema`; the
   * primary key, foreign keys and indexes from the schema module are then
   * layered on top.
   *
   * @throws Error when `data` is empty.
   */
  private createTableSchema(
    tableName: string,
    data: any[],
    validationSchema: any,
    foreignKeys: any[],
    indexes: any[],
    primaryKey?: string,
  ): any {
    if (data.length === 0) {
      throw new Error(`No data found in ${tableName}`);
    }

    const schema = JsonlReader.inferSchema(tableName, data);

    if (primaryKey) {
      // An explicit primary key that matches no inferred column is silently ignored.
      const pkColumn = schema.columns.find((col: any) => col.name === primaryKey);
      if (pkColumn) {
        pkColumn.primaryKey = true;
      }
    } else if (!schema.columns.some((col: any) => col.primaryKey)) {
      // If no primary key is defined, use 'id' column as primary key if it exists.
      // This matches the behavior of database.ts
      const idColumn = schema.columns.find((c: any) => c.name === 'id');
      if (idColumn) {
        idColumn.primaryKey = true;
      }
    }

    if (foreignKeys && foreignKeys.length > 0) {
      schema.foreignKeys = foreignKeys;
    }

    if (indexes && indexes.length > 0) {
      schema.indexes = indexes;
    }

    return schema;
  }

  /**
   * Create the table (and any declared indexes) in the database.
   * All identifiers are quoted through `quoteIdentifier`.
   */
  private createTableInDb(db: LinesDB<any>, schema: any): void {
    const columns = schema.columns.map((col: any) => {
      let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
      if (col.primaryKey) {
        colDef += ' PRIMARY KEY';
      }
      return colDef;
    });

    // Foreign keys are appended as table-level constraints after the columns.
    if (schema.foreignKeys && schema.foreignKeys.length > 0) {
      for (const fk of schema.foreignKeys) {
        columns.push(
          `FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
        );
      }
    }

    const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(', ')})`;
    db.execute(sql);

    if (schema.indexes && schema.indexes.length > 0) {
      for (const index of schema.indexes) {
        // Unnamed indexes get a name derived from the table and column names.
        const indexName = index.name || `idx_${schema.name}_${index.columns.join('_')}`;
        const uniqueKeyword = index.unique ? 'UNIQUE' : '';
        const indexColumns = index.columns
          .map((col: string) => this.quoteIdentifier(col))
          .join(', ');
        const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
        db.execute(indexSql);
      }
    }
  }

  /**
   * Insert a row into database using a parameterised statement.
   *
   * Values are normalised first: `null`/`undefined` become `null`, objects
   * (including arrays) are JSON-encoded, booleans become `1`/`0`.
   */
  private insertRowIntoDb(db: LinesDB<any>, tableName: string, schema: any, row: any): void {
    const columnNames = schema.columns.map((col: any) => col.name);
    const quotedColumns = columnNames.map((name: string) => this.quoteIdentifier(name));
    const placeholders = columnNames.map(() => '?').join(', ');
    const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(', ')}) VALUES (${placeholders})`;

    const values = columnNames.map((col: string) => {
      const value = row[col];
      if (value === null || value === undefined) return null;
      if (typeof value === 'object') return JSON.stringify(value);
      if (typeof value === 'boolean') return value ? 1 : 0;
      return value;
    });

    db.execute(sql, values);
  }

  /**
   * Analyze constraint error and extract detailed information.
   *
   * For errors whose message contains "FOREIGN KEY constraint failed", each
   * declared foreign key is probed to find the one whose referenced value is
   * missing. Anything else (or an unidentifiable foreign key) is reported as a
   * generic `'schema'` error carrying the original message.
   *
   * @returns The error detail; with the current logic this is never `null`.
   */
  private analyzeConstraintError(
    error: any,
    file: string,
    tableName: string,
    rowIndex: number,
    row: any,
    foreignKeys: any[],
    db: LinesDB<any>,
  ): ValidationErrorDetail | null {
    const errorMessage = error instanceof Error ? error.message : String(error);

    if (errorMessage.includes('FOREIGN KEY constraint failed')) {
      for (const fk of foreignKeys) {
        const fkValue = row[fk.column];
        // A null/absent reference cannot be the one that failed.
        if (fkValue === null || fkValue === undefined) continue;

        try {
          const result = db.query(
            `SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`,
            [fkValue],
          );
          if (result.length > 0 && (result[0] as any).count === 0) {
            return {
              file,
              tableName,
              rowIndex,
              issues: [],
              type: 'foreignKey',
              foreignKeyError: {
                column: fk.column,
                value: fkValue,
                referencedTable: fk.references.table,
                referencedColumn: fk.references.column,
              },
            };
          }
        } catch (_) {
          // Referenced table doesn't exist yet; try the next foreign key.
        }
      }
    }

    // Other constraint errors (primary key, unique, etc.)
    return {
      file,
      tableName,
      rowIndex,
      issues: [
        {
          message: errorMessage,
          path: [],
        },
      ],
      type: 'schema',
    };
  }

  /**
   * Quote SQL identifier: wrap in double quotes and double any embedded quote.
   */
  private quoteIdentifier(name: string): string {
    return `"${name.replace(/"/g, '""')}"`;
  }

  /**
   * Validate a single JSONL file.
   *
   * Rows are first checked against the schema. Only when all rows pass are
   * database constraints checked, by loading every JSONL file of the same
   * directory (so foreign keys can resolve) and keeping the errors that belong
   * to this file.
   *
   * @throws Error when the schema's validate function returns a Promise.
   */
  private async validateFile(filePath: string): Promise<ValidationResult> {
    const tableName = basename(filePath, '.jsonl');
    const data = await JsonlReader.read(filePath);
    const schema = await SchemaLoader.loadSchema(filePath);

    const errors: ValidationErrorDetail[] = [];

    for (let i = 0; i < data.length; i++) {
      const row = data[i];
      const result = schema['~standard'].validate(row);

      // Only synchronous validation is supported
      if (result instanceof Promise) {
        throw new Error('Asynchronous validation is not supported.');
      }

      if (result.issues && result.issues.length > 0) {
        errors.push({
          file: filePath,
          tableName,
          rowIndex: i, // 0-indexed, will be converted to 1-indexed in formatter
          issues: result.issues,
          type: 'schema',
        });
      }
    }

    // If schema validation passed, validate database constraints
    if (errors.length === 0) {
      const dirPath = dirname(filePath);

      const entries = await readdir(dirPath, { withFileTypes: true });
      const allJsonlFiles = entries
        .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
        .map((entry) => join(dirPath, entry.name));

      const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
      // Only include errors for the current file
      errors.push(...dbErrors.filter((e) => e.file === filePath));
    }

    return {
      valid: errors.length === 0,
      errors,
      warnings: [],
    };
  }
}
|