@toiroakr/lines-db 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/validator.ts DELETED
@@ -1,441 +0,0 @@
1
- import { readdir, stat } from 'node:fs/promises';
2
- import { join, basename, dirname } from 'node:path';
3
- import { JsonlReader } from './jsonl-reader.js';
4
- import { SchemaLoader } from './schema-loader.js';
5
- import { LinesDB } from './database.js';
6
- import type { StandardSchemaIssue } from './types.js';
7
-
8
- export interface ValidationResult {
9
- valid: boolean;
10
- errors: ValidationErrorDetail[];
11
- warnings: string[];
12
- }
13
-
14
- export interface ValidationErrorDetail {
15
- file: string;
16
- tableName: string;
17
- rowIndex: number;
18
- issues: ReadonlyArray<StandardSchemaIssue>;
19
- type?: 'schema' | 'foreignKey';
20
- foreignKeyError?: {
21
- column: string;
22
- value: unknown;
23
- referencedTable: string;
24
- referencedColumn: string;
25
- };
26
- }
27
-
28
- export interface ValidatorOptions {
29
- path: string; // File or directory path
30
- projectRoot?: string;
31
- }
32
-
33
- export class Validator {
34
- private path: string;
35
- private projectRoot: string;
36
-
37
- constructor(options: ValidatorOptions) {
38
- this.path = options.path;
39
- this.projectRoot = options.projectRoot || process.cwd();
40
- }
41
-
42
- /**
43
- * Validate JSONL file(s)
44
- */
45
- async validate(): Promise<ValidationResult> {
46
- // Use absolute path if provided, otherwise resolve relative to projectRoot
47
- const fullPath = this.path.startsWith('/') ? this.path : join(this.projectRoot, this.path);
48
- const stats = await stat(fullPath);
49
-
50
- if (stats.isDirectory()) {
51
- return this.validateDirectory(fullPath);
52
- } else if (stats.isFile() && fullPath.endsWith('.jsonl')) {
53
- return this.validateFile(fullPath);
54
- } else {
55
- throw new Error(`Invalid path: ${this.path}. Must be a directory or .jsonl file.`);
56
- }
57
- }
58
-
59
- /**
60
- * Validate all JSONL files in a directory
61
- */
62
- private async validateDirectory(dirPath: string): Promise<ValidationResult> {
63
- const entries = await readdir(dirPath, { withFileTypes: true });
64
- const jsonlFiles = entries
65
- .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
66
- .map((entry) => join(dirPath, entry.name));
67
-
68
- if (jsonlFiles.length === 0) {
69
- throw new Error(`No JSONL files found in directory: ${dirPath}`);
70
- }
71
-
72
- const allErrors: ValidationErrorDetail[] = [];
73
- const allWarnings: string[] = [];
74
- const filesWithSchema: string[] = [];
75
-
76
- // Filter files with schema and collect warnings for files without schema
77
- for (const file of jsonlFiles) {
78
- const hasSchema = await SchemaLoader.hasSchema(file);
79
- if (hasSchema) {
80
- filesWithSchema.push(file);
81
- } else {
82
- const tableName = basename(file, '.jsonl');
83
- allWarnings.push(`Skipping validation for '${tableName}': schema file not found`);
84
- }
85
- }
86
-
87
- // Validate schema for each file with schema
88
- for (const file of filesWithSchema) {
89
- const result = await this.validateFile(file);
90
- allErrors.push(...result.errors);
91
- allWarnings.push(...result.warnings);
92
- }
93
-
94
- // Validate by loading into database with detailed error tracking
95
- if (filesWithSchema.length > 0 && allErrors.length === 0) {
96
- const dbErrors = await this.validateWithDatabase(dirPath, filesWithSchema);
97
- allErrors.push(...dbErrors);
98
- }
99
-
100
- return {
101
- valid: allErrors.length === 0,
102
- errors: allErrors,
103
- warnings: allWarnings,
104
- };
105
- }
106
-
107
- /**
108
- * Validate by loading data into database one row at a time
109
- * This catches constraint violations and extracts detailed error information
110
- */
111
- private async validateWithDatabase(
112
- dirPath: string,
113
- jsonlFiles: string[],
114
- ): Promise<ValidationErrorDetail[]> {
115
- const errors: ValidationErrorDetail[] = [];
116
-
117
- try {
118
- const db = LinesDB.create({ dataDir: ':memory:' });
119
-
120
- // Load all tables one by one, checking each row
121
- for (const file of jsonlFiles) {
122
- const tableName = basename(file, '.jsonl');
123
- const data = await JsonlReader.read(file);
124
-
125
- // Load schema and metadata
126
- let schema: any;
127
- let foreignKeys: any[] = [];
128
- let indexes: any[] = [];
129
- let primaryKey: string | undefined;
130
- try {
131
- schema = await SchemaLoader.loadSchema(file);
132
- const { pathToFileURL } = await import('node:url');
133
- const schemaPath = file.replace('.jsonl', '.schema.ts');
134
- const schemaUrl = pathToFileURL(schemaPath).href;
135
- const schemaModule = await import(`${schemaUrl}?t=${Date.now()}`);
136
- const schemaExport = schemaModule.schema || schemaModule.default;
137
- if (schemaExport?.foreignKeys) {
138
- foreignKeys = schemaExport.foreignKeys;
139
- }
140
- if (schemaExport?.indexes) {
141
- indexes = schemaExport.indexes;
142
- }
143
- if (schemaExport?.primaryKey) {
144
- primaryKey = schemaExport.primaryKey;
145
- }
146
- } catch (_error) {
147
- // Schema not found or failed to load
148
- continue;
149
- }
150
-
151
- // Create table schema
152
- try {
153
- const tableSchema = this.createTableSchema(
154
- tableName,
155
- data,
156
- schema,
157
- foreignKeys,
158
- indexes,
159
- primaryKey,
160
- );
161
-
162
- // Create the table in the database
163
- this.createTableInDb(db, tableSchema);
164
-
165
- // Insert rows one by one to catch constraint violations
166
- for (let rowIndex = 0; rowIndex < data.length; rowIndex++) {
167
- const row = data[rowIndex];
168
- try {
169
- this.insertRowIntoDb(db, tableName, tableSchema, row);
170
- } catch (error) {
171
- // Constraint violation occurred
172
- const constraintError = this.analyzeConstraintError(
173
- error,
174
- file,
175
- tableName,
176
- rowIndex,
177
- row,
178
- foreignKeys,
179
- db,
180
- );
181
- if (constraintError) {
182
- errors.push(constraintError);
183
- }
184
- }
185
- }
186
- } catch (_error) {
187
- // Skip this table and continue if table creation fails
188
- continue;
189
- }
190
- }
191
-
192
- await db.close();
193
- } catch (error) {
194
- // Database initialization failed
195
- errors.push({
196
- file: dirPath,
197
- tableName: 'database',
198
- rowIndex: 0,
199
- issues: [
200
- {
201
- message: `Database initialization failed: ${error instanceof Error ? error.message : String(error)}`,
202
- path: [],
203
- },
204
- ],
205
- type: 'schema',
206
- });
207
- }
208
-
209
- return errors;
210
- }
211
-
212
- /**
213
- * Create table schema from data and validation schema
214
- */
215
- private createTableSchema(
216
- tableName: string,
217
- data: any[],
218
- validationSchema: any,
219
- foreignKeys: any[],
220
- indexes: any[],
221
- primaryKey?: string,
222
- ): any {
223
- if (data.length === 0) {
224
- throw new Error(`No data found in ${tableName}`);
225
- }
226
-
227
- // Infer schema from data
228
- const schema = JsonlReader.inferSchema(tableName, data);
229
-
230
- // Set primary key if specified
231
- if (primaryKey) {
232
- const pkColumn = schema.columns.find((col: any) => col.name === primaryKey);
233
- if (pkColumn) {
234
- pkColumn.primaryKey = true;
235
- }
236
- } else if (!schema.columns.some((col: any) => col.primaryKey)) {
237
- // If no primary key is defined, use 'id' column as primary key if it exists
238
- // This matches the behavior of database.ts
239
- const idColumn = schema.columns.find((c: any) => c.name === 'id');
240
- if (idColumn) {
241
- idColumn.primaryKey = true;
242
- }
243
- }
244
-
245
- // Add foreign keys
246
- if (foreignKeys && foreignKeys.length > 0) {
247
- schema.foreignKeys = foreignKeys;
248
- }
249
-
250
- // Add indexes
251
- if (indexes && indexes.length > 0) {
252
- schema.indexes = indexes;
253
- }
254
-
255
- return schema;
256
- }
257
-
258
- /**
259
- * Create table in database
260
- */
261
- private createTableInDb(db: LinesDB<any>, schema: any): void {
262
- const columns = schema.columns.map((col: any) => {
263
- let colDef = `${this.quoteIdentifier(col.name)} ${col.type.toUpperCase()}`;
264
- if (col.primaryKey) {
265
- colDef += ' PRIMARY KEY';
266
- }
267
- return colDef;
268
- });
269
-
270
- // Add foreign key constraints
271
- if (schema.foreignKeys && schema.foreignKeys.length > 0) {
272
- for (const fk of schema.foreignKeys) {
273
- columns.push(
274
- `FOREIGN KEY (${this.quoteIdentifier(fk.column)}) REFERENCES ${this.quoteIdentifier(fk.references.table)}(${this.quoteIdentifier(fk.references.column)})`,
275
- );
276
- }
277
- }
278
-
279
- const sql = `CREATE TABLE IF NOT EXISTS ${this.quoteIdentifier(schema.name)} (${columns.join(', ')})`;
280
- db.execute(sql);
281
-
282
- // Create indexes
283
- if (schema.indexes && schema.indexes.length > 0) {
284
- for (const index of schema.indexes) {
285
- const indexName = index.name || `idx_${schema.name}_${index.columns.join('_')}`;
286
- const uniqueKeyword = index.unique ? 'UNIQUE' : '';
287
- const indexColumns = index.columns
288
- .map((col: string) => this.quoteIdentifier(col))
289
- .join(', ');
290
- const indexSql = `CREATE ${uniqueKeyword} INDEX IF NOT EXISTS ${this.quoteIdentifier(indexName)} ON ${this.quoteIdentifier(schema.name)} (${indexColumns})`;
291
- db.execute(indexSql);
292
- }
293
- }
294
- }
295
-
296
- /**
297
- * Insert a row into database
298
- */
299
- private insertRowIntoDb(db: LinesDB<any>, tableName: string, schema: any, row: any): void {
300
- const columnNames = schema.columns.map((col: any) => col.name);
301
- const quotedColumns = columnNames.map((name: string) => this.quoteIdentifier(name));
302
- const placeholders = columnNames.map(() => '?').join(', ');
303
- const sql = `INSERT INTO ${this.quoteIdentifier(tableName)} (${quotedColumns.join(', ')}) VALUES (${placeholders})`;
304
-
305
- const values = columnNames.map((col: string) => {
306
- const value = row[col];
307
- if (value === null || value === undefined) return null;
308
- if (typeof value === 'object') return JSON.stringify(value);
309
- if (typeof value === 'boolean') return value ? 1 : 0;
310
- return value;
311
- });
312
-
313
- db.execute(sql, values);
314
- }
315
-
316
- /**
317
- * Analyze constraint error and extract detailed information
318
- */
319
- private analyzeConstraintError(
320
- error: any,
321
- file: string,
322
- tableName: string,
323
- rowIndex: number,
324
- row: any,
325
- foreignKeys: any[],
326
- db: LinesDB<any>,
327
- ): ValidationErrorDetail | null {
328
- const errorMessage = error instanceof Error ? error.message : String(error);
329
-
330
- // Foreign key constraint
331
- if (errorMessage.includes('FOREIGN KEY constraint failed')) {
332
- // Find which foreign key failed
333
- for (const fk of foreignKeys) {
334
- const fkValue = row[fk.column];
335
- if (fkValue === null || fkValue === undefined) continue;
336
-
337
- // Check if referenced value exists
338
- try {
339
- const result = db.query(
340
- `SELECT COUNT(*) as count FROM ${this.quoteIdentifier(fk.references.table)} WHERE ${this.quoteIdentifier(fk.references.column)} = ?`,
341
- [fkValue],
342
- );
343
- if (result.length > 0 && (result[0] as any).count === 0) {
344
- return {
345
- file,
346
- tableName,
347
- rowIndex,
348
- issues: [],
349
- type: 'foreignKey',
350
- foreignKeyError: {
351
- column: fk.column,
352
- value: fkValue,
353
- referencedTable: fk.references.table,
354
- referencedColumn: fk.references.column,
355
- },
356
- };
357
- }
358
- } catch (_) {
359
- // Referenced table doesn't exist yet
360
- }
361
- }
362
- }
363
-
364
- // Other constraint errors (primary key, unique, etc.)
365
- return {
366
- file,
367
- tableName,
368
- rowIndex,
369
- issues: [
370
- {
371
- message: errorMessage,
372
- path: [],
373
- },
374
- ],
375
- type: 'schema',
376
- };
377
- }
378
-
379
- /**
380
- * Quote SQL identifier
381
- */
382
- private quoteIdentifier(name: string): string {
383
- return `"${name.replace(/"/g, '""')}"`;
384
- }
385
-
386
- /**
387
- * Validate a single JSONL file
388
- */
389
- private async validateFile(filePath: string): Promise<ValidationResult> {
390
- const tableName = basename(filePath, '.jsonl');
391
- const data = await JsonlReader.read(filePath);
392
-
393
- // Try to load schema
394
- const schema = await SchemaLoader.loadSchema(filePath);
395
-
396
- const errors: ValidationErrorDetail[] = [];
397
-
398
- // Validate each row with schema
399
- for (let i = 0; i < data.length; i++) {
400
- const row = data[i];
401
- const result = schema['~standard'].validate(row);
402
-
403
- // Only synchronous validation is supported
404
- if (result instanceof Promise) {
405
- throw new Error('Asynchronous validation is not supported.');
406
- }
407
-
408
- if (result.issues && result.issues.length > 0) {
409
- errors.push({
410
- file: filePath,
411
- tableName,
412
- rowIndex: i, // 0-indexed, will be converted to 1-indexed in formatter
413
- issues: result.issues,
414
- type: 'schema',
415
- });
416
- }
417
- }
418
-
419
- // If schema validation passed, validate database constraints
420
- if (errors.length === 0) {
421
- const dirPath = dirname(filePath);
422
-
423
- // Get all JSONL files in the directory
424
- const entries = await readdir(dirPath, { withFileTypes: true });
425
- const allJsonlFiles = entries
426
- .filter((entry) => entry.isFile() && entry.name.endsWith('.jsonl'))
427
- .map((entry) => join(dirPath, entry.name));
428
-
429
- // Validate database constraints (including foreign keys)
430
- const dbErrors = await this.validateWithDatabase(dirPath, allJsonlFiles);
431
- // Only include errors for the current file
432
- errors.push(...dbErrors.filter((e) => e.file === filePath));
433
- }
434
-
435
- return {
436
- valid: errors.length === 0,
437
- errors,
438
- warnings: [],
439
- };
440
- }
441
- }