jtcsv 2.2.7 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. package/README.md +31 -1
  2. package/bin/jtcsv.js +891 -821
  3. package/bin/jtcsv.ts +2534 -0
  4. package/csv-to-json.js +168 -145
  5. package/dist/jtcsv-core.cjs.js +1407 -0
  6. package/dist/jtcsv-core.cjs.js.map +1 -0
  7. package/dist/jtcsv-core.esm.js +1379 -0
  8. package/dist/jtcsv-core.esm.js.map +1 -0
  9. package/dist/jtcsv-core.umd.js +1413 -0
  10. package/dist/jtcsv-core.umd.js.map +1 -0
  11. package/dist/jtcsv-full.cjs.js +1912 -0
  12. package/dist/jtcsv-full.cjs.js.map +1 -0
  13. package/dist/jtcsv-full.esm.js +1880 -0
  14. package/dist/jtcsv-full.esm.js.map +1 -0
  15. package/dist/jtcsv-full.umd.js +1918 -0
  16. package/dist/jtcsv-full.umd.js.map +1 -0
  17. package/dist/jtcsv-workers.esm.js +759 -0
  18. package/dist/jtcsv-workers.esm.js.map +1 -0
  19. package/dist/jtcsv-workers.umd.js +773 -0
  20. package/dist/jtcsv-workers.umd.js.map +1 -0
  21. package/dist/jtcsv.cjs.js +61 -19
  22. package/dist/jtcsv.cjs.js.map +1 -1
  23. package/dist/jtcsv.esm.js +61 -19
  24. package/dist/jtcsv.esm.js.map +1 -1
  25. package/dist/jtcsv.umd.js +61 -19
  26. package/dist/jtcsv.umd.js.map +1 -1
  27. package/errors.js +188 -2
  28. package/examples/advanced/conditional-transformations.js +446 -0
  29. package/examples/advanced/conditional-transformations.ts +446 -0
  30. package/examples/advanced/csv-parser.worker.js +89 -0
  31. package/examples/advanced/csv-parser.worker.ts +89 -0
  32. package/examples/advanced/nested-objects-example.js +306 -0
  33. package/examples/advanced/nested-objects-example.ts +306 -0
  34. package/examples/advanced/performance-optimization.js +504 -0
  35. package/examples/advanced/performance-optimization.ts +504 -0
  36. package/examples/advanced/run-demo-server.js +116 -0
  37. package/examples/advanced/run-demo-server.ts +116 -0
  38. package/examples/advanced/web-worker-usage.html +874 -0
  39. package/examples/async-multithreaded-example.ts +335 -0
  40. package/examples/cli-advanced-usage.md +288 -0
  41. package/examples/cli-batch-processing.ts +38 -0
  42. package/examples/cli-tool.js +0 -3
  43. package/examples/cli-tool.ts +183 -0
  44. package/examples/error-handling.js +21 -7
  45. package/examples/error-handling.ts +356 -0
  46. package/examples/express-api.js +0 -3
  47. package/examples/express-api.ts +164 -0
  48. package/examples/large-dataset-example.js +0 -3
  49. package/examples/large-dataset-example.ts +204 -0
  50. package/examples/ndjson-processing.js +1 -1
  51. package/examples/ndjson-processing.ts +456 -0
  52. package/examples/plugin-excel-exporter.js +3 -4
  53. package/examples/plugin-excel-exporter.ts +406 -0
  54. package/examples/react-integration.tsx +637 -0
  55. package/examples/schema-validation.ts +640 -0
  56. package/examples/simple-usage.js +254 -254
  57. package/examples/simple-usage.ts +194 -0
  58. package/examples/streaming-example.js +4 -5
  59. package/examples/streaming-example.ts +419 -0
  60. package/examples/web-workers-advanced.ts +28 -0
  61. package/index.d.ts +1 -3
  62. package/index.js +15 -1
  63. package/json-save.js +9 -3
  64. package/json-to-csv.js +168 -21
  65. package/package.json +69 -10
  66. package/plugins/express-middleware/README.md +21 -2
  67. package/plugins/express-middleware/example.js +3 -4
  68. package/plugins/express-middleware/example.ts +135 -0
  69. package/plugins/express-middleware/index.d.ts +1 -1
  70. package/plugins/express-middleware/index.js +270 -118
  71. package/plugins/express-middleware/index.ts +557 -0
  72. package/plugins/fastify-plugin/index.js +2 -4
  73. package/plugins/fastify-plugin/index.ts +443 -0
  74. package/plugins/hono/index.ts +226 -0
  75. package/plugins/nestjs/index.ts +201 -0
  76. package/plugins/nextjs-api/examples/ConverterComponent.tsx +386 -0
  77. package/plugins/nextjs-api/examples/api-convert.js +0 -2
  78. package/plugins/nextjs-api/examples/api-convert.ts +67 -0
  79. package/plugins/nextjs-api/index.tsx +339 -0
  80. package/plugins/nextjs-api/route.js +2 -3
  81. package/plugins/nextjs-api/route.ts +370 -0
  82. package/plugins/nuxt/index.ts +94 -0
  83. package/plugins/nuxt/runtime/composables/useJtcsv.ts +100 -0
  84. package/plugins/nuxt/runtime/plugin.ts +71 -0
  85. package/plugins/remix/index.js +1 -1
  86. package/plugins/remix/index.ts +260 -0
  87. package/plugins/sveltekit/index.js +1 -1
  88. package/plugins/sveltekit/index.ts +301 -0
  89. package/plugins/trpc/index.ts +267 -0
  90. package/src/browser/browser-functions.ts +402 -0
  91. package/src/browser/core.js +92 -0
  92. package/src/browser/core.ts +152 -0
  93. package/src/browser/csv-to-json-browser.d.ts +3 -0
  94. package/src/browser/csv-to-json-browser.js +36 -14
  95. package/src/browser/csv-to-json-browser.ts +264 -0
  96. package/src/browser/errors-browser.ts +303 -0
  97. package/src/browser/extensions/plugins.js +92 -0
  98. package/src/browser/extensions/plugins.ts +93 -0
  99. package/src/browser/extensions/workers.js +39 -0
  100. package/src/browser/extensions/workers.ts +39 -0
  101. package/src/browser/globals.d.ts +5 -0
  102. package/src/browser/index.ts +192 -0
  103. package/src/browser/json-to-csv-browser.d.ts +3 -0
  104. package/src/browser/json-to-csv-browser.js +13 -3
  105. package/src/browser/json-to-csv-browser.ts +262 -0
  106. package/src/browser/streams.js +12 -2
  107. package/src/browser/streams.ts +336 -0
  108. package/src/browser/workers/csv-parser.worker.ts +377 -0
  109. package/src/browser/workers/worker-pool.ts +548 -0
  110. package/src/core/delimiter-cache.js +22 -8
  111. package/src/core/delimiter-cache.ts +310 -0
  112. package/src/core/node-optimizations.ts +449 -0
  113. package/src/core/plugin-system.js +29 -11
  114. package/src/core/plugin-system.ts +400 -0
  115. package/src/core/transform-hooks.ts +558 -0
  116. package/src/engines/fast-path-engine-new.ts +347 -0
  117. package/src/engines/fast-path-engine.ts +854 -0
  118. package/src/errors.ts +72 -0
  119. package/src/formats/ndjson-parser.ts +469 -0
  120. package/src/formats/tsv-parser.ts +334 -0
  121. package/src/index-with-plugins.js +16 -9
  122. package/src/index-with-plugins.ts +395 -0
  123. package/src/types/index.ts +255 -0
  124. package/src/utils/bom-utils.js +259 -0
  125. package/src/utils/bom-utils.ts +373 -0
  126. package/src/utils/encoding-support.js +124 -0
  127. package/src/utils/encoding-support.ts +155 -0
  128. package/src/utils/schema-validator.js +19 -19
  129. package/src/utils/schema-validator.ts +819 -0
  130. package/src/utils/transform-loader.js +1 -1
  131. package/src/utils/transform-loader.ts +389 -0
  132. package/src/utils/zod-adapter.js +170 -0
  133. package/src/utils/zod-adapter.ts +280 -0
  134. package/src/web-server/index.js +10 -10
  135. package/src/web-server/index.ts +683 -0
  136. package/src/workers/csv-multithreaded.ts +310 -0
  137. package/src/workers/csv-parser.worker.ts +227 -0
  138. package/src/workers/worker-pool.ts +409 -0
  139. package/stream-csv-to-json.js +26 -8
  140. package/stream-json-to-csv.js +1 -0
@@ -0,0 +1,819 @@
1
+ /**
2
+ * Schema Validator Utility
3
+ *
4
+ * Utility for loading and applying JSON schema validation in CLI
5
+ */
6
+
7
+ import * as fs from 'fs';
8
+ import * as fsPromises from 'fs/promises';
9
+ import * as path from 'path';
10
+ import {
11
+ ValidationError,
12
+ SecurityError,
13
+ ConfigurationError
14
+ } from '../errors';
15
+
16
/**
 * Constraints that can be attached to a single field.
 *
 * Two vocabularies coexist in this file: the "simple" keys (`required`,
 * `min`, `max`) read by the fallback validator, and the JSON-Schema-style keys
 * (`minLength`, `minimum`, `items`, ...) read by `createSchemaValidators`.
 * Every member is optional.
 */
export interface SchemaRule {
  /** Accepted type name, or a list of alternatives. */
  type?: string | string[];
  /** Whether the field must be present. */
  required?: boolean;
  /** Whitelist of allowed values. */
  enum?: any[];
  /** Named format; this file handles `date-time`, `email` and `uri`. */
  format?: string;
  /** Regular expression (source string or RegExp object) a string value must match. */
  pattern?: string | RegExp;

  /** Simple-format lower bound: string length for strings, value for numbers. */
  min?: number;
  /** Simple-format upper bound: string length for strings, value for numbers. */
  max?: number;

  /** Minimum string length. */
  minLength?: number;
  /** Maximum string length. */
  maxLength?: number;

  /** Inclusive numeric lower bound. */
  minimum?: number;
  /** Inclusive numeric upper bound. */
  maximum?: number;
  /** Exclusive numeric lower bound. */
  exclusiveMinimum?: number;
  /** Exclusive numeric upper bound. */
  exclusiveMaximum?: number;
  /** The numeric value must be a multiple of this number. */
  multipleOf?: number;

  /** Minimum array length. */
  minItems?: number;
  /** Maximum array length. */
  maxItems?: number;
  /** When true, array elements must be distinct. */
  uniqueItems?: boolean;
  /** Rule applied to every array element. */
  items?: SchemaRule;

  /** Rules for the properties of a nested object. */
  properties?: Record<string, SchemaRule>;
}
37
+
38
/**
 * A schema as accepted by this module.
 *
 * Either a JSON-Schema-like object (`properties` plus a `required` list) or a
 * flat map of field name to rule; the index signature inherited from
 * `Record<string, any>` is what admits the flat form.
 */
export interface Schema extends Record<string, any> {
  /** Names of the fields that must be present (JSON-Schema style). */
  required?: string[];
  /** Per-field rules (JSON-Schema style). */
  properties?: Record<string, SchemaRule>;
}
42
+
43
/**
 * One validation failure reported by a validator.
 */
export interface ValidationErrorItem {
  /** 1-based row number; the fallback validator uses 0 when the input is not an array. */
  row: number;
  /** Name of the offending field (empty when the failure is not tied to a field). */
  field: string;
  /** Error category, e.g. `REQUIRED`, `TYPE`, `PATTERN`, `ENUM`. */
  type: string;
  /** Human-readable description. */
  message: string;

  /** The rejected value. */
  value?: any;
  /** Expected type(s), for type errors. */
  expected?: any;
  /** Violated lower bound, for min errors. */
  min?: number;
  /** Violated upper bound, for max errors. */
  max?: number;
  /** String form of the pattern that did not match. */
  pattern?: string;
  /** Allowed values, for enum errors. */
  allowed?: any[];
}
55
+
56
/**
 * Outcome of `Validator.validate`.
 */
export interface ValidationResult {
  /** True when no errors were recorded. */
  valid: boolean;
  /** Every recorded failure. */
  errors: ValidationErrorItem[];
  /** Non-fatal findings. */
  warnings: any[];
  /** Aggregate counters for the run. */
  summary: {
    totalRows: number;
    validRows: number;
    errorCount: number;
    warningCount: number;
  };
}
67
+
68
/**
 * Outcome of `applySchemaValidation` / `applySchemaValidationAsync`.
 */
export interface ApplySchemaValidationResult {
  /** True when no row was rejected. */
  valid: boolean;
  /** Rows that passed validation. */
  data: any[];
  /** One entry per rejected row: 1-based row number, message and the original row. */
  errors: Array<{ row: number; message: string; data: any }>;
  /** Aggregate counters; `errorRate` is a percentage (0-100). */
  summary: {
    totalRows: number;
    validRows: number;
    errorCount: number;
    errorRate: number;
  };
}
79
+
80
/**
 * Minimal validator contract used by the validation hook.
 *
 * Only `validate` is mandatory. `schema` and `field` are configuration entry
 * points that `createValidationHook` calls on the full validator; the
 * built-in fallback validator does not provide them.
 */
export interface Validator {
  /** Registers a complete field map in one call. */
  schema?(schema: any): void;
  /** Registers the rule for a single field. */
  field?(field: string, rule: any): void;
  /** Validates an array of rows and reports the result. */
  validate(data: any[], options?: { stopOnFirstError?: boolean; transform?: boolean }): ValidationResult;
}
85
+
86
/**
 * Loads a JSON schema from a file or from an inline JSON string.
 *
 * The input is treated as inline JSON when, after trimming, it is wrapped in
 * `{...}` or `[...]`. Otherwise it is treated as a file path when it ends
 * with `.json` or contains a path separator. Checking for inline JSON first
 * matters because schemas routinely contain `/` or `\` (URLs, regex patterns)
 * and would otherwise be mistaken for paths.
 *
 * @param schemaPathOrJson - Path to JSON file or JSON string
 * @returns Parsed JSON schema
 * @throws ValidationError when the input is not a non-empty string, the file
 *   is missing / not `.json` / unreadable, the JSON is malformed, or the
 *   parsed value is not a plain JSON object
 * @throws SecurityError when the path contains `..` or the file cannot be
 *   read because of permissions (EACCES)
 */
export function loadSchema(schemaPathOrJson: string): Schema {
  if (!schemaPathOrJson || typeof schemaPathOrJson !== 'string') {
    throw new ValidationError('Schema must be a string (JSON or file path)');
  }

  let schemaString = schemaPathOrJson;

  // Inline JSON documents are never file paths, whatever characters they contain.
  const trimmed = schemaPathOrJson.trim();
  const looksLikeInlineJson =
    (trimmed.startsWith('{') && trimmed.endsWith('}')) ||
    (trimmed.startsWith('[') && trimmed.endsWith(']'));

  // Check if it's a file path (ends with .json or contains path separators)
  const isFilePath = !looksLikeInlineJson && (
    schemaPathOrJson.endsWith('.json') ||
    schemaPathOrJson.includes('/') ||
    schemaPathOrJson.includes('\\')
  );

  if (isFilePath) {
    const safePath = path.resolve(schemaPathOrJson);

    // Prevent directory traversal: reject any input containing "..".
    const normalizedPath = path.normalize(schemaPathOrJson);
    if (normalizedPath.includes('..') ||
        /\\\.\.\\|\/\.\.\//.test(schemaPathOrJson) ||
        schemaPathOrJson.startsWith('..') ||
        schemaPathOrJson.includes('/..')) {
      throw new SecurityError('Directory traversal detected in schema file path');
    }

    if (!fs.existsSync(safePath)) {
      throw new ValidationError(`Schema file not found: ${schemaPathOrJson}`);
    }

    if (!safePath.toLowerCase().endsWith('.json')) {
      throw new ValidationError('Schema file must have .json extension');
    }

    try {
      schemaString = fs.readFileSync(safePath, 'utf8');
    } catch (error: any) {
      if (error.code === 'EACCES') {
        throw new SecurityError(`Permission denied reading schema file: ${schemaPathOrJson}`);
      }
      throw new ValidationError(`Failed to read schema file: ${error.message}`);
    }
  }

  // Parse first, validate the structure afterwards, so the structural error
  // below is not caught and re-wrapped as a parse failure.
  let parsed: unknown;
  try {
    parsed = JSON.parse(schemaString);
  } catch (error: any) {
    if (error instanceof SyntaxError) {
      throw new ValidationError(`Invalid JSON in schema: ${error.message}`);
    }
    throw new ValidationError(`Failed to parse schema: ${error.message}`);
  }

  // Arrays are `typeof 'object'` too, but they are not a usable schema.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new ValidationError('Schema must be a JSON object');
  }

  return parsed as Schema;
}
153
+
154
/**
 * Creates a simple validator for fallback when @jtcsv/validator is not available.
 *
 * The schema is read as a flat map of field name to rule. Entries whose rule
 * is not an object are ignored. For every row and field the checks run in
 * this order: required, type, min/max (string length or numeric value),
 * pattern, enum. Empty values (`undefined`, `null`, `''`) are only subject to
 * the required check.
 *
 * Supported options:
 * - `stopOnFirstError`: stop at the first failure and report only that one.
 *   In that case `summary` reflects only the rows examined so far.
 *
 * `summary.validRows` counts rows without any error, so it never goes
 * negative when a single row produces several errors.
 *
 * @param schema - Flat field-to-rule map
 * @returns Validator exposing `validate`
 */
function createSimpleValidator(schema: Schema): Validator {
  const isEmpty = (value: any): boolean =>
    value === undefined || value === null || value === '';

  // String patterns are compiled once per distinct source instead of once per row.
  const regexCache = new Map<string, RegExp>();
  const toRegExp = (pattern: string | RegExp): RegExp => {
    if (pattern instanceof RegExp) {
      return pattern;
    }
    let compiled = regexCache.get(pattern);
    if (compiled === undefined) {
      compiled = new RegExp(pattern);
      regexCache.set(pattern, compiled);
    }
    return compiled;
  };

  // Runs every check for one field of one row and returns the failures in check order.
  const checkField = (rowNumber: number, field: string, rule: Record<string, any>, value: any): ValidationErrorItem[] => {
    const found: ValidationErrorItem[] = [];

    if (isEmpty(value)) {
      if (rule.required) {
        found.push({
          row: rowNumber,
          type: 'REQUIRED',
          field,
          message: `Field "${field}" is required`,
          value
        });
      }
      // Nothing else applies to an empty value.
      return found;
    }

    if (rule.type) {
      const types: string[] = Array.isArray(rule.type) ? rule.type : [rule.type];
      if (!types.some((type) => checkType(value, type))) {
        found.push({
          row: rowNumber,
          type: 'TYPE',
          field,
          message: `Field "${field}" must be of type ${types.join(' or ')}`,
          value,
          expected: types
        });
      }
    }

    if (typeof value === 'string') {
      if (rule.min !== undefined && value.length < rule.min) {
        found.push({
          row: rowNumber,
          type: 'MIN_LENGTH',
          field,
          message: `Field "${field}" must be at least ${rule.min} characters`,
          value,
          min: rule.min
        });
      }
      if (rule.max !== undefined && value.length > rule.max) {
        found.push({
          row: rowNumber,
          type: 'MAX_LENGTH',
          field,
          message: `Field "${field}" must be at most ${rule.max} characters`,
          value,
          max: rule.max
        });
      }
    }

    if (typeof value === 'number') {
      if (rule.min !== undefined && value < rule.min) {
        found.push({
          row: rowNumber,
          type: 'MIN_VALUE',
          field,
          message: `Field "${field}" must be at least ${rule.min}`,
          value,
          min: rule.min
        });
      }
      if (rule.max !== undefined && value > rule.max) {
        found.push({
          row: rowNumber,
          type: 'MAX_VALUE',
          field,
          message: `Field "${field}" must be at most ${rule.max}`,
          value,
          max: rule.max
        });
      }
    }

    if (rule.pattern && typeof value === 'string') {
      const pattern = toRegExp(rule.pattern);
      // A RegExp with the g/y flag remembers lastIndex between calls; reset it
      // so each value is matched from the start.
      pattern.lastIndex = 0;
      if (!pattern.test(value)) {
        found.push({
          row: rowNumber,
          type: 'PATTERN',
          field,
          message: `Field "${field}" must match pattern`,
          value,
          pattern: pattern.toString()
        });
      }
    }

    if (rule.enum && Array.isArray(rule.enum) && !rule.enum.includes(value)) {
      found.push({
        row: rowNumber,
        type: 'ENUM',
        field,
        message: `Field "${field}" must be one of: ${rule.enum.join(', ')}`,
        value,
        allowed: rule.enum
      });
    }

    return found;
  };

  return {
    validate(data: any[], options: { stopOnFirstError?: boolean; transform?: boolean } = {}): ValidationResult {
      if (!Array.isArray(data)) {
        return {
          valid: false,
          errors: [{
            row: 0,
            type: 'INVALID_DATA',
            field: '',
            message: 'Data must be an array'
          }],
          warnings: [],
          summary: {
            totalRows: 0,
            validRows: 0,
            errorCount: 1,
            warningCount: 0
          }
        };
      }

      const errors: ValidationErrorItem[] = [];
      const warnings: any[] = [];
      const failedRows = new Set<number>();
      const stopEarly = options.stopOnFirstError === true;

      // Only object-valued entries are usable rules; a null rule would
      // otherwise crash on property access.
      const fieldRules = Object.entries(schema).filter(
        ([, rule]) => rule !== null && typeof rule === 'object'
      );

      scan:
      for (let i = 0; i < data.length; i++) {
        const row = data[i];

        for (const [field, rule] of fieldRules) {
          const fieldErrors = checkField(i + 1, field, rule, row[field]);
          if (fieldErrors.length === 0) {
            continue;
          }

          failedRows.add(i);
          if (stopEarly) {
            errors.push(fieldErrors[0]);
            break scan;
          }
          errors.push(...fieldErrors);
        }
      }

      return {
        valid: errors.length === 0,
        errors,
        warnings,
        summary: {
          totalRows: data.length,
          validRows: data.length - failedRows.size,
          errorCount: errors.length,
          warningCount: warnings.length
        }
      };
    }
  };
}
318
+
319
/**
 * Checks if value matches type.
 *
 * Supported type names: `string`, `number`, `boolean`, `integer`, `float`,
 * `date`, `array`, `object`. Any other name yields `false`.
 *
 * `NaN` is rejected by both `number` and `float`; `float` means a number that
 * is not an integer. `object` excludes `null` and arrays. `date` requires a
 * `Date` instance holding a valid time.
 *
 * @param value - Value to test
 * @param type - Type name to test against
 * @returns True when the value matches the named type
 */
function checkType(value: any, type: string): boolean {
  switch (type) {
    case 'string':
      return typeof value === 'string';
    case 'number':
      return typeof value === 'number' && !Number.isNaN(value);
    case 'boolean':
      return typeof value === 'boolean';
    case 'integer':
      return Number.isInteger(value);
    case 'float':
      // NaN is not an integer, so it must be excluded explicitly.
      return typeof value === 'number' && !Number.isNaN(value) && !Number.isInteger(value);
    case 'date':
      return value instanceof Date && !Number.isNaN(value.getTime());
    case 'array':
      return Array.isArray(value);
    case 'object':
      return typeof value === 'object' && value !== null && !Array.isArray(value);
    default:
      return false;
  }
}
344
+
345
/**
 * Creates a validation hook for use with csvToJson/jsonToCsv hooks system.
 *
 * A string schema is loaded through `loadSchema`; an object schema is used as
 * is. The hook tries the full validator first and, if it cannot be loaded or
 * configured, falls back to the built-in simple validator with a console
 * warning.
 *
 * The returned hook validates one row at a time. It throws `ValidationError`
 * for the first failure on an invalid row. Any other error raised while
 * validating is logged and the row is returned unchanged (deliberate
 * best-effort behaviour).
 *
 * @param schema - Schema object or path to schema file
 * @returns Validation hook function
 * @throws ValidationError when `schema` is neither a string nor an object
 */
export function createValidationHook(schema: string | Schema): (row: any, index: number, context: any) => any {
  let schemaObj: Schema;

  if (typeof schema === 'string') {
    // Load schema from file or JSON string
    schemaObj = loadSchema(schema);
  } else if (typeof schema === 'object' && schema !== null) {
    schemaObj = schema;
  } else {
    throw new ValidationError('Schema must be an object or a path to a JSON file');
  }

  // Try to use @jtcsv/validator if available
  let validator: Validator;
  try {
    const JtcsvValidator = require('../../packages/jtcsv-validator/src/index');
    const fullValidator: Validator = new JtcsvValidator();

    if ((schemaObj as any).fields) {
      // Assume it's already in validator format
      fullValidator.schema!((schemaObj as any).fields);
    } else {
      // Convert simple field definitions; `typeof null` is 'object', so null
      // entries must be filtered out explicitly.
      for (const [field, rule] of Object.entries(schemaObj)) {
        if (rule !== null && typeof rule === 'object') {
          fullValidator.field!(field, rule);
        }
      }
    }

    validator = fullValidator;
  } catch {
    // Fallback to simple validation if validator is not available
    console.warn('@jtcsv/validator not available, using simple validation');
    validator = createSimpleValidator(schemaObj);
  }

  // Return a hook function compatible with hooks.perRow
  return function (row: any, index: number, context: any): any {
    try {
      const result = validator.validate([row], {
        stopOnFirstError: true,
        transform: false
      });

      if (!result.valid && result.errors.length > 0) {
        const firstError = result.errors[0];
        throw new ValidationError(
          `Row ${index + 1}: ${firstError.message} (field: ${firstError.field})`
        );
      }

      return row;
    } catch (error: any) {
      if (error instanceof ValidationError) {
        throw error;
      }
      // Log error but don't crash - return original row. The row number is
      // 1-based, matching the ValidationError message above.
      console.error(`Validation error at row ${index + 1}: ${error.message}`);
      if (process.env['NODE_ENV'] === 'development') {
        console.error(error.stack);
      }
      return row;
    }
  };
}
417
+
418
/**
 * Validates every row of `data` against `schema` and collects the outcome.
 *
 * Rows accepted by the validation hook go to `data` in the result. Rows for
 * which the hook throws `ValidationError` go to `errors`, with a 1-based row
 * number. If the hook throws anything else, the row is kept in `data`.
 *
 * @param data - Array of data to validate
 * @param schema - Schema object or path to schema file
 * @returns Validation result
 * @throws ValidationError when `data` is not an array
 */
export function applySchemaValidation(data: any[], schema: string | Schema): ApplySchemaValidationResult {
  if (!Array.isArray(data)) {
    throw new ValidationError('Data must be an array');
  }

  const checkRow = createValidationHook(schema);
  const rejected: Array<{ row: number; message: string; data: any }> = [];
  const accepted: any[] = [];

  for (const [position, record] of data.entries()) {
    try {
      accepted.push(checkRow(record, position, { operation: 'validate' }));
    } catch (failure: any) {
      if (!(failure instanceof ValidationError)) {
        // Not a validation failure: keep the row as it was.
        accepted.push(record);
        continue;
      }
      rejected.push({
        row: position + 1,
        message: failure.message,
        data: record
      });
    }
  }

  const total = data.length;
  const errorRate = total > 0 ? (rejected.length / total) * 100 : 0;

  return {
    valid: rejected.length === 0,
    errors: rejected,
    data: accepted,
    summary: {
      totalRows: total,
      validRows: accepted.length,
      errorCount: rejected.length,
      errorRate
    }
  };
}
464
+
465
/**
 * Creates a TransformHooks instance with validation.
 *
 * Builds a validation hook from `schema` and registers it as a per-row hook
 * on a new `TransformHooks` instance.
 *
 * @param schema - Schema object or path to schema file (the same inputs
 *   `createValidationHook` accepts)
 * @returns TransformHooks instance
 */
export function createValidationHooks(schema: string | Schema): any {
  const { TransformHooks } = require('../core/transform-hooks');
  const hooks = new TransformHooks();

  hooks.perRow(createValidationHook(schema));

  return hooks;
}
480
+
481
/**
 * Creates schema validators from JSON schema.
 *
 * Accepts either a JSON-Schema-like object (`properties` plus a `required`
 * list) or a flat map of field name to definition. For each field the result
 * holds an object with:
 * - `type`: the declared type
 * - `required`: true when the field is named in the schema's `required`
 *   array or its own definition carries `required: true`
 * - `format(value)`: only for string fields with a `format`; normalises
 *   `date-time` (ISO string), `email` (lower-cased, trimmed) and `uri`
 *   (trimmed), returning other values unchanged
 * - `validate(value)`: boolean check of type, string/number/array/object
 *   constraints and `enum`
 *
 * Null or undefined definitions are skipped. Validators for array items and
 * nested objects, and compiled patterns, are built on first use and reused.
 *
 * @param schema - JSON schema
 * @returns Validators object keyed by field name
 */
export function createSchemaValidators(schema: Schema): Record<string, any> {
  const validators: Record<string, any> = {};

  // Handle both JSON Schema format and simple format
  const properties = schema.properties || schema;
  // In the simple format `required` may be a boolean or a field rule rather
  // than a list of names; only an array is usable as a list.
  const requiredFields: string[] = Array.isArray(schema.required) ? schema.required : [];

  if (!properties || typeof properties !== 'object') {
    return validators;
  }

  for (const [key, definition] of Object.entries(properties)) {
    if (definition === null || definition === undefined) {
      // Nothing to validate against; reading properties would throw.
      continue;
    }

    const validator: any = {
      type: definition.type,
      required: requiredFields.includes(key) || definition.required === true
    };

    // Lazily built helpers, shared by every validate() call for this field.
    let patternRegex: RegExp | undefined;
    let itemValidators: Record<string, any> | undefined;
    let nestedValidators: Record<string, any> | undefined;

    // Add format function for dates and other formats
    if (definition.type === 'string' && definition.format) {
      validator.format = (value: any) => {
        if (definition.format === 'date-time') {
          if (value instanceof Date) {
            return value.toISOString();
          }
          /* istanbul ignore next */
          if (typeof value === 'string') {
            // Try to parse as date
            const date = new Date(value);
            if (!isNaN(date.getTime())) {
              return date.toISOString();
            }
          }
        }
        if (definition.format === 'email' && typeof value === 'string') {
          return value.toLowerCase().trim();
        }
        if (definition.format === 'uri' && typeof value === 'string') {
          return value.trim();
        }
        return value;
      };
    }

    // Add validation function
    validator.validate = (value: any) => {
      if (value === null || value === undefined) {
        return !validator.required;
      }

      // Type validation
      if (definition.type === 'string' && typeof value !== 'string') {
        // For date-time format, also accept Date objects
        return definition.format === 'date-time' && value instanceof Date;
      }
      if (definition.type === 'number' && typeof value !== 'number') {
        return false;
      }
      if (definition.type === 'integer' && (!Number.isInteger(value) || typeof value !== 'number')) {
        return false;
      }
      if (definition.type === 'boolean' && typeof value !== 'boolean') {
        return false;
      }
      if (definition.type === 'array' && !Array.isArray(value)) {
        return false;
      }
      if (definition.type === 'object' && (typeof value !== 'object' || value === null || Array.isArray(value))) {
        return false;
      }

      // Additional constraints for strings
      if (definition.type === 'string') {
        if (definition.minLength !== undefined && value.length < definition.minLength) {
          return false;
        }
        if (definition.maxLength !== undefined && value.length > definition.maxLength) {
          return false;
        }
        if (definition.pattern) {
          if (patternRegex === undefined) {
            patternRegex = new RegExp(definition.pattern);
          }
          // Reset in case the pattern carries the g/y flag.
          patternRegex.lastIndex = 0;
          if (!patternRegex.test(value)) {
            return false;
          }
        }
        if (definition.format === 'email' && !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(value)) {
          return false;
        }
        if (definition.format === 'uri') {
          try {
            new URL(value);
          } catch {
            return false;
          }
        }
      }

      // Additional constraints for numbers
      if (definition.type === 'number' || definition.type === 'integer') {
        if (definition.minimum !== undefined && value < definition.minimum) {
          return false;
        }
        if (definition.maximum !== undefined && value > definition.maximum) {
          return false;
        }
        if (definition.exclusiveMinimum !== undefined && value <= definition.exclusiveMinimum) {
          return false;
        }
        if (definition.exclusiveMaximum !== undefined && value >= definition.exclusiveMaximum) {
          return false;
        }
        if (definition.multipleOf !== undefined && value % definition.multipleOf !== 0) {
          return false;
        }
      }

      // Additional constraints for arrays
      if (definition.type === 'array') {
        if (definition.minItems !== undefined && value.length < definition.minItems) {
          return false;
        }
        if (definition.maxItems !== undefined && value.length > definition.maxItems) {
          return false;
        }
        if (definition.uniqueItems && new Set(value).size !== value.length) {
          return false;
        }
        // Validate array items if schema is provided
        if (definition.items) {
          if (itemValidators === undefined) {
            itemValidators = createSchemaValidators({ properties: { item: definition.items } });
          }
          const itemValidator = itemValidators.item;
          if (itemValidator) {
            for (const item of value) {
              if (!itemValidator.validate(item)) {
                return false;
              }
            }
          }
        }
      }

      // Additional constraints for objects
      if (definition.type === 'object' && definition.properties) {
        if (nestedValidators === undefined) {
          nestedValidators = createSchemaValidators(definition);
        }
        for (const [nestedKey, nestedValidator] of Object.entries(nestedValidators)) {
          if (value[nestedKey] !== undefined && !nestedValidator.validate(value[nestedKey])) {
            return false;
          }
          if (nestedValidator.required && value[nestedKey] === undefined) {
            return false;
          }
        }
      }

      // Check enum
      if (definition.enum && !definition.enum.includes(value)) {
        return false;
      }

      return true;
    };

    validators[key] = validator;
  }

  return validators;
}
657
+
658
/**
 * Async version of loadSchema that reads file asynchronously.
 *
 * The input is treated as inline JSON when, after trimming, it is wrapped in
 * `{...}` or `[...]`. Otherwise it is treated as a file path when it ends
 * with `.json` or contains a path separator. Checking for inline JSON first
 * matters because schemas routinely contain `/` or `\` (URLs, regex patterns)
 * and would otherwise be mistaken for paths.
 *
 * @param schemaPathOrJson - Path to JSON file or JSON string
 * @returns Promise with parsed JSON schema
 * @throws ValidationError (as a rejection) when the input is not a non-empty
 *   string, the file is missing / not `.json` / unreadable, the JSON is
 *   malformed, or the parsed value is not a plain JSON object
 * @throws SecurityError (as a rejection) when the path contains `..` or the
 *   file cannot be read because of permissions (EACCES)
 */
export async function loadSchemaAsync(schemaPathOrJson: string): Promise<Schema> {
  if (!schemaPathOrJson || typeof schemaPathOrJson !== 'string') {
    throw new ValidationError('Schema must be a string (JSON or file path)');
  }

  let schemaString = schemaPathOrJson;

  // Inline JSON documents are never file paths, whatever characters they contain.
  const trimmed = schemaPathOrJson.trim();
  const looksLikeInlineJson =
    (trimmed.startsWith('{') && trimmed.endsWith('}')) ||
    (trimmed.startsWith('[') && trimmed.endsWith(']'));

  // Check if it's a file path (ends with .json or contains path separators)
  const isFilePath = !looksLikeInlineJson && (
    schemaPathOrJson.endsWith('.json') ||
    schemaPathOrJson.includes('/') ||
    schemaPathOrJson.includes('\\')
  );

  if (isFilePath) {
    const safePath = path.resolve(schemaPathOrJson);

    // Prevent directory traversal: reject any input containing "..".
    const normalizedPath = path.normalize(schemaPathOrJson);
    if (normalizedPath.includes('..') ||
        /\\\.\.\\|\/\.\.\//.test(schemaPathOrJson) ||
        schemaPathOrJson.startsWith('..') ||
        schemaPathOrJson.includes('/..')) {
      throw new SecurityError('Directory traversal detected in schema file path');
    }

    try {
      await fsPromises.access(safePath);
    } catch {
      throw new ValidationError(`Schema file not found: ${schemaPathOrJson}`);
    }

    if (!safePath.toLowerCase().endsWith('.json')) {
      throw new ValidationError('Schema file must have .json extension');
    }

    try {
      schemaString = await fsPromises.readFile(safePath, 'utf8');
    } catch (error: any) {
      if (error.code === 'EACCES') {
        throw new SecurityError(`Permission denied reading schema file: ${schemaPathOrJson}`);
      }
      throw new ValidationError(`Failed to read schema file: ${error.message}`);
    }
  }

  // Parse first, validate the structure afterwards, so the structural error
  // below is not caught and re-wrapped as a parse failure.
  let parsed: unknown;
  try {
    parsed = JSON.parse(schemaString);
  } catch (error: any) {
    if (error instanceof SyntaxError) {
      throw new ValidationError(`Invalid JSON in schema: ${error.message}`);
    }
    throw new ValidationError(`Failed to parse schema: ${error.message}`);
  }

  // Arrays are `typeof 'object'` too, but they are not a usable schema.
  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
    throw new ValidationError('Schema must be a JSON object');
  }

  return parsed as Schema;
}
727
+
728
/**
 * Async version of applySchemaValidation.
 *
 * A string schema is loaded with `loadSchemaAsync`. Inputs of more than 1000
 * rows are dispatched row by row to a worker pool (at most 4 workers). All
 * other inputs are validated in-process through `applySchemaValidation`, and
 * so are large inputs when the worker pool or its worker script cannot be
 * set up. The pool is always terminated once the parallel run settles.
 *
 * @param data - Array of data to validate
 * @param schema - Schema object or path to schema file
 * @returns Promise with validation result
 * @throws ValidationError (as a rejection) when `data` is not an array or
 *   `schema` is neither a string nor an object
 */
export async function applySchemaValidationAsync(
  data: any[],
  schema: string | Schema
): Promise<ApplySchemaValidationResult> {
  if (!Array.isArray(data)) {
    throw new ValidationError('Data must be an array');
  }

  const schemaObj = typeof schema === 'string' ? await loadSchemaAsync(schema) : schema;

  // Fail fast on unusable schemas on both paths, without building a hook
  // that would never be used.
  if (typeof schemaObj !== 'object' || schemaObj === null) {
    throw new ValidationError('Schema must be an object or a path to a JSON file');
  }

  // Use worker pool for parallel validation if data is large
  if (data.length > 1000) {
    let pool: any;
    try {
      const { createWorkerPool } = require('../workers/worker-pool');
      pool = createWorkerPool({
        workerCount: Math.min(4, require('os').cpus().length),
        workerScript: require.resolve('./validation-worker.js')
      });
    } catch {
      // The pool is an optimisation only; validate in-process when it is unavailable.
      console.warn('Validation worker pool not available, validating synchronously');
      pool = undefined;
    }

    if (pool) {
      try {
        const results: any[] = await Promise.all(
          data.map((row, index) =>
            pool.execute({ row, index, schema: schemaObj, operation: 'validate' })
          )
        );

        const errors: Array<{ row: number; message: string; data: any }> = [];
        const validatedData: any[] = [];

        results.forEach((result, index) => {
          if (result.error) {
            errors.push({
              row: index + 1,
              message: result.error.message,
              data: data[index]
            });
          } else {
            validatedData.push(result.validatedRow);
          }
        });

        return {
          valid: errors.length === 0,
          errors,
          data: validatedData,
          summary: {
            totalRows: data.length,
            validRows: validatedData.length,
            errorCount: errors.length,
            errorRate: data.length > 0 ? (errors.length / data.length) * 100 : 0
          }
        };
      } finally {
        await pool.terminate();
      }
    }
  }

  // Small datasets, or no worker pool: validate synchronously
  return applySchemaValidation(data, schemaObj);
}
794
+
795
/**
 * Wraps the synchronous validation hook in a promise-returning function so it
 * can be registered where async hooks are expected.
 *
 * The wrapped hook resolves with whatever the synchronous hook returns and
 * rejects with whatever it throws.
 *
 * @param schema - Schema object or path to schema file
 * @returns Async validation hook function
 */
export function createAsyncValidationHook(schema: string | Schema): (row: any, index: number, context: any) => Promise<unknown> {
  const validateRow = createValidationHook(schema);

  const asyncHook = async (row: any, index: number, context: any): Promise<unknown> =>
    validateRow(row, index, context);

  return asyncHook;
}
808
+
809
// Default export: bundles the module's functions into a single object.
// It includes checkType, whose declaration carries no `export` keyword.
+ export default {
810
+ loadSchema,
811
+ loadSchemaAsync,
812
+ createValidationHook,
813
+ createAsyncValidationHook,
814
+ applySchemaValidation,
815
+ applySchemaValidationAsync,
816
+ createValidationHooks,
817
+ checkType,
818
+ createSchemaValidators
819
+ };