jtcsv 2.2.8 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246)
  1. package/README.md +204 -115
  2. package/bin/jtcsv.ts +2612 -0
  3. package/browser.d.ts +142 -0
  4. package/dist/benchmark.js +446 -0
  5. package/dist/benchmark.js.map +1 -0
  6. package/dist/bin/jtcsv.js +1940 -0
  7. package/dist/bin/jtcsv.js.map +1 -0
  8. package/dist/csv-to-json.js +1262 -0
  9. package/dist/csv-to-json.js.map +1 -0
  10. package/dist/errors.js +291 -0
  11. package/dist/errors.js.map +1 -0
  12. package/dist/eslint.config.js +147 -0
  13. package/dist/eslint.config.js.map +1 -0
  14. package/dist/index-core.js +95 -0
  15. package/dist/index-core.js.map +1 -0
  16. package/dist/index.js +93 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/json-save.js +229 -0
  19. package/dist/json-save.js.map +1 -0
  20. package/dist/json-to-csv.js +576 -0
  21. package/dist/json-to-csv.js.map +1 -0
  22. package/dist/jtcsv-core.cjs.js +1736 -0
  23. package/dist/jtcsv-core.cjs.js.map +1 -0
  24. package/dist/jtcsv-core.esm.js +1708 -0
  25. package/dist/jtcsv-core.esm.js.map +1 -0
  26. package/dist/jtcsv-core.umd.js +1742 -0
  27. package/dist/jtcsv-core.umd.js.map +1 -0
  28. package/dist/jtcsv-full.cjs.js +2241 -0
  29. package/dist/jtcsv-full.cjs.js.map +1 -0
  30. package/dist/jtcsv-full.esm.js +2209 -0
  31. package/dist/jtcsv-full.esm.js.map +1 -0
  32. package/dist/jtcsv-full.umd.js +2247 -0
  33. package/dist/jtcsv-full.umd.js.map +1 -0
  34. package/dist/jtcsv-workers.esm.js +768 -0
  35. package/dist/jtcsv-workers.esm.js.map +1 -0
  36. package/dist/jtcsv-workers.umd.js +782 -0
  37. package/dist/jtcsv-workers.umd.js.map +1 -0
  38. package/dist/jtcsv.cjs.js +1996 -2048
  39. package/dist/jtcsv.cjs.js.map +1 -1
  40. package/dist/jtcsv.esm.js +1992 -2048
  41. package/dist/jtcsv.esm.js.map +1 -1
  42. package/dist/jtcsv.umd.js +2157 -2209
  43. package/dist/jtcsv.umd.js.map +1 -1
  44. package/dist/plugins/express-middleware/index.js +350 -0
  45. package/dist/plugins/express-middleware/index.js.map +1 -0
  46. package/dist/plugins/fastify-plugin/index.js +315 -0
  47. package/dist/plugins/fastify-plugin/index.js.map +1 -0
  48. package/dist/plugins/hono/index.js +111 -0
  49. package/dist/plugins/hono/index.js.map +1 -0
  50. package/dist/plugins/nestjs/index.js +112 -0
  51. package/dist/plugins/nestjs/index.js.map +1 -0
  52. package/dist/plugins/nuxt/index.js +53 -0
  53. package/dist/plugins/nuxt/index.js.map +1 -0
  54. package/dist/plugins/remix/index.js +133 -0
  55. package/dist/plugins/remix/index.js.map +1 -0
  56. package/dist/plugins/sveltekit/index.js +155 -0
  57. package/dist/plugins/sveltekit/index.js.map +1 -0
  58. package/dist/plugins/trpc/index.js +136 -0
  59. package/dist/plugins/trpc/index.js.map +1 -0
  60. package/dist/run-demo.js +49 -0
  61. package/dist/run-demo.js.map +1 -0
  62. package/dist/src/browser/browser-functions.js +193 -0
  63. package/dist/src/browser/browser-functions.js.map +1 -0
  64. package/dist/src/browser/core.js +123 -0
  65. package/dist/src/browser/core.js.map +1 -0
  66. package/dist/src/browser/csv-to-json-browser.js +353 -0
  67. package/dist/src/browser/csv-to-json-browser.js.map +1 -0
  68. package/dist/src/browser/errors-browser.js +219 -0
  69. package/dist/src/browser/errors-browser.js.map +1 -0
  70. package/dist/src/browser/extensions/plugins.js +106 -0
  71. package/dist/src/browser/extensions/plugins.js.map +1 -0
  72. package/dist/src/browser/extensions/workers.js +66 -0
  73. package/dist/src/browser/extensions/workers.js.map +1 -0
  74. package/dist/src/browser/index.js +140 -0
  75. package/dist/src/browser/index.js.map +1 -0
  76. package/dist/src/browser/json-to-csv-browser.js +225 -0
  77. package/dist/src/browser/json-to-csv-browser.js.map +1 -0
  78. package/dist/src/browser/streams.js +340 -0
  79. package/dist/src/browser/streams.js.map +1 -0
  80. package/dist/src/browser/workers/csv-parser.worker.js +264 -0
  81. package/dist/src/browser/workers/csv-parser.worker.js.map +1 -0
  82. package/dist/src/browser/workers/worker-pool.js +338 -0
  83. package/dist/src/browser/workers/worker-pool.js.map +1 -0
  84. package/dist/src/core/delimiter-cache.js +196 -0
  85. package/dist/src/core/delimiter-cache.js.map +1 -0
  86. package/dist/src/core/node-optimizations.js +279 -0
  87. package/dist/src/core/node-optimizations.js.map +1 -0
  88. package/dist/src/core/plugin-system.js +399 -0
  89. package/dist/src/core/plugin-system.js.map +1 -0
  90. package/dist/src/core/transform-hooks.js +348 -0
  91. package/dist/src/core/transform-hooks.js.map +1 -0
  92. package/dist/src/engines/fast-path-engine-new.js +262 -0
  93. package/dist/src/engines/fast-path-engine-new.js.map +1 -0
  94. package/dist/src/engines/fast-path-engine.js +671 -0
  95. package/dist/src/engines/fast-path-engine.js.map +1 -0
  96. package/dist/src/errors.js +18 -0
  97. package/dist/src/errors.js.map +1 -0
  98. package/dist/src/formats/ndjson-parser.js +332 -0
  99. package/dist/src/formats/ndjson-parser.js.map +1 -0
  100. package/dist/src/formats/tsv-parser.js +230 -0
  101. package/dist/src/formats/tsv-parser.js.map +1 -0
  102. package/dist/src/index-with-plugins.js +259 -0
  103. package/dist/src/index-with-plugins.js.map +1 -0
  104. package/dist/src/types/index.js +3 -0
  105. package/dist/src/types/index.js.map +1 -0
  106. package/dist/src/utils/bom-utils.js +267 -0
  107. package/dist/src/utils/bom-utils.js.map +1 -0
  108. package/dist/src/utils/encoding-support.js +77 -0
  109. package/dist/src/utils/encoding-support.js.map +1 -0
  110. package/dist/src/utils/schema-validator.js +609 -0
  111. package/dist/src/utils/schema-validator.js.map +1 -0
  112. package/dist/src/utils/transform-loader.js +281 -0
  113. package/dist/src/utils/transform-loader.js.map +1 -0
  114. package/dist/src/utils/validators.js +40 -0
  115. package/dist/src/utils/validators.js.map +1 -0
  116. package/dist/src/utils/zod-adapter.js +144 -0
  117. package/dist/src/utils/zod-adapter.js.map +1 -0
  118. package/dist/src/web-server/index.js +648 -0
  119. package/dist/src/web-server/index.js.map +1 -0
  120. package/dist/src/workers/csv-multithreaded.js +211 -0
  121. package/dist/src/workers/csv-multithreaded.js.map +1 -0
  122. package/dist/src/workers/csv-parser.worker.js +179 -0
  123. package/dist/src/workers/csv-parser.worker.js.map +1 -0
  124. package/dist/src/workers/worker-pool.js +228 -0
  125. package/dist/src/workers/worker-pool.js.map +1 -0
  126. package/dist/stream-csv-to-json.js +665 -0
  127. package/dist/stream-csv-to-json.js.map +1 -0
  128. package/dist/stream-json-to-csv.js +389 -0
  129. package/dist/stream-json-to-csv.js.map +1 -0
  130. package/examples/advanced/conditional-transformations.ts +446 -0
  131. package/examples/advanced/csv-parser.worker.ts +89 -0
  132. package/examples/advanced/nested-objects-example.ts +306 -0
  133. package/examples/advanced/performance-optimization.ts +504 -0
  134. package/examples/advanced/run-demo-server.ts +116 -0
  135. package/examples/advanced/web-worker-usage.html +874 -0
  136. package/examples/async-multithreaded-example.ts +335 -0
  137. package/examples/cli-advanced-usage.md +290 -0
  138. package/examples/{cli-batch-processing.js → cli-batch-processing.ts} +38 -38
  139. package/examples/{cli-tool.js → cli-tool.ts} +5 -8
  140. package/examples/{error-handling.js → error-handling.ts} +356 -324
  141. package/examples/{express-api.js → express-api.ts} +161 -164
  142. package/examples/{large-dataset-example.js → large-dataset-example.ts} +201 -182
  143. package/examples/{ndjson-processing.js → ndjson-processing.ts} +456 -434
  144. package/examples/{plugin-excel-exporter.js → plugin-excel-exporter.ts} +6 -7
  145. package/examples/react-integration.tsx +637 -0
  146. package/examples/{schema-validation.js → schema-validation.ts} +2 -2
  147. package/examples/simple-usage.ts +194 -0
  148. package/examples/{streaming-example.js → streaming-example.ts} +12 -12
  149. package/index.d.ts +187 -18
  150. package/package.json +75 -81
  151. package/plugins.d.ts +37 -0
  152. package/schema.d.ts +103 -0
  153. package/src/browser/browser-functions.ts +402 -0
  154. package/src/browser/core.ts +152 -0
  155. package/src/browser/csv-to-json-browser.d.ts +3 -0
  156. package/src/browser/csv-to-json-browser.ts +494 -0
  157. package/src/browser/{errors-browser.js → errors-browser.ts} +305 -197
  158. package/src/browser/extensions/plugins.ts +93 -0
  159. package/src/browser/extensions/workers.ts +39 -0
  160. package/src/browser/globals.d.ts +5 -0
  161. package/src/browser/index.ts +192 -0
  162. package/src/browser/json-to-csv-browser.d.ts +3 -0
  163. package/src/browser/json-to-csv-browser.ts +338 -0
  164. package/src/browser/streams.ts +403 -0
  165. package/src/browser/workers/{csv-parser.worker.js → csv-parser.worker.ts} +3 -3
  166. package/src/browser/workers/{worker-pool.js → worker-pool.ts} +51 -30
  167. package/src/core/delimiter-cache.ts +320 -0
  168. package/src/core/{node-optimizations.js → node-optimizations.ts} +448 -407
  169. package/src/core/plugin-system.ts +588 -0
  170. package/src/core/transform-hooks.ts +566 -0
  171. package/src/engines/{fast-path-engine-new.js → fast-path-engine-new.ts} +11 -2
  172. package/src/engines/{fast-path-engine.js → fast-path-engine.ts} +79 -53
  173. package/src/errors.ts +1 -0
  174. package/src/formats/{ndjson-parser.js → ndjson-parser.ts} +24 -16
  175. package/src/formats/{tsv-parser.js → tsv-parser.ts} +18 -17
  176. package/src/{index-with-plugins.js → index-with-plugins.ts} +381 -357
  177. package/src/types/index.ts +275 -0
  178. package/src/utils/bom-utils.ts +373 -0
  179. package/src/utils/encoding-support.ts +155 -0
  180. package/src/utils/{schema-validator.js → schema-validator.ts} +814 -589
  181. package/src/utils/transform-loader.ts +389 -0
  182. package/src/utils/validators.ts +35 -0
  183. package/src/utils/zod-adapter.ts +280 -0
  184. package/src/web-server/{index.js → index.ts} +19 -19
  185. package/src/workers/csv-multithreaded.ts +310 -0
  186. package/src/workers/csv-parser.worker.ts +227 -0
  187. package/src/workers/worker-pool.ts +409 -0
  188. package/bin/jtcsv.js +0 -2462
  189. package/csv-to-json.js +0 -688
  190. package/errors.js +0 -208
  191. package/examples/simple-usage.js +0 -282
  192. package/index.js +0 -68
  193. package/json-save.js +0 -254
  194. package/json-to-csv.js +0 -526
  195. package/plugins/README.md +0 -91
  196. package/plugins/express-middleware/README.md +0 -64
  197. package/plugins/express-middleware/example.js +0 -136
  198. package/plugins/express-middleware/index.d.ts +0 -114
  199. package/plugins/express-middleware/index.js +0 -360
  200. package/plugins/express-middleware/package.json +0 -52
  201. package/plugins/fastify-plugin/index.js +0 -406
  202. package/plugins/fastify-plugin/package.json +0 -55
  203. package/plugins/hono/README.md +0 -28
  204. package/plugins/hono/index.d.ts +0 -12
  205. package/plugins/hono/index.js +0 -36
  206. package/plugins/hono/package.json +0 -35
  207. package/plugins/nestjs/README.md +0 -35
  208. package/plugins/nestjs/index.d.ts +0 -25
  209. package/plugins/nestjs/index.js +0 -77
  210. package/plugins/nestjs/package.json +0 -37
  211. package/plugins/nextjs-api/README.md +0 -57
  212. package/plugins/nextjs-api/examples/ConverterComponent.jsx +0 -386
  213. package/plugins/nextjs-api/examples/api-convert.js +0 -69
  214. package/plugins/nextjs-api/index.js +0 -387
  215. package/plugins/nextjs-api/package.json +0 -63
  216. package/plugins/nextjs-api/route.js +0 -371
  217. package/plugins/nuxt/README.md +0 -24
  218. package/plugins/nuxt/index.js +0 -21
  219. package/plugins/nuxt/package.json +0 -35
  220. package/plugins/nuxt/runtime/composables/useJtcsv.js +0 -6
  221. package/plugins/nuxt/runtime/plugin.js +0 -6
  222. package/plugins/remix/README.md +0 -26
  223. package/plugins/remix/index.d.ts +0 -16
  224. package/plugins/remix/index.js +0 -62
  225. package/plugins/remix/package.json +0 -35
  226. package/plugins/sveltekit/README.md +0 -28
  227. package/plugins/sveltekit/index.d.ts +0 -17
  228. package/plugins/sveltekit/index.js +0 -54
  229. package/plugins/sveltekit/package.json +0 -33
  230. package/plugins/trpc/README.md +0 -25
  231. package/plugins/trpc/index.d.ts +0 -7
  232. package/plugins/trpc/index.js +0 -32
  233. package/plugins/trpc/package.json +0 -34
  234. package/src/browser/browser-functions.js +0 -219
  235. package/src/browser/csv-to-json-browser.js +0 -700
  236. package/src/browser/index.js +0 -113
  237. package/src/browser/json-to-csv-browser.js +0 -309
  238. package/src/browser/streams.js +0 -393
  239. package/src/core/delimiter-cache.js +0 -186
  240. package/src/core/plugin-system.js +0 -476
  241. package/src/core/transform-hooks.js +0 -350
  242. package/src/errors.js +0 -26
  243. package/src/utils/transform-loader.js +0 -205
  244. package/stream-csv-to-json.js +0 -542
  245. package/stream-json-to-csv.js +0 -464
  246. package/examples/{web-workers-advanced.js → web-workers-advanced.ts} +0 -0
@@ -1,594 +1,819 @@
1
- /**
2
- * Schema Validator Utility
3
- *
4
- * Utility for loading and applying JSON schema validation in CLI
5
- */
6
-
7
- const fs = require('fs');
8
- const path = require('path');
9
-
10
- const {
11
- ValidationError,
12
- SecurityError,
13
- ConfigurationError
14
- } = require('../errors');
15
-
16
- /**
17
- * Loads JSON schema from file or string
18
- *
19
- * @param {string} schemaPathOrJson - Path to JSON file or JSON string
20
- * @returns {Object} Parsed JSON schema
21
- */
22
- function loadSchema(schemaPathOrJson) {
23
- if (!schemaPathOrJson || typeof schemaPathOrJson !== 'string') {
24
- throw new ValidationError('Schema must be a string (JSON or file path)');
25
- }
26
-
27
- let schemaString = schemaPathOrJson;
28
-
29
- // Check if it's a file path (ends with .json or contains path separators)
30
- const isFilePath = schemaPathOrJson.endsWith('.json') ||
31
- schemaPathOrJson.includes('/') ||
32
- schemaPathOrJson.includes('\\');
33
-
34
- if (isFilePath) {
35
- // Validate file path
36
- const safePath = path.resolve(schemaPathOrJson);
37
-
38
- // Prevent directory traversal
39
- const normalizedPath = path.normalize(schemaPathOrJson);
40
- if (normalizedPath.includes('..') ||
41
- /\\\.\.\\|\/\.\.\//.test(schemaPathOrJson) ||
42
- schemaPathOrJson.startsWith('..') ||
43
- schemaPathOrJson.includes('/..')) {
44
- throw new SecurityError('Directory traversal detected in schema file path');
45
- }
46
-
47
- // Check file exists and has .json extension
48
- if (!fs.existsSync(safePath)) {
49
- throw new ValidationError(`Schema file not found: ${schemaPathOrJson}`);
50
- }
51
-
52
- if (!safePath.toLowerCase().endsWith('.json')) {
53
- throw new ValidationError('Schema file must have .json extension');
54
- }
55
-
56
- try {
57
- schemaString = fs.readFileSync(safePath, 'utf8');
58
- } catch (error) {
59
- if (error.code === 'EACCES') {
60
- throw new SecurityError(`Permission denied reading schema file: ${schemaPathOrJson}`);
61
- }
62
- throw new ValidationError(`Failed to read schema file: ${error.message}`);
63
- }
64
- }
65
-
66
- // Parse JSON schema
67
- try {
68
- const schema = JSON.parse(schemaString);
69
-
70
- // Validate basic schema structure
71
- if (typeof schema !== 'object' || schema === null) {
72
- throw new ValidationError('Schema must be a JSON object');
73
- }
74
-
75
- return schema;
76
- } catch (error) {
77
- if (error instanceof SyntaxError) {
78
- throw new ValidationError(`Invalid JSON in schema: ${error.message}`);
79
- }
80
- throw new ValidationError(`Failed to parse schema: ${error.message}`);
81
- }
82
- }
83
-
84
- /**
85
- * Creates a validation hook for use with csvToJson/jsonToCsv hooks system
86
- *
87
- * @param {string|Object} schema - Schema object or path to schema file
88
- * @returns {Function} Validation hook function
89
- */
90
- function createValidationHook(schema) {
91
- let schemaObj;
92
-
93
- if (typeof schema === 'string') {
94
- // Load schema from file or JSON string
95
- schemaObj = loadSchema(schema);
96
- } else if (typeof schema === 'object' && schema !== null) {
97
- // Use provided schema object
98
- schemaObj = schema;
99
- } else {
100
- throw new ValidationError('Schema must be an object or a path to a JSON file');
101
- }
102
-
103
- // Try to use @jtcsv/validator if available
104
- let validator;
105
- try {
106
- const JtcsvValidator = require('../../packages/jtcsv-validator/src/index');
107
- validator = new JtcsvValidator();
108
-
109
- // Convert simple schema format to validator format
110
- if (schemaObj.fields) {
111
- // Assume it's already in validator format
112
- validator.schema(schemaObj.fields);
113
- } else {
114
- // Convert simple field definitions
115
- Object.entries(schemaObj).forEach(([field, rule]) => {
116
- if (typeof rule === 'object') {
117
- validator.field(field, rule);
118
- }
119
- });
120
- }
121
- } catch (error) {
122
- // Fallback to simple validation if validator is not available
123
- console.warn('@jtcsv/validator not available, using simple validation');
124
- validator = createSimpleValidator(schemaObj);
125
- }
126
-
127
- // Return a hook function compatible with hooks.perRow
128
- return function(row, index, context) {
129
- try {
130
- const result = validator.validate([row], {
131
- stopOnFirstError: true,
132
- transform: false
133
- });
134
-
135
- if (!result.valid && result.errors.length > 0) {
136
- const error = result.errors[0];
137
- throw new ValidationError(
138
- `Row ${index + 1}: ${error.message} (field: ${error.field})`
139
- );
140
- }
141
-
142
- return row;
143
- } catch (error) {
144
- if (error instanceof ValidationError) {
145
- throw error;
146
- }
147
- // Log error but don't crash - return original row
148
- console.error(`Validation error at row ${index}: ${error.message}`);
149
- if (process.env.NODE_ENV === 'development') {
150
- console.error(error.stack);
151
- }
152
- return row;
153
- }
154
- };
155
- }
156
-
157
- /**
158
- * Creates a simple validator for fallback when @jtcsv/validator is not available
159
- *
160
- * @private
161
- */
162
- function createSimpleValidator(schema) {
163
- return {
164
- validate(data, options = {}) {
165
- const errors = [];
166
- const warnings = [];
167
-
168
- if (!Array.isArray(data)) {
169
- return {
170
- valid: false,
171
- errors: [{ type: 'INVALID_DATA', message: 'Data must be an array' }],
172
- warnings: [],
173
- summary: {
174
- totalRows: 0,
175
- validRows: 0,
176
- errorCount: 1,
177
- warningCount: 0
178
- }
179
- };
180
- }
181
-
182
- for (let i = 0; i < data.length; i++) {
183
- const row = data[i];
184
-
185
- for (const [field, rule] of Object.entries(schema)) {
186
- const value = row[field];
187
-
188
- // Check required
189
- if (rule.required && (value === undefined || value === null || value === '')) {
190
- errors.push({
191
- row: i + 1,
192
- type: 'REQUIRED',
193
- field,
194
- message: `Field "${field}" is required`,
195
- value
196
- });
197
- continue;
198
- }
199
-
200
- // Skip further validation if value is empty and not required
201
- if (value === undefined || value === null || value === '') {
202
- continue;
203
- }
204
-
205
- // Check type
206
- if (rule.type) {
207
- const types = Array.isArray(rule.type) ? rule.type : [rule.type];
208
- let typeValid = false;
209
-
210
- for (const type of types) {
211
- if (checkType(value, type)) {
212
- typeValid = true;
213
- break;
214
- }
215
- }
216
-
217
- if (!typeValid) {
218
- errors.push({
219
- row: i + 1,
220
- type: 'TYPE',
221
- field,
222
- message: `Field "${field}" must be of type ${types.join(' or ')}`,
223
- value,
224
- expected: types
225
- });
226
- }
227
- }
228
-
229
- // Check min/max for strings
230
- if (rule.min !== undefined && typeof value === 'string' && value.length < rule.min) {
231
- errors.push({
232
- row: i + 1,
233
- type: 'MIN_LENGTH',
234
- field,
235
- message: `Field "${field}" must be at least ${rule.min} characters`,
236
- value,
237
- min: rule.min
238
- });
239
- }
240
-
241
- if (rule.max !== undefined && typeof value === 'string' && value.length > rule.max) {
242
- errors.push({
243
- row: i + 1,
244
- type: 'MAX_LENGTH',
245
- field,
246
- message: `Field "${field}" must be at most ${rule.max} characters`,
247
- value,
248
- max: rule.max
249
- });
250
- }
251
-
252
- // Check min/max for numbers
253
- if (rule.min !== undefined && typeof value === 'number' && value < rule.min) {
254
- errors.push({
255
- row: i + 1,
256
- type: 'MIN_VALUE',
257
- field,
258
- message: `Field "${field}" must be at least ${rule.min}`,
259
- value,
260
- min: rule.min
261
- });
262
- }
263
-
264
- if (rule.max !== undefined && typeof value === 'number' && value > rule.max) {
265
- errors.push({
266
- row: i + 1,
267
- type: 'MAX_VALUE',
268
- field,
269
- message: `Field "${field}" must be at most ${rule.max}`,
270
- value,
271
- max: rule.max
272
- });
273
- }
274
-
275
- // Check pattern
276
- if (rule.pattern && typeof value === 'string') {
277
- const pattern = rule.pattern instanceof RegExp ? rule.pattern : new RegExp(rule.pattern);
278
- if (!pattern.test(value)) {
279
- errors.push({
280
- row: i + 1,
281
- type: 'PATTERN',
282
- field,
283
- message: `Field "${field}" must match pattern`,
284
- value,
285
- pattern: pattern.toString()
286
- });
287
- }
288
- }
289
-
290
- // Check enum
291
- if (rule.enum && Array.isArray(rule.enum) && !rule.enum.includes(value)) {
292
- errors.push({
293
- row: i + 1,
294
- type: 'ENUM',
295
- field,
296
- message: `Field "${field}" must be one of: ${rule.enum.join(', ')}`,
297
- value,
298
- allowed: rule.enum
299
- });
300
- }
301
- }
302
- }
303
-
304
- return {
305
- valid: errors.length === 0,
306
- errors,
307
- warnings,
308
- summary: {
309
- totalRows: data.length,
310
- validRows: data.length - errors.length,
311
- errorCount: errors.length,
312
- warningCount: warnings.length
313
- }
314
- };
315
- }
316
- };
317
- }
318
-
319
- /**
320
- * Checks if value matches type
321
- *
322
- * @private
323
- */
324
- function checkType(value, type) {
325
- switch (type) {
326
- case 'string':
327
- return typeof value === 'string';
328
- case 'number':
329
- return typeof value === 'number' && !isNaN(value);
330
- case 'boolean':
331
- return typeof value === 'boolean';
332
- case 'integer':
333
- return Number.isInteger(value);
334
- case 'float':
335
- return typeof value === 'number' && !Number.isInteger(value);
336
- case 'date':
337
- return value instanceof Date && !isNaN(value);
338
- case 'array':
339
- return Array.isArray(value);
340
- case 'object':
341
- return typeof value === 'object' && value !== null && !Array.isArray(value);
342
- default:
343
- return false;
344
- }
345
- }
346
-
347
- /**
348
- * Applies schema validation to data array
349
- *
350
- * @param {Array} data - Array of data to validate
351
- * @param {string|Object} schema - Schema object or path to schema file
352
- * @returns {Object} Validation result
353
- */
354
- function applySchemaValidation(data, schema) {
355
- if (!Array.isArray(data)) {
356
- throw new ValidationError('Data must be an array');
357
- }
358
-
359
- const validationHook = createValidationHook(schema);
360
- const errors = [];
361
- const validatedData = [];
362
-
363
- for (let i = 0; i < data.length; i++) {
364
- try {
365
- const validatedRow = validationHook(data[i], i, { operation: 'validate' });
366
- validatedData.push(validatedRow);
367
- } catch (error) {
368
- if (error instanceof ValidationError) {
369
- errors.push({
370
- row: i + 1,
371
- message: error.message,
372
- data: data[i]
373
- });
374
- } else {
375
- // Skip rows with non-validation errors
376
- validatedData.push(data[i]);
377
- }
378
- }
379
- }
380
-
381
- return {
382
- valid: errors.length === 0,
383
- errors,
384
- data: validatedData,
385
- summary: {
386
- totalRows: data.length,
387
- validRows: validatedData.length,
388
- errorCount: errors.length,
389
- errorRate: data.length > 0 ? (errors.length / data.length) * 100 : 0
390
- }
391
- };
392
- }
393
-
394
- /**
395
- * Creates a TransformHooks instance with validation
396
- *
397
- * @param {string|Object} schema - Schema object or path to schema file
398
- * @returns {Object} TransformHooks instance
399
- */
400
- function createValidationHooks(schema) {
1
+ /**
2
+ * Schema Validator Utility
3
+ *
4
+ * Utility for loading and applying JSON schema validation in CLI
5
+ */
6
+
7
+ import * as fs from 'fs';
8
+ import * as fsPromises from 'fs/promises';
9
+ import * as path from 'path';
10
+ import {
11
+ ValidationError,
12
+ SecurityError,
13
+ ConfigurationError
14
+ } from '../errors';
15
+
16
+ export interface SchemaRule {
17
+ type?: string | string[];
18
+ required?: boolean;
19
+ min?: number;
20
+ max?: number;
21
+ pattern?: string | RegExp;
22
+ enum?: any[];
23
+ minLength?: number;
24
+ maxLength?: number;
25
+ minimum?: number;
26
+ maximum?: number;
27
+ exclusiveMinimum?: number;
28
+ exclusiveMaximum?: number;
29
+ multipleOf?: number;
30
+ minItems?: number;
31
+ maxItems?: number;
32
+ uniqueItems?: boolean;
33
+ items?: SchemaRule;
34
+ properties?: Record<string, SchemaRule>;
35
+ format?: string;
36
+ }
37
+
38
+ export interface Schema extends Record<string, any> {
39
+ properties?: Record<string, SchemaRule>;
40
+ required?: string[];
41
+ }
42
+
43
+ export interface ValidationErrorItem {
44
+ row: number;
45
+ type: string;
46
+ field: string;
47
+ message: string;
48
+ value?: any;
49
+ expected?: any;
50
+ min?: number;
51
+ max?: number;
52
+ pattern?: string;
53
+ allowed?: any[];
54
+ }
55
+
56
+ export interface ValidationResult {
57
+ valid: boolean;
58
+ errors: ValidationErrorItem[];
59
+ warnings: any[];
60
+ summary: {
61
+ totalRows: number;
62
+ validRows: number;
63
+ errorCount: number;
64
+ warningCount: number;
65
+ };
66
+ }
67
+
68
+ export interface ApplySchemaValidationResult {
69
+ valid: boolean;
70
+ errors: Array<{ row: number; message: string; data: any }>;
71
+ data: any[];
72
+ summary: {
73
+ totalRows: number;
74
+ validRows: number;
75
+ errorCount: number;
76
+ errorRate: number;
77
+ };
78
+ }
79
+
80
+ export interface Validator {
81
+ validate(data: any[], options?: { stopOnFirstError?: boolean; transform?: boolean }): ValidationResult;
82
+ schema?(schema: any): void;
83
+ field?(field: string, rule: any): void;
84
+ }
85
+
86
+ /**
87
+ * Loads JSON schema from file or string
88
+ *
89
+ * @param schemaPathOrJson - Path to JSON file or JSON string
90
+ * @returns Parsed JSON schema
91
+ */
92
+ export function loadSchema(schemaPathOrJson: string): Schema {
93
+ if (!schemaPathOrJson || typeof schemaPathOrJson !== 'string') {
94
+ throw new ValidationError('Schema must be a string (JSON or file path)');
95
+ }
96
+
97
+ let schemaString = schemaPathOrJson;
98
+
99
+ // Check if it's a file path (ends with .json or contains path separators)
100
+ const isFilePath = schemaPathOrJson.endsWith('.json') ||
101
+ schemaPathOrJson.includes('/') ||
102
+ schemaPathOrJson.includes('\\');
103
+
104
+ if (isFilePath) {
105
+ // Validate file path
106
+ const safePath = path.resolve(schemaPathOrJson);
107
+
108
+ // Prevent directory traversal
109
+ const normalizedPath = path.normalize(schemaPathOrJson);
110
+ if (normalizedPath.includes('..') ||
111
+ /\\\.\.\\|\/\.\.\//.test(schemaPathOrJson) ||
112
+ schemaPathOrJson.startsWith('..') ||
113
+ schemaPathOrJson.includes('/..')) {
114
+ throw new SecurityError('Directory traversal detected in schema file path');
115
+ }
116
+
117
+ // Check file exists and has .json extension
118
+ if (!fs.existsSync(safePath)) {
119
+ throw new ValidationError(`Schema file not found: ${schemaPathOrJson}`);
120
+ }
121
+
122
+ if (!safePath.toLowerCase().endsWith('.json')) {
123
+ throw new ValidationError('Schema file must have .json extension');
124
+ }
125
+
126
+ try {
127
+ schemaString = fs.readFileSync(safePath, 'utf8');
128
+ } catch (error: any) {
129
+ if (error.code === 'EACCES') {
130
+ throw new SecurityError(`Permission denied reading schema file: ${schemaPathOrJson}`);
131
+ }
132
+ throw new ValidationError(`Failed to read schema file: ${error.message}`);
133
+ }
134
+ }
135
+
136
+ // Parse JSON schema
137
+ try {
138
+ const schema = JSON.parse(schemaString) as Schema;
139
+
140
+ // Validate basic schema structure
141
+ if (typeof schema !== 'object' || schema === null) {
142
+ throw new ValidationError('Schema must be a JSON object');
143
+ }
144
+
145
+ return schema;
146
+ } catch (error: any) {
147
+ if (error instanceof SyntaxError) {
148
+ throw new ValidationError(`Invalid JSON in schema: ${error.message}`);
149
+ }
150
+ throw new ValidationError(`Failed to parse schema: ${error.message}`);
151
+ }
152
+ }
153
+
154
+ /**
155
+ * Creates a simple validator for fallback when @jtcsv/validator is not available
156
+ */
157
+ function createSimpleValidator(schema: Schema): Validator {
158
+ return {
159
+ validate(data: any[], options: { stopOnFirstError?: boolean; transform?: boolean } = {}): ValidationResult {
160
+ const errors: ValidationErrorItem[] = [];
161
+ const warnings: any[] = [];
162
+
163
+ if (!Array.isArray(data)) {
164
+ return {
165
+ valid: false,
166
+ errors: [{
167
+ row: 0,
168
+ type: 'INVALID_DATA',
169
+ field: '',
170
+ message: 'Data must be an array'
171
+ }],
172
+ warnings: [],
173
+ summary: {
174
+ totalRows: 0,
175
+ validRows: 0,
176
+ errorCount: 1,
177
+ warningCount: 0
178
+ }
179
+ };
180
+ }
181
+
182
+ for (let i = 0; i < data.length; i++) {
183
+ const row = data[i];
184
+
185
+ for (const [field, rule] of Object.entries(schema)) {
186
+ const value = row[field];
187
+
188
+ // Check required
189
+ if (rule.required && (value === undefined || value === null || value === '')) {
190
+ errors.push({
191
+ row: i + 1,
192
+ type: 'REQUIRED',
193
+ field,
194
+ message: `Field "${field}" is required`,
195
+ value
196
+ });
197
+ continue;
198
+ }
199
+
200
+ // Skip further validation if value is empty and not required
201
+ if (value === undefined || value === null || value === '') {
202
+ continue;
203
+ }
204
+
205
+ // Check type
206
+ if (rule.type) {
207
+ const types = Array.isArray(rule.type) ? rule.type : [rule.type];
208
+ let typeValid = false;
209
+
210
+ for (const type of types) {
211
+ if (checkType(value, type)) {
212
+ typeValid = true;
213
+ break;
214
+ }
215
+ }
216
+
217
+ if (!typeValid) {
218
+ errors.push({
219
+ row: i + 1,
220
+ type: 'TYPE',
221
+ field,
222
+ message: `Field "${field}" must be of type ${types.join(' or ')}`,
223
+ value,
224
+ expected: types
225
+ });
226
+ }
227
+ }
228
+
229
+ // Check min/max for strings
230
+ if (rule.min !== undefined && typeof value === 'string' && value.length < rule.min) {
231
+ errors.push({
232
+ row: i + 1,
233
+ type: 'MIN_LENGTH',
234
+ field,
235
+ message: `Field "${field}" must be at least ${rule.min} characters`,
236
+ value,
237
+ min: rule.min
238
+ });
239
+ }
240
+
241
+ if (rule.max !== undefined && typeof value === 'string' && value.length > rule.max) {
242
+ errors.push({
243
+ row: i + 1,
244
+ type: 'MAX_LENGTH',
245
+ field,
246
+ message: `Field "${field}" must be at most ${rule.max} characters`,
247
+ value,
248
+ max: rule.max
249
+ });
250
+ }
251
+
252
+ // Check min/max for numbers
253
+ if (rule.min !== undefined && typeof value === 'number' && value < rule.min) {
254
+ errors.push({
255
+ row: i + 1,
256
+ type: 'MIN_VALUE',
257
+ field,
258
+ message: `Field "${field}" must be at least ${rule.min}`,
259
+ value,
260
+ min: rule.min
261
+ });
262
+ }
263
+
264
+ if (rule.max !== undefined && typeof value === 'number' && value > rule.max) {
265
+ errors.push({
266
+ row: i + 1,
267
+ type: 'MAX_VALUE',
268
+ field,
269
+ message: `Field "${field}" must be at most ${rule.max}`,
270
+ value,
271
+ max: rule.max
272
+ });
273
+ }
274
+
275
+ // Check pattern
276
+ if (rule.pattern && typeof value === 'string') {
277
+ const pattern = rule.pattern instanceof RegExp ? rule.pattern : new RegExp(rule.pattern);
278
+ if (!pattern.test(value)) {
279
+ errors.push({
280
+ row: i + 1,
281
+ type: 'PATTERN',
282
+ field,
283
+ message: `Field "${field}" must match pattern`,
284
+ value,
285
+ pattern: pattern.toString()
286
+ });
287
+ }
288
+ }
289
+
290
+ // Check enum
291
+ if (rule.enum && Array.isArray(rule.enum) && !rule.enum.includes(value)) {
292
+ errors.push({
293
+ row: i + 1,
294
+ type: 'ENUM',
295
+ field,
296
+ message: `Field "${field}" must be one of: ${rule.enum.join(', ')}`,
297
+ value,
298
+ allowed: rule.enum
299
+ });
300
+ }
301
+ }
302
+ }
303
+
304
+ return {
305
+ valid: errors.length === 0,
306
+ errors,
307
+ warnings,
308
+ summary: {
309
+ totalRows: data.length,
310
+ validRows: data.length - errors.length,
311
+ errorCount: errors.length,
312
+ warningCount: warnings.length
313
+ }
314
+ };
315
+ }
316
+ };
317
+ }
318
+
319
/**
 * Checks whether a value matches a named schema type.
 *
 * Recognised type names: 'string', 'number', 'boolean', 'integer', 'float',
 * 'date', 'array' and 'object'. Any other name yields `false`.
 *
 * @param value - Value to test
 * @param type - Schema type name
 * @returns `true` when `value` satisfies `type`
 */
function checkType(value: any, type: string): boolean {
  switch (type) {
    case 'string':
      return typeof value === 'string';
    case 'number':
      return typeof value === 'number' && !isNaN(value);
    case 'boolean':
      return typeof value === 'boolean';
    case 'integer':
      // Number.isInteger is false for non-numbers, NaN and +/-Infinity.
      return Number.isInteger(value);
    case 'float':
      // NaN is rejected here exactly as it is for 'number'; without the
      // isNaN guard it would pass, because NaN is a number that is not an integer.
      return typeof value === 'number' && !isNaN(value) && !Number.isInteger(value);
    case 'date':
      // Only Date instances holding a valid timestamp count as dates.
      return value instanceof Date && !isNaN(value.getTime());
    case 'array':
      return Array.isArray(value);
    case 'object':
      // Plain-object check: null and arrays are excluded.
      return typeof value === 'object' && value !== null && !Array.isArray(value);
    default:
      return false;
  }
}
344
+
345
/**
 * Creates a validation hook for use with csvToJson/jsonToCsv hooks system
 *
 * The schema is resolved once, when the hook is created: a string is handed
 * to `loadSchema`, a non-null object is used as-is, and anything else is
 * rejected. The hook then validates one row per call.
 *
 * @param schema - Schema object, or a string accepted by `loadSchema`
 * @returns Hook `(row, index, context)` that returns the row unchanged when it passes validation
 * @throws ValidationError when `schema` is neither a string nor a non-null
 *   object; the returned hook throws ValidationError for a row that fails validation
 */
export function createValidationHook(schema: string | Schema): (row: any, index: number, context: any) => any {
  let schemaObj: Schema;

  if (typeof schema === 'string') {
    // Load schema from file or JSON string
    schemaObj = loadSchema(schema);
  } else if (typeof schema === 'object' && schema !== null) {
    // Use provided schema object
    schemaObj = schema;
  } else {
    throw new ValidationError('Schema must be an object or a path to a JSON file');
  }

  // Try to use @jtcsv/validator if available
  let validator: Validator;
  try {
    const JtcsvValidator = require('../../packages/jtcsv-validator/src/index');
    validator = new JtcsvValidator();

    // Convert simple schema format to validator format
    if ((schemaObj as any).fields) {
      // Assume it's already in validator format
      validator.schema!((schemaObj as any).fields);
    } else {
      // Convert simple field definitions
      Object.entries(schemaObj).forEach(([field, rule]) => {
        if (typeof rule === 'object') {
          validator.field!(field, rule);
        }
      });
    }
  } catch (error) {
    // Any failure above (module missing, or an error while configuring the
    // validator) falls back to the simple built-in validator.
    console.warn('@jtcsv/validator not available, using simple validation');
    validator = createSimpleValidator(schemaObj);
  }

  // Return a hook function compatible with hooks.perRow
  return function (row: any, index: number, context: any): any {
    try {
      const result = validator.validate([row], {
        stopOnFirstError: true,
        transform: false
      });

      if (!result.valid && result.errors.length > 0) {
        const error = result.errors[0];
        throw new ValidationError(
          `Row ${index + 1}: ${error.message} (field: ${error.field})`
        );
      }

      return row;
    } catch (error: any) {
      if (error instanceof ValidationError) {
        throw error;
      }
      // Unexpected (non-validation) failures are logged and the original row
      // is passed through. The row number is 1-based, matching the thrown
      // ValidationError message above.
      console.error(`Validation error at row ${index + 1}: ${error.message}`);
      if (process.env['NODE_ENV'] === 'development') {
        console.error(error.stack);
      }
      return row;
    }
  };
}
417
+
418
/**
 * Validates every element of an array against a schema.
 *
 * Each row is passed through the hook built by `createValidationHook`. A row
 * whose hook call throws a ValidationError is reported in `errors` and left
 * out of `data`; a row whose hook call throws anything else is kept in `data`.
 *
 * @param data - Array of data to validate
 * @param schema - Schema object or path to schema file
 * @returns Validation result with collected errors, accepted rows and a summary
 * @throws ValidationError when `data` is not an array
 */
export function applySchemaValidation(data: any[], schema: string | Schema): ApplySchemaValidationResult {
  if (!Array.isArray(data)) {
    throw new ValidationError('Data must be an array');
  }

  const checkRow = createValidationHook(schema);
  const failures: Array<{ row: number; message: string; data: any }> = [];
  const accepted: any[] = [];

  for (const [position, record] of data.entries()) {
    try {
      accepted.push(checkRow(record, position, { operation: 'validate' }));
    } catch (problem: any) {
      if (!(problem instanceof ValidationError)) {
        // Non-validation failure: keep the row as it was supplied.
        accepted.push(data[position]);
        continue;
      }
      failures.push({
        row: position + 1,
        message: problem.message,
        data: data[position]
      });
    }
  }

  const total = data.length;
  const failureCount = failures.length;

  return {
    valid: failureCount === 0,
    errors: failures,
    data: accepted,
    summary: {
      totalRows: total,
      validRows: accepted.length,
      errorCount: failureCount,
      errorRate: total > 0 ? (failureCount / total) * 100 : 0
    }
  };
}
464
+
465
+ /**
466
+ * Creates a TransformHooks instance with validation
467
+ *
468
+ * @param schema - Schema object or path to schema file
469
+ * @returns TransformHooks instance
470
+ */
471
+ export function createValidationHooks(schema: Schema): any {
401
472
  const { TransformHooks } = require('../core/transform-hooks');
402
473
  const hooks = new TransformHooks();
403
474
 
404
475
  const validationHook = createValidationHook(schema);
405
- hooks.perRow(validationHook);
406
-
407
- return hooks;
408
- }
409
-
410
- /**
411
- * Creates schema validators from JSON schema
412
- *
413
- * @param {Object} schema - JSON schema
414
- * @returns {Object} Validators object
415
- */
416
- function createSchemaValidators(schema) {
417
- const validators = {};
418
-
419
- // Handle both JSON Schema format and simple format
420
- const properties = schema.properties || schema;
421
- const requiredFields = schema.required || [];
422
-
423
- if (!properties || typeof properties !== 'object') {
424
- return validators;
425
- }
426
-
427
- for (const [key, definition] of Object.entries(properties)) {
428
- const validator = {
429
- type: definition.type,
430
- required: requiredFields.includes(key)
431
- };
432
-
433
- // Add format function for dates and other formats
434
- if (definition.type === 'string' && definition.format) {
435
- validator.format = (value) => {
436
- // Handle date-time format
437
- if (definition.format === 'date-time') {
438
- if (value instanceof Date) {
439
- return value.toISOString();
440
- }
441
- /* istanbul ignore next */
442
- if (typeof value === 'string') {
443
- // Try to parse as date
444
- const date = new Date(value);
445
- if (!isNaN(date.getTime())) {
446
- return date.toISOString();
447
- }
448
- }
449
- }
450
- // Handle email format
451
- if (definition.format === 'email') {
452
- if (typeof value === 'string') {
453
- return value.toLowerCase().trim();
454
- }
455
- }
456
- // Handle uri format
457
- if (definition.format === 'uri') {
458
- if (typeof value === 'string') {
459
- return value.trim();
460
- }
461
- }
462
- return value;
463
- };
464
- }
465
-
466
- // Add validation function
467
- validator.validate = (value) => {
468
- if (value === null || value === undefined) {
469
- return !validator.required;
470
- }
471
-
472
- // Type validation
473
- if (definition.type === 'string' && typeof value !== 'string') {
474
- // For date-time format, also accept Date objects
475
- if (definition.format === 'date-time' && value instanceof Date) {
476
- return true;
477
- }
478
- return false;
479
- }
480
- if (definition.type === 'number' && typeof value !== 'number') {
481
- return false;
482
- }
483
- if (definition.type === 'integer' && (!Number.isInteger(value) || typeof value !== 'number')) {
484
- return false;
485
- }
486
- if (definition.type === 'boolean' && typeof value !== 'boolean') {
487
- return false;
488
- }
489
- if (definition.type === 'array' && !Array.isArray(value)) {
490
- return false;
491
- }
492
- if (definition.type === 'object' && (typeof value !== 'object' || value === null || Array.isArray(value))) {
493
- return false;
494
- }
495
-
496
- // Additional constraints for strings
497
- if (definition.type === 'string') {
498
- if (definition.minLength !== undefined && value.length < definition.minLength) {
499
- return false;
500
- }
501
- if (definition.maxLength !== undefined && value.length > definition.maxLength) {
502
- return false;
503
- }
504
- if (definition.pattern && !new RegExp(definition.pattern).test(value)) {
505
- return false;
506
- }
507
- if (definition.format === 'email' && !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(value)) {
508
- return false;
509
- }
510
- if (definition.format === 'uri') {
511
- try {
512
- new URL(value);
513
- } catch {
514
- return false;
515
- }
516
- }
517
- }
518
-
519
- // Additional constraints for numbers
520
- if (definition.type === 'number' || definition.type === 'integer') {
521
- if (definition.minimum !== undefined && value < definition.minimum) {
522
- return false;
523
- }
524
- if (definition.maximum !== undefined && value > definition.maximum) {
525
- return false;
526
- }
527
- if (definition.exclusiveMinimum !== undefined && value <= definition.exclusiveMinimum) {
528
- return false;
529
- }
530
- if (definition.exclusiveMaximum !== undefined && value >= definition.exclusiveMaximum) {
531
- return false;
532
- }
533
- if (definition.multipleOf !== undefined && value % definition.multipleOf !== 0) {
534
- return false;
535
- }
536
- }
537
-
538
- // Additional constraints for arrays
539
- if (definition.type === 'array') {
540
- if (definition.minItems !== undefined && value.length < definition.minItems) {
541
- return false;
542
- }
543
- if (definition.maxItems !== undefined && value.length > definition.maxItems) {
544
- return false;
545
- }
546
- if (definition.uniqueItems && new Set(value).size !== value.length) {
547
- return false;
548
- }
549
- // Validate array items if schema is provided
550
- if (definition.items) {
551
- for (const item of value) {
552
- const itemValidator = createSchemaValidators({ properties: { item: definition.items } });
553
- if (itemValidator.item && !itemValidator.item.validate(item)) {
554
- return false;
555
- }
556
- }
557
- }
558
- }
559
-
560
- // Additional constraints for objects
561
- if (definition.type === 'object' && definition.properties) {
562
- const nestedValidators = createSchemaValidators(definition);
563
- for (const [nestedKey, nestedValidator] of Object.entries(nestedValidators)) {
564
- if (value[nestedKey] !== undefined && !nestedValidator.validate(value[nestedKey])) {
565
- return false;
566
- }
567
- if (nestedValidator.required && value[nestedKey] === undefined) {
568
- return false;
569
- }
570
- }
571
- }
572
-
573
- // Check enum
574
- if (definition.enum && !definition.enum.includes(value)) {
575
- return false;
576
- }
577
-
578
- return true;
579
- };
580
-
581
- validators[key] = validator;
582
- }
583
-
584
- return validators;
585
- }
586
-
587
- module.exports = {
588
- loadSchema,
589
- createValidationHook,
590
- applySchemaValidation,
591
- createValidationHooks,
592
- checkType,
593
- createSchemaValidators // Add this line
476
+ hooks.perRow(validationHook);
477
+
478
+ return hooks;
479
+ }
480
+
481
/**
 * Creates schema validators from JSON schema
 *
 * Accepts either JSON Schema format (`{ properties, required }`) or a simple
 * format where the schema object itself maps field names to definitions.
 * Each resulting validator exposes `type`, `required`, a `validate(value)`
 * predicate and, for string definitions that declare a `format`, a
 * `format(value)` normaliser.
 *
 * @param schema - JSON schema
 * @returns Validators object keyed by field name
 */
export function createSchemaValidators(schema: Schema): Record<string, any> {
  const validators: Record<string, any> = {};

  // Handle both JSON Schema format and simple format
  const properties = schema.properties || schema;
  // A non-array `required` is treated as "nothing required" instead of
  // throwing on `.includes`.
  const requiredFields: any[] = Array.isArray(schema.required) ? schema.required : [];

  if (!properties || typeof properties !== 'object') {
    return validators;
  }

  for (const [key, definition] of Object.entries(properties)) {
    const validator: any = {
      type: definition.type,
      required: requiredFields.includes(key)
    };

    // Add format function for dates and other formats
    if (definition.type === 'string' && definition.format) {
      validator.format = (value: any) => {
        // Handle date-time format
        if (definition.format === 'date-time') {
          if (value instanceof Date) {
            return value.toISOString();
          }
          /* istanbul ignore next */
          if (typeof value === 'string') {
            // Try to parse as date
            const date = new Date(value);
            if (!isNaN(date.getTime())) {
              return date.toISOString();
            }
          }
        }
        // Handle email format
        if (definition.format === 'email') {
          if (typeof value === 'string') {
            return value.toLowerCase().trim();
          }
        }
        // Handle uri format
        if (definition.format === 'uri') {
          if (typeof value === 'string') {
            return value.trim();
          }
        }
        return value;
      };
    }

    // Add validation function
    validator.validate = (value: any) => {
      // Missing values are valid only for optional fields.
      if (value === null || value === undefined) {
        return !validator.required;
      }

      // Type validation
      if (definition.type === 'string' && typeof value !== 'string') {
        // For date-time format, also accept Date objects
        if (definition.format === 'date-time' && value instanceof Date) {
          return true;
        }
        return false;
      }
      if (definition.type === 'number' && typeof value !== 'number') {
        return false;
      }
      // Number.isInteger already rejects every non-number value.
      if (definition.type === 'integer' && !Number.isInteger(value)) {
        return false;
      }
      if (definition.type === 'boolean' && typeof value !== 'boolean') {
        return false;
      }
      if (definition.type === 'array' && !Array.isArray(value)) {
        return false;
      }
      if (definition.type === 'object' && (typeof value !== 'object' || value === null || Array.isArray(value))) {
        return false;
      }

      // Additional constraints for strings
      if (definition.type === 'string') {
        if (definition.minLength !== undefined && value.length < definition.minLength) {
          return false;
        }
        if (definition.maxLength !== undefined && value.length > definition.maxLength) {
          return false;
        }
        if (definition.pattern && !new RegExp(definition.pattern).test(value)) {
          return false;
        }
        if (definition.format === 'email' && !/^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(value)) {
          return false;
        }
        if (definition.format === 'uri') {
          try {
            new URL(value);
          } catch {
            return false;
          }
        }
      }

      // Additional constraints for numbers
      if (definition.type === 'number' || definition.type === 'integer') {
        if (definition.minimum !== undefined && value < definition.minimum) {
          return false;
        }
        if (definition.maximum !== undefined && value > definition.maximum) {
          return false;
        }
        if (definition.exclusiveMinimum !== undefined && value <= definition.exclusiveMinimum) {
          return false;
        }
        if (definition.exclusiveMaximum !== undefined && value >= definition.exclusiveMaximum) {
          return false;
        }
        if (definition.multipleOf !== undefined && !isMultipleOf(value, definition.multipleOf)) {
          return false;
        }
      }

      // Additional constraints for arrays
      if (definition.type === 'array') {
        if (definition.minItems !== undefined && value.length < definition.minItems) {
          return false;
        }
        if (definition.maxItems !== undefined && value.length > definition.maxItems) {
          return false;
        }
        if (definition.uniqueItems && new Set(value).size !== value.length) {
          return false;
        }
        // Validate array items if schema is provided. The item validator is
        // built once per call rather than once per element.
        if (definition.items && value.length > 0) {
          const itemValidator = createSchemaValidators({ properties: { item: definition.items } }).item;
          if (itemValidator) {
            for (const item of value) {
              if (!itemValidator.validate(item)) {
                return false;
              }
            }
          }
        }
      }

      // Additional constraints for objects
      if (definition.type === 'object' && definition.properties) {
        const nestedValidators = createSchemaValidators(definition);
        for (const [nestedKey, nestedValidator] of Object.entries(nestedValidators)) {
          if (value[nestedKey] !== undefined && !nestedValidator.validate(value[nestedKey])) {
            return false;
          }
          if (nestedValidator.required && value[nestedKey] === undefined) {
            return false;
          }
        }
      }

      // Check enum
      if (definition.enum && !definition.enum.includes(value)) {
        return false;
      }

      return true;
    };

    validators[key] = validator;
  }

  return validators;
}

/**
 * Tells whether `value` is an integer multiple of `step`, tolerating binary
 * floating-point rounding (a plain `0.3 % 0.1` is not 0).
 *
 * A zero step, NaN or an infinite value never counts as a multiple: the
 * quotient is then NaN or infinite and the comparison below is false.
 */
function isMultipleOf(value: number, step: number): boolean {
  const quotient = value / step;
  const distance = Math.abs(quotient - Math.round(quotient));
  // A few ulps of slack, scaled by the quotient's magnitude.
  return distance <= 4 * Number.EPSILON * Math.max(1, Math.abs(quotient));
}
657
+
658
/**
 * Async version of loadSchema that reads file asynchronously
 *
 * The argument is treated as a file path when it ends with `.json` or
 * contains a path separator, unless it starts with `{` (ignoring leading
 * whitespace), in which case it is parsed as inline JSON. Without that
 * exception, inline JSON containing `/` or `\` (URLs, escape sequences)
 * would be looked up as a file and rejected as "not found".
 *
 * @param schemaPathOrJson - Path to JSON file or JSON string
 * @returns Promise with parsed JSON schema
 * @throws ValidationError when the argument is empty or not a string, the
 *   file is missing, lacks a `.json` extension or cannot be read, or the
 *   content is not a JSON object
 * @throws SecurityError when the path contains a `..` traversal or the file
 *   cannot be read because of permissions (EACCES)
 */
export async function loadSchemaAsync(schemaPathOrJson: string): Promise<Schema> {
  if (!schemaPathOrJson || typeof schemaPathOrJson !== 'string') {
    throw new ValidationError('Schema must be a string (JSON or file path)');
  }

  let schemaString = schemaPathOrJson;

  // Inline JSON objects start with "{"; never mistake them for a path.
  const looksLikeInlineJson = /^\s*\{/.test(schemaPathOrJson);

  // Check if it's a file path (ends with .json or contains path separators)
  const isFilePath = !looksLikeInlineJson && (
    schemaPathOrJson.endsWith('.json') ||
    schemaPathOrJson.includes('/') ||
    schemaPathOrJson.includes('\\')
  );

  if (isFilePath) {
    // Validate file path
    const safePath = path.resolve(schemaPathOrJson);

    // Prevent directory traversal
    const normalizedPath = path.normalize(schemaPathOrJson);
    if (normalizedPath.includes('..') ||
        /\\\.\.\\|\/\.\.\//.test(schemaPathOrJson) ||
        schemaPathOrJson.startsWith('..') ||
        schemaPathOrJson.includes('/..')) {
      throw new SecurityError('Directory traversal detected in schema file path');
    }

    // Check file exists and has .json extension
    try {
      await fsPromises.access(safePath);
    } catch {
      throw new ValidationError(`Schema file not found: ${schemaPathOrJson}`);
    }

    if (!safePath.toLowerCase().endsWith('.json')) {
      throw new ValidationError('Schema file must have .json extension');
    }

    try {
      schemaString = await fsPromises.readFile(safePath, 'utf8');
    } catch (error: any) {
      if (error.code === 'EACCES') {
        throw new SecurityError(`Permission denied reading schema file: ${schemaPathOrJson}`);
      }
      throw new ValidationError(`Failed to read schema file: ${error.message}`);
    }
  }

  // Parse JSON schema
  try {
    const schema = JSON.parse(schemaString) as Schema;

    // Validate basic schema structure
    if (typeof schema !== 'object' || schema === null) {
      throw new ValidationError('Schema must be a JSON object');
    }

    return schema;
  } catch (error: any) {
    if (error instanceof SyntaxError) {
      throw new ValidationError(`Invalid JSON in schema: ${error.message}`);
    }
    throw new ValidationError(`Failed to parse schema: ${error.message}`);
  }
}
727
+
728
/**
 * Promise-based counterpart of applySchemaValidation.
 *
 * Inputs of up to 1000 rows are validated by delegating to
 * `applySchemaValidation`. Larger inputs are sent row by row to a worker
 * pool, which is always terminated afterwards, including on failure.
 *
 * @param data - Array of data to validate
 * @param schema - Schema object or path to schema file
 * @returns Promise with validation result
 * @throws ValidationError when `data` is not an array
 */
export async function applySchemaValidationAsync(
  data: any[],
  schema: string | Schema
): Promise<ApplySchemaValidationResult> {
  if (!Array.isArray(data)) {
    throw new ValidationError('Data must be an array');
  }

  let schemaObj: Schema;
  if (typeof schema === 'string') {
    schemaObj = await loadSchemaAsync(schema);
  } else {
    schemaObj = schema;
  }

  // The hook itself is not used below; the call is kept because it rejects
  // an unusable schema before any work is dispatched.
  createValidationHook(schemaObj);

  // For small datasets, validate synchronously
  if (!(data.length > 1000)) {
    return applySchemaValidation(data, schemaObj);
  }

  // Use worker pool for parallel validation if data is large
  const { createWorkerPool } = require('../workers/worker-pool');
  const pool = createWorkerPool({
    workerCount: Math.min(4, require('os').cpus().length),
    workerScript: require.resolve('./validation-worker.js')
  });

  try {
    const outcomes = await Promise.all(
      data.map((row, index) =>
        pool.execute({ row, index, schema: schemaObj, operation: 'validate' })
      )
    );

    const failures: Array<{ row: number; message: string; data: any }> = [];
    const accepted: any[] = [];

    for (const [position, outcome] of outcomes.entries()) {
      if (!outcome.error) {
        accepted.push(outcome.validatedRow);
        continue;
      }
      failures.push({
        row: position + 1,
        message: outcome.error.message,
        data: data[position]
      });
    }

    const total = data.length;

    return {
      valid: failures.length === 0,
      errors: failures,
      data: accepted,
      summary: {
        totalRows: total,
        validRows: accepted.length,
        errorCount: failures.length,
        errorRate: total > 0 ? (failures.length / total) * 100 : 0
      }
    };
  } finally {
    await pool.terminate();
  }
}
794
+
795
/**
 * Wraps the synchronous validation hook in a promise-returning function so
 * it can be used where async hooks are expected.
 *
 * The underlying hook is built once by `createValidationHook`; an exception
 * it throws for a row surfaces as a rejected promise.
 *
 * @param schema - Schema object or path to schema file
 * @returns Async validation hook function
 */
export function createAsyncValidationHook(schema: string | Schema): (row: any, index: number, context: any) => Promise<unknown> {
  const validateRow = createValidationHook(schema);

  return async (row: any, index: number, context: any): Promise<unknown> => {
    const outcome = validateRow(row, index, context);
    return Promise.resolve(outcome);
  };
}
808
+
809
+ export default {
810
+ loadSchema,
811
+ loadSchemaAsync,
812
+ createValidationHook,
813
+ createAsyncValidationHook,
814
+ applySchemaValidation,
815
+ applySchemaValidationAsync,
816
+ createValidationHooks,
817
+ checkType,
818
+ createSchemaValidators
594
819
  };