jtcsv 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,532 @@
1
+ /**
2
+ * Stream JSON to CSV Converter - Node.js Module
3
+ *
4
+ * A streaming implementation for converting JSON data to CSV format
5
+ * with memory-efficient processing for large files.
6
+ *
7
+ * @module stream-json-to-csv
8
+ */
9
+
10
+ const {
11
+ ValidationError,
12
+ SecurityError,
13
+ LimitError,
14
+ ConfigurationError,
15
+ safeExecute
16
+ } = require('./errors');
17
+
18
+ const { Transform, Readable, Writable } = require('stream');
19
+ const { pipeline } = require('stream/promises');
20
+
/**
 * Creates an object-mode transform stream that converts JSON objects to CSV rows.
 *
 * The column set is fixed when the first record arrives: template keys (if any)
 * come first, followed by the remaining keys of that first record. Every later
 * record is projected onto those columns; values that are null, undefined or
 * the empty string are written as empty fields.
 *
 * Header names and values are quoted when they contain the delimiter, a double
 * quote, or a line break. Non-numeric values that start with `=`, `+`, `-` or
 * `@` are prefixed with a single quote as CSV-injection protection; real
 * numbers (e.g. `-5`) are written unchanged.
 *
 * Errors raised while processing records are reported through the stream
 * (LimitError when more than `maxRecords` records arrive, ValidationError for
 * non-object input, a failing/invalid custom transform, or a schema violation).
 * Invalid options raise ConfigurationError inside the `safeExecute` wrapper.
 *
 * @param {Object} [options] - Configuration options
 * @param {string} [options.delimiter=';'] - CSV delimiter (exactly one character)
 * @param {boolean} [options.includeHeaders=true] - Whether to include headers row
 * @param {Object} [options.renameMap={}] - Map for renaming column headers (oldKey: newKey)
 * @param {Object} [options.template={}] - Template object to ensure consistent column order
 * @param {number} [options.maxRecords=Infinity] - Maximum number of records to process
 * @param {Function} [options.transform] - Custom transform function for each row
 * @param {Object} [options.schema] - JSON schema for validation and formatting
 * @returns {Transform} Transform stream (as returned through `safeExecute`)
 *
 * @example
 * const { createJsonToCsvStream } = require('./stream-json-to-csv');
 *
 * const transformStream = createJsonToCsvStream({
 *   delimiter: ',',
 *   renameMap: { id: 'ID', name: 'Full Name' }
 * });
 *
 * // Pipe JSON objects to CSV
 * jsonReadableStream.pipe(transformStream).pipe(csvWritableStream);
 */
function createJsonToCsvStream(options = {}) {
  return safeExecute(() => {
    const opts = options && typeof options === 'object' ? options : {};

    const {
      delimiter = ';',
      includeHeaders = true,
      maxRecords = Infinity,
      transform: userTransform = null,
      schema = null
    } = opts;
    // Destructuring defaults only cover `undefined`; treat null/falsy as "not provided".
    const renameMap = opts.renameMap || {};
    const template = opts.template || {};

    // Validate options
    if (typeof delimiter !== 'string') {
      throw new ConfigurationError('Delimiter must be a string');
    }

    if (delimiter.length !== 1) {
      throw new ConfigurationError('Delimiter must be a single character');
    }

    if (typeof renameMap !== 'object') {
      throw new ConfigurationError('renameMap must be an object');
    }

    if (typeof template !== 'object') {
      throw new ConfigurationError('template must be an object');
    }

    if (
      maxRecords !== Infinity &&
      (typeof maxRecords !== 'number' || Number.isNaN(maxRecords) || maxRecords <= 0)
    ) {
      throw new ConfigurationError('maxRecords must be a positive number or Infinity');
    }

    if (userTransform && typeof userTransform !== 'function') {
      throw new ConfigurationError('transform must be a function');
    }

    if (schema && typeof schema !== 'object') {
      throw new ConfigurationError('schema must be an object');
    }

    const schemaValidators = schema ? createSchemaValidators(schema) : null;

    // Matches the characters that make spreadsheet applications evaluate a cell as a formula.
    const formulaPrefix = /^[=+\-@]/;

    // Own-property test so keys such as "constructor" never resolve to inherited members.
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);

    // Header name for a record key: the renameMap entry when it is an own, truthy value.
    const toHeaderName = (key) =>
      (hasOwn(renameMap, key) && renameMap[key] ? String(renameMap[key]) : key);

    // Schema validator for a key, or null when there is none.
    const lookupValidator = (key) =>
      (schemaValidators && hasOwn(schemaValidators, key) ? schemaValidators[key] : null);

    // Wraps a field in double quotes (doubling embedded quotes) when it would break the row.
    const quoteField = (text) => {
      const needsQuoting =
        text.includes(delimiter) ||
        text.includes('"') ||
        text.includes('\n') ||
        text.includes('\r');
      return needsQuoting ? `"${text.replace(/"/g, '""')}"` : text;
    };

    // Converts any value to a CSV field with CSV-injection protection.
    const escapeValue = (value) => {
      if (value === null || value === undefined || value === '') {
        return '';
      }

      const text = String(value);
      // Numbers cannot carry a formula, and prefixing them would corrupt negative values.
      const isNumeric = typeof value === 'number' || typeof value === 'bigint';
      const guarded = !isNumeric && formulaPrefix.test(text) ? `'${text}` : text;
      return quoteField(guarded);
    };

    const formatHeaderLine = (names) => `${names.map(quoteField).join(delimiter)}\n`;

    let columnsReady = false;
    let headersWritten = false;
    let recordCount = 0;
    let finalHeaders = [];
    // header name -> original record key
    const headerToKey = new Map();

    return new Transform({
      objectMode: true,

      transform(chunk, encoding, callback) {
        try {
          // Check record limit
          if (recordCount >= maxRecords) {
            return callback(new LimitError(
              `Data size exceeds maximum limit of ${maxRecords} records`,
              maxRecords,
              recordCount
            ));
          }

          // Validate chunk is an object
          if (!chunk || typeof chunk !== 'object') {
            return callback(new ValidationError('Input data must be objects'));
          }

          // Apply custom transform if provided
          let item = chunk;
          if (userTransform) {
            try {
              item = userTransform(chunk);
              if (!item || typeof item !== 'object') {
                return callback(new ValidationError('Transform function must return an object'));
              }
            } catch (error) {
              return callback(new ValidationError(`Transform function error: ${error.message}`));
            }
          }

          // Fix the column layout on the first record
          if (!columnsReady) {
            const recordKeys = Object.keys(item);
            const templateKeys = Object.keys(template);
            const recordHeaders = recordKeys.map(toHeaderName);
            const templateHeaders = templateKeys.map(toHeaderName);

            // Register template keys too, so a renamed template column that is
            // missing from the first record still resolves to its original key.
            templateKeys.forEach((key, index) => headerToKey.set(templateHeaders[index], key));
            recordKeys.forEach((key, index) => headerToKey.set(recordHeaders[index], key));

            const templateHeaderSet = new Set(templateHeaders);
            finalHeaders = [
              ...templateHeaders,
              ...recordHeaders.filter((name) => !templateHeaderSet.has(name))
            ];
            columnsReady = true;

            // Write headers if requested
            if (includeHeaders && finalHeaders.length > 0 && !headersWritten) {
              this.push(formatHeaderLine(finalHeaders));
              headersWritten = true;
            }
          }

          // Build CSV row
          const rowValues = finalHeaders.map((header) => {
            const originalKey = headerToKey.has(header) ? headerToKey.get(header) : header;
            const validator = lookupValidator(originalKey);
            let value = item[originalKey];

            if (validator) {
              // Formatting runs first so validation sees the formatted value.
              if (validator.format) {
                value = validator.format(value);
              }
              if (validator.validate && !validator.validate(value)) {
                throw new ValidationError(`Invalid value for field '${originalKey}': ${value}`);
              }
            }

            return escapeValue(value);
          });

          this.push(`${rowValues.join(delimiter)}\n`);
          recordCount++;

          callback();
        } catch (error) {
          callback(error);
        }
      },

      flush(callback) {
        // No record ever arrived: emit the template's header row, if there is one.
        if (includeHeaders && !headersWritten) {
          const templateHeaders = Object.keys(template).map(toHeaderName);
          if (templateHeaders.length > 0) {
            this.push(formatHeaderLine(templateHeaders));
          }
        }
        callback();
      }
    });
  }, 'STREAM_CREATION_ERROR', { function: 'createJsonToCsvStream' });
}
285
+
/**
 * Creates per-field validators from a JSON-schema-like object.
 *
 * For every entry of `schema.properties` the result holds an object with:
 *  - `type`: the declared type (may be undefined),
 *  - `required`: true when the key is listed in the `schema.required` array,
 *  - `format(value)`: only for `type: 'string'` + `format: 'date-time'`;
 *    converts Date instances and parseable date strings to ISO-8601 strings
 *    and returns any other value unchanged,
 *  - `validate(value)`: returns true when the value satisfies the definition.
 *
 * null/undefined values are valid unless the field is required. Numeric
 * bounds (`minimum`/`maximum`) are checked for number values only; `minLength`,
 * `maxLength` and `pattern` are checked for string values only.
 *
 * @private
 * @param {Object} schema - JSON schema (`properties`, optional `required` array)
 * @returns {Object} Validators keyed by property name; empty when the schema
 *   has no `properties` object
 */
function createSchemaValidators(schema) {
  const validators = {};

  if (!schema || !schema.properties || typeof schema.properties !== 'object') {
    return validators;
  }

  // A non-array `required` (e.g. a string) must not be matched by substring.
  const requiredKeys = Array.isArray(schema.required) ? schema.required : [];

  for (const [key, rawDefinition] of Object.entries(schema.properties)) {
    // Tolerate null / non-object definitions: they impose no constraints.
    const definition = rawDefinition && typeof rawDefinition === 'object' ? rawDefinition : {};

    const validator = {
      type: definition.type,
      required: requiredKeys.includes(key)
    };

    // Add format function for dates
    if (definition.type === 'string' && definition.format === 'date-time') {
      validator.format = (value) => {
        if (value instanceof Date) {
          return value.toISOString();
        }
        if (typeof value === 'string') {
          const parsed = new Date(value);
          if (!Number.isNaN(parsed.getTime())) {
            return parsed.toISOString();
          }
        }
        return value;
      };
    }

    // Compiled on first use so an invalid pattern still fails at validation time.
    let compiledPattern = null;

    validator.validate = (value) => {
      if (value === null || value === undefined) {
        return !validator.required;
      }

      // Type validation
      if (definition.type === 'string' && typeof value !== 'string') {
        return false;
      }
      if (definition.type === 'number' && typeof value !== 'number') {
        return false;
      }
      // Number.isInteger is already false for every non-number value.
      if (definition.type === 'integer' && !Number.isInteger(value)) {
        return false;
      }
      if (definition.type === 'boolean' && typeof value !== 'boolean') {
        return false;
      }

      // Numeric constraints
      if (typeof value === 'number') {
        if (definition.minimum !== undefined && value < definition.minimum) {
          return false;
        }
        if (definition.maximum !== undefined && value > definition.maximum) {
          return false;
        }
      }

      // String constraints
      if (typeof value === 'string') {
        if (definition.minLength !== undefined && value.length < definition.minLength) {
          return false;
        }
        if (definition.maxLength !== undefined && value.length > definition.maxLength) {
          return false;
        }
        if (definition.pattern) {
          if (compiledPattern === null) {
            compiledPattern = new RegExp(definition.pattern);
          }
          // Reset in case the pattern carries a sticky/global flag.
          compiledPattern.lastIndex = 0;
          if (!compiledPattern.test(value)) {
            return false;
          }
        }
      }

      return true;
    };

    validators[key] = validator;
  }

  return validators;
}
368
+
/**
 * Pipes a readable stream of JSON objects through the JSON-to-CSV transform
 * into a writable stream and waits for the pipeline to complete.
 *
 * The work runs inside `safeExecute` with the 'STREAM_PROCESSING_ERROR' code;
 * how failures are reported is determined by that helper.
 *
 * @param {Readable} inputStream - Readable stream of JSON objects
 * @param {Writable} outputStream - Writable stream for CSV output
 * @param {Object} options - Configuration options (same as createJsonToCsvStream)
 * @returns {Promise<void>}
 *
 * @example
 * const { streamJsonToCsv } = require('./stream-json-to-csv');
 *
 * await streamJsonToCsv(jsonStream, csvStream, {
 *   delimiter: ',',
 *   schema: {
 *     properties: {
 *       id: { type: 'integer' },
 *       name: { type: 'string', minLength: 1 },
 *       date: { type: 'string', format: 'date-time' }
 *     }
 *   }
 * });
 */
async function streamJsonToCsv(inputStream, outputStream, options = {}) {
  // The conversion itself: source -> CSV transform -> sink.
  const runPipeline = async () => {
    const csvTransform = createJsonToCsvStream(options);
    await pipeline(inputStream, csvTransform, outputStream);
  };

  const errorContext = { function: 'streamJsonToCsv' };
  return safeExecute(runPipeline, 'STREAM_PROCESSING_ERROR', errorContext);
}
402
+
/**
 * Converts a stream of JSON objects to CSV and saves it to a file using streaming.
 *
 * The path must be a non-empty string ending in `.csv` and must not contain
 * directory traversal sequences. Missing parent directories are created.
 * Unless `options.addBOM` is `false`, a UTF-8 byte order mark is written first
 * so Excel detects the encoding.
 *
 * The CSV transform is built (and the options thereby validated) before the
 * output file is opened, so invalid options never leave an open file handle or
 * a truncated file behind.
 *
 * @param {Readable} inputStream - Readable stream of JSON objects
 * @param {string} filePath - Path to save the CSV file
 * @param {Object} options - Configuration options (same as createJsonToCsvStream)
 * @param {boolean} [options.addBOM=true] - Write a UTF-8 BOM at the start of the file
 * @returns {Promise<string>} The callback resolves with the absolute path of
 *   the written file (returned through `safeExecute`)
 *
 * @example
 * const { saveJsonStreamAsCsv } = require('./stream-json-to-csv');
 *
 * await saveJsonStreamAsCsv(jsonStream, './output.csv', {
 *   delimiter: ',',
 *   includeHeaders: true
 * });
 */
async function saveJsonStreamAsCsv(inputStream, filePath, options = {}) {
  return safeExecute(async () => {
    const fs = require('fs');
    const path = require('path');

    // Validate file path
    if (typeof filePath !== 'string' || filePath.trim() === '') {
      throw new ValidationError('File path must be a non-empty string');
    }

    if (!filePath.toLowerCase().endsWith('.csv')) {
      throw new ValidationError('File must have .csv extension');
    }

    // Prevent directory traversal attacks
    const normalizedPath = path.normalize(filePath);
    if (normalizedPath.includes('..') ||
        /\\\.\.\\|\/\.\.\//.test(filePath) ||
        filePath.startsWith('..') ||
        filePath.includes('/..')) {
      throw new SecurityError('Directory traversal detected in file path');
    }

    // Create the transform first: it validates the options, and a failure here
    // must happen before any file is created or truncated.
    const transformStream = createJsonToCsvStream(options);

    const safePath = path.resolve(filePath);

    // Ensure directory exists
    await fs.promises.mkdir(path.dirname(safePath), { recursive: true });

    const writeStream = fs.createWriteStream(safePath, 'utf8');

    // UTF-8 BOM for Excel compatibility; on by default, tolerant of null options.
    if (options?.addBOM !== false) {
      writeStream.write('\uFEFF');
    }

    await pipeline(
      inputStream,
      transformStream,
      writeStream
    );

    return safePath;
  }, 'FILE_STREAM_ERROR', { function: 'saveJsonStreamAsCsv' });
}
467
+
/**
 * Creates an object-mode readable stream from an array of JSON objects.
 *
 * The array is copied on the first read, so later mutations of `data` do not
 * affect the stream. A non-array `data` yields an empty stream.
 *
 * A `null` entry cannot travel through a stream (pushing `null` means
 * end-of-stream), so instead of silently truncating the output the stream is
 * destroyed with a ValidationError when one is encountered.
 *
 * @param {Array<Object>} data - Array of JSON objects
 * @returns {Readable} Readable stream
 */
function createJsonReadableStream(data) {
  // Kept in the closure rather than on the stream instance.
  let items = null;
  let cursor = 0;

  return new Readable({
    objectMode: true,
    read() {
      if (items === null) {
        items = Array.isArray(data) ? [...data] : [];
      }

      while (cursor < items.length) {
        const item = items[cursor];
        cursor += 1;

        if (item === null) {
          this.destroy(new ValidationError('Input data must be objects'));
          return;
        }

        if (!this.push(item)) {
          // Stream buffer is full, wait for next read
          return;
        }
      }

      // End of data
      this.push(null);
    }
  });
}
498
+
/**
 * Creates a writable stream that collects CSV data in memory.
 *
 * Chunks are stored as bytes and decoded as UTF-8 once, when the stream
 * finishes, so a multi-byte character split across two chunks is preserved.
 * The result is exposed as the `_collectedData` property of the stream after
 * it has finished.
 *
 * @returns {Writable} Writable stream that collects data
 */
function createCsvCollectorStream() {
  const collectedChunks = [];

  return new Writable({
    write(chunk, encoding, callback) {
      const bytes = typeof chunk === 'string'
        ? Buffer.from(chunk, Buffer.isEncoding(encoding) ? encoding : 'utf8')
        : Buffer.from(chunk); // copy, in case the producer reuses its buffer
      collectedChunks.push(bytes);
      callback();
    },

    final(callback) {
      this._collectedData = Buffer.concat(collectedChunks).toString('utf8');
      callback();
    }
  });
}
519
+
520
+ module.exports = {
521
+ createJsonToCsvStream,
522
+ streamJsonToCsv,
523
+ saveJsonStreamAsCsv,
524
+ createJsonReadableStream,
525
+ createCsvCollectorStream,
526
+ createSchemaValidators
527
+ };
528
+
529
+ // For ES6 module compatibility
530
+ if (typeof module !== 'undefined' && module.exports) {
531
+ module.exports.default = createJsonToCsvStream;
532
+ }