convert-csv-to-json 4.35.0 → 4.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,7 +80,7 @@ jobs:
  - name: Upload pages artifact
  uses: actions/upload-pages-artifact@v5
  with:
- path: docs
+ path: demo
  - name: Deploy to GitHub Pages
  uses: actions/deploy-pages@v5
 
package/README.md CHANGED
@@ -3,7 +3,6 @@
  [![Node CI](https://github.com/iuccio/csvToJson/actions/workflows/ci-cd.yml/badge.svg?branch=master)](https://github.com/iuccio/csvToJson/actions/workflows/ci-cd.yml)
  ![CodeQL](https://github.com/iuccio/csvToJson/actions/workflows/codeql-analysis.yml/badge.svg)
  [![Maintainability](https://qlty.sh/gh/iuccio/projects/csvToJson/maintainability.svg)](https://qlty.sh/gh/iuccio/projects/csvToJson)
- [![Code Climate](https://codeclimate.com/github/iuccio/csvToJson/badges/gpa.svg)](https://codeclimate.com/github/iuccio/csvToJson)
  [![NPM Version](https://img.shields.io/npm/v/convert-csv-to-json.svg)](https://npmjs.org/package/convert-csv-to-json)
  ![NodeJS Version](https://img.shields.io/badge/nodeJS-%3E=18.x-brightgreen.svg)
  [![Downloads](https://img.shields.io/npm/dm/convert-csv-to-json.svg)](https://npmjs.org/package/convert-csv-to-json)
@@ -17,14 +16,14 @@
  ![TypeScript](https://img.shields.io/badge/typescript-%23007ACC.svg?style=for-the-badge&logo=typescript&logoColor=white)
 
  >
- Convert CSV files to JSON with **no dependencies**. Supports Node.js (Sync & Async), and Browser environments with full RFC 4180 compliance.
+ Convert CSV files to JSON with **no dependencies**. Supports Node.js (Sync & Async), and Browser environments with full RFC 4180 compliance. **Memory-efficient streaming** for processing large files without loading them entirely into memory.
 
  ## Overview
 
  Transform CSV data into JSON with a simple, chainable API. Choose your implementation style:
 
  - **[Synchronous API](docs/SYNC.md)** - Blocking operations for simple workflows
- - **[Asynchronous API](docs/ASYNC.md)** - Promise-based for modern async/await patterns
+ - **[Asynchronous API](docs/ASYNC.md)** - Promise-based for modern async/await patterns with **memory-efficient streaming** for large files
  - **[Browser API](docs/BROWSER.md)** - Client-side CSV parsing for web applications
 
  ## Demo and JSDoc
@@ -40,7 +39,7 @@ Transform CSV data into JSON with a simple, chainable API. Choose your implement
  ✅ **Full TypeScript Support** - Included type definitions for all APIs
  ✅ **Flexible Configuration** - Custom delimiters, encoding, trimming, and more
  ✅ **Method Chaining** - Fluent API for readable code
- ✅ **Large File Support** - Stream processing for memory-efficient handling
+ ✅ **Memory-Efficient Streaming** - Process large files without loading them entirely into memory
  ✅ **Comprehensive Error Handling** - Detailed, actionable error messages with solutions (see [ERROR_HANDLING.md](docs/ERROR_HANDLING.md))
 
  ## RFC 4180 Standard
@@ -150,6 +149,9 @@ All APIs (Sync, Async and Browser) support the same configuration methods:
  - `trimHeaderFieldWhiteSpace(bool)` - Remove spaces from headers
  - `parseSubArray(delim, sep)` - Parse delimited arrays
  - `mapRows(fn)` - Transform, filter, or enrich each row
+ - `getJsonFromStreamAsync(stream)` - Parse CSV from a Readable stream (Node.js and Browser)
+ - `getJsonFromFileStreamingAsync(filePath)` - Stream large files for memory-efficient processing (Node.js and Browser)
+ - `getJsonFromFileStreamingAsyncWithCallback(file, options = {})` - Stream a CSV File with progress callbacks for large files (Browser)
  - `utf8Encoding()`, `latin1Encoding()`, etc. - Set file encoding
 
  ### Examples
@@ -233,6 +235,64 @@ csvToJson.latin1Encoding().getJsonFromCsv('data.csv');
  csvToJson.customEncoding('ucs2').getJsonFromCsv('data.csv');
  ```
 
+ #### `getJsonFromStreamAsync(stream)` - Process CSV from Readable streams
+ ```js
+ const fs = require('fs');
+ const csvToJson = require('convert-csv-to-json');
+
+ // Process large files without loading them entirely into memory
+ async function processLargeCSV() {
+ const stream = fs.createReadStream('large-dataset.csv');
+ const jsonData = await csvToJson
+ .fieldDelimiter(';')
+ .supportQuotedField(true)
+ .getJsonFromStreamAsync(stream);
+
+ console.log(`Processed ${jsonData.length} records efficiently`);
+ return jsonData;
+ }
+ ```
+
+ #### `getJsonFromFileStreamingAsync(filePath)` - Stream processing for large files
+ ```js
+ const csvToJson = require('convert-csv-to-json');
+
+ // Most efficient way to process large CSV files
+ async function processLargeCSV(filePath) {
+ const jsonData = await csvToJson
+ .fieldDelimiter(',')
+ .formatValueByType()
+ .getJsonFromFileStreamingAsync(filePath);
+
+ console.log(`Streamed and processed ${jsonData.length} records`);
+ return jsonData;
+ }
+
+ // Usage - reads the file in chunks rather than loading it all at once
+ const data = await processLargeCSV('massive-dataset.csv');
+ ```
+
+ #### `getJsonFromFileStreamingAsyncWithCallback(file, options = {})` - Parse CSV from a File object using streaming with progress callbacks for large files
+
+ ```js
+ const csvToJson = require('convert-csv-to-json');
+ const fileInput = document.querySelector('#csvfile').files[0];
+
+ csvToJson.browser.getJsonFromFileStreamingAsyncWithCallback(fileInput, {
+ chunkSize: 500,
+ onChunk: (rows, processed, total) => {
+ console.log(`Processed ${processed}/${total} rows`);
+ // Handle chunk of rows here
+ },
+ onComplete: (allRows) => {
+ console.log('Processing complete!');
+ },
+ onError: (error) => {
+ console.error('Error:', error);
+ }
+ });
+ ```
+
  See [SYNC.md](docs/SYNC.md), [ASYNC.md](docs/ASYNC.md) or [BROWSER.md](docs/BROWSER.md) for complete configuration details.
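
One detail the configuration list above implies but does not spell out: the streaming methods run through the same chainable configuration as everything else, and the stream parser applies any configured `mapRows` function record-by-record, dropping rows mapped to `null` before they are collected. A minimal sketch combining `mapRows` with streaming; the file name, column, and mapper are illustrative:

```js
const csvToJson = require('convert-csv-to-json');

async function streamActiveUsers(filePath) {
  return csvToJson
    .fieldDelimiter(',')
    .formatValueByType()
    // Rows mapped to null are filtered out while the stream is parsed,
    // so they never accumulate in the result array.
    .mapRows((row) => (row.active ? row : null))
    .getJsonFromFileStreamingAsync(filePath);
}
```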
 
  ## Example: Complete Workflow
package/docs/ASYNC.md CHANGED
@@ -7,6 +7,8 @@ Promise-based async/await API for modern Node.js applications. Perfect for handl
  - [File Operations](#file-operations)
  - [Working with Raw CSV Data](#working-with-raw-csv-data)
  - [Processing Large Files](#processing-large-files)
+ - [Stream Processing](#stream-processing)
+ - [File Streaming](#file-streaming)
  - [Batch Processing](#batch-processing)
  - [Error Handling](#error-handling)
  - [Method Chaining](#method-chaining)
@@ -158,6 +160,112 @@ async function processChunk(records) {
  }
  ```
 
+ ## Stream Processing
+
+ For truly memory-efficient processing of large CSV files, use the stream API, which processes data in chunks without loading the entire file into memory.
+
+ ### Basic Stream Usage
+
+ ```js
+ const fs = require('fs');
+ const csvToJson = require('convert-csv-to-json');
+
+ async function processLargeCSV(filePath) {
+ const stream = fs.createReadStream(filePath);
+ const jsonData = await csvToJson.getJsonFromStreamAsync(stream);
+
+ console.log(`Processed ${jsonData.length} records`);
+ return jsonData;
+ }
+
+ // Usage
+ const data = await processLargeCSV('large-dataset.csv');
+ ```
+
+ ### Stream with Configuration
+
+ ```js
+ const fs = require('fs');
+ const csvToJson = require('convert-csv-to-json');
+
+ async function processConfiguredStream(filePath) {
+ const stream = fs.createReadStream(filePath, { encoding: 'utf8' });
+
+ const jsonData = await csvToJson
+ .fieldDelimiter(';')
+ .supportQuotedField(true)
+ .getJsonFromStreamAsync(stream);
+
+ return jsonData;
+ }
+ ```
+
+ ### File Streaming with getJsonFromFileStreamingAsync
+
+ For simplified file streaming without manually creating streams, use `getJsonFromFileStreamingAsync`:
+
+ ```js
+ const csvToJson = require('convert-csv-to-json');
+
+ async function processLargeCSV(filePath) {
+ const jsonData = await csvToJson
+ .fieldDelimiter(';')
+ .supportQuotedField(true)
+ .getJsonFromFileStreamingAsync(filePath);
+
+ console.log(`Processed ${jsonData.length} records efficiently`);
+ return jsonData;
+ }
+
+ // Usage - processes large files without loading them entirely into memory
+ const data = await processLargeCSV('large-dataset.csv');
+ ```
+
+ ### Stream from Other Sources
+
+ ```js
+ const { Readable } = require('stream');
+ const csvToJson = require('convert-csv-to-json');
+
+ // Create a stream from a string
+ function createCSVStream(csvString) {
+ const stream = new Readable();
+ stream.push(csvString);
+ stream.push(null); // End the stream
+ return stream;
+ }
+
+ async function processStringAsStream() {
+ const csvData = 'name,age\nAlice,30\nBob,25';
+ const stream = createCSVStream(csvData);
+
+ const json = await csvToJson.getJsonFromStreamAsync(stream);
+ console.log(json);
+ // Output: [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }]
+ }
+ ```
+
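The string-to-stream trick above also shows off a subtlety the stream parser handles: quote state is carried across chunk boundaries, so a quoted field split between two chunks (or containing embedded newlines) still parses as a single record. A small sketch with hypothetical data pushed in deliberately awkward pieces:

```js
const { Readable } = require('stream');
const csvToJson = require('convert-csv-to-json');

async function quotedFieldAcrossChunks() {
  // The quoted value "New\nYork" is split across two chunks on purpose.
  const stream = Readable.from(['city,population\n"New', '\nYork",8800000\n']);

  const json = await csvToJson
    .fieldDelimiter(',')
    .supportQuotedField(true)
    .getJsonFromStreamAsync(stream);

  console.log(json); // one record; the embedded newline stays inside the city field
}
```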
+ ### File Streaming
+
+ For the most efficient processing of large CSV files, use the built-in file streaming API, which handles all the complexity of chunked reading and parsing:
+
+ ```js
+ const csvToJson = require('convert-csv-to-json');
+
+ async function processLargeCSV(filePath) {
+ const jsonData = await csvToJson
+ .fieldDelimiter(';')
+ .supportQuotedField(true)
+ .getJsonFromFileStreamingAsync(filePath);
+
+ console.log(`Processed ${jsonData.length} records`);
+ return jsonData;
+ }
+
+ // Usage
+ const data = await processLargeCSV('large-dataset.csv');
+ ```
+
  ## Batch Processing
 
  ### Sequential Processing
package/docs/BROWSER.md CHANGED
@@ -6,6 +6,7 @@ Client-side CSV parsing for web browsers. Supports parsing CSV strings and file/
  - [Basic Usage](#basic-usage)
  - [Parsing CSV Strings](#parsing-csv-strings)
  - [Parsing Files and Blobs](#parsing-files-and-blobs)
+ - [Streaming Large Files](#streaming-large-files)
  - [Configuration Options](#configuration-options)
  - [File Upload Examples](#file-upload-examples)
  - [TypeScript Support](#typescript-support)
@@ -122,6 +123,57 @@ async function parseWithEncoding(file) {
  }
  ```
 
+ ## Streaming Large Files
+
+ For memory-efficient processing of large CSV files in browsers, use the streaming API, which processes data in chunks without loading the entire file into memory.
+
+ ### Stream from ReadableStream
+
+ ```js
+ const convert = require('convert-csv-to-json');
+
+ async function processStream(stream) {
+ const jsonData = await convert.browser
+ .fieldDelimiter(';')
+ .supportQuotedField(true)
+ .getJsonFromStreamAsync(stream);
+
+ console.log(`Processed ${jsonData.length} records`);
+ return jsonData;
+ }
+
+ // Usage with fetch
+ const response = await fetch('large-dataset.csv');
+ const stream = response.body;
+ const data = await processStream(stream);
+ ```
+
+ ### Stream from File Object
+
+ ```js
+ const convert = require('convert-csv-to-json');
+
+ async function processLargeFile(file) {
+ const jsonData = await convert.browser
+ .fieldDelimiter(',')
+ .formatValueByType()
+ .getJsonFromFileStreamingAsync(file);
+
+ console.log(`Streamed and processed ${jsonData.length} records`);
+ return jsonData;
+ }
+
+ // Usage with file input
+ const fileInput = document.querySelector('#csvfile');
+ fileInput.addEventListener('change', async (event) => {
+ const file = event.target.files[0];
+ const data = await processLargeFile(file);
+ console.log(data);
+ });
+ ```
+
+ **Note:** Streaming requires modern browsers that support the `ReadableStream` API (Chrome 43+, Firefox 65+, Safari 10.1+). In older browsers, `getJsonFromFileStreamingAsync` falls back to regular file parsing, while `getJsonFromStreamAsync` throws a `BrowserApiError`.
+
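The callback variant added in `browserApi.js` (further down in this diff) pairs naturally with progress reporting in the UI. A minimal sketch, assuming `#csvfile` and `#status` elements exist on the page; while a stream is being read, the third `onChunk` argument is `null` because the total row count is not known up front:

```js
const csvToJson = require('convert-csv-to-json');

const input = document.querySelector('#csvfile');
const status = document.querySelector('#status');

input.addEventListener('change', async (event) => {
  const file = event.target.files[0];
  await csvToJson.browser.getJsonFromFileStreamingAsyncWithCallback(file, {
    chunkSize: 1000,
    onChunk: (rows, processed) => {
      // The total (third argument) is null mid-stream, so show a running count
      status.textContent = `Parsed ${processed} rows...`;
    },
    onComplete: (allRows) => {
      status.textContent = `Done: ${allRows.length} rows`;
    },
    onError: (error) => {
      status.textContent = `Failed: ${error.message}`;
    }
  });
});
```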
  ## Configuration Options
 
  All configuration methods from the [Sync API](SYNC.md) are available:
package/index.d.ts CHANGED
@@ -99,10 +99,21 @@ declare module 'convert-csv-to-json' {
  */
  getJsonFromCsv(inputFileName: string): any[];
 
- /**
- * Async version of getJsonFromCsv. When options.raw is true the input is treated as a CSV string
- */
- getJsonFromCsvAsync(inputFileNameOrCsv: string, options?: { raw?: boolean }): Promise<any[]>;
+ /**
+ * Async version of getJsonFromCsv. When options.raw is true the input is treated as a CSV string
+ */
+ getJsonFromCsvAsync(inputFileNameOrCsv: string, options?: { raw?: boolean }): Promise<any[]>;
+
+ /**
+ * Parse CSV from a Readable stream and return parsed data as JSON array
+ * Processes data in chunks for memory-efficient handling of large files
+ */
+ getJsonFromStreamAsync(stream: NodeJS.ReadableStream): Promise<any[]>;
+
+ /**
+ * Parse CSV from a file path using streaming for memory-efficient processing
+ */
+ getJsonFromFileStreamingAsync(filePath: string): Promise<any[]>;
 
  csvStringToJson(csvString: string): any[];
 
@@ -139,6 +150,16 @@ declare module 'convert-csv-to-json' {
  * Parse a File or Blob and return a Promise that resolves to the JSON array
  */
  parseFile(file: Blob | File, options?: { encoding?: string }): Promise<any[]>;
+
+ /**
+ * Parse CSV from a ReadableStream and return parsed data as JSON array
+ */
+ getJsonFromStreamAsync(stream: any): Promise<any[]>;
+
+ /**
+ * Parse CSV from a File object using streaming for memory-efficient processing
+ */
+ getJsonFromFileStreamingAsync(file: File): Promise<any[]>;
  }
 
  export const browser: BrowserApi;
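
Because the Node-side declaration types the argument as `NodeJS.ReadableStream`, any Readable works, including `process.stdin`. A minimal sketch (the file name is illustrative):

```js
// parse-stdin.js - usage: cat data.csv | node parse-stdin.js
const csvToJson = require('convert-csv-to-json');

async function parseStdin() {
  const json = await csvToJson.getJsonFromStreamAsync(process.stdin);
  console.log(JSON.stringify(json, null, 2));
}

parseStdin().catch((err) => {
  console.error(err.message);
  process.exit(1);
});
```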
package/index.js CHANGED
@@ -253,6 +253,36 @@ exports.getJsonFromCsv = function(inputFileName) {
  */
  const csvToJsonAsync = require('./src/csvToJsonAsync');
 
+ /**
+ * Parse CSV from a Readable stream and return parsed data as JSON array
+ * Processes data in chunks for memory-efficient handling of large files
+ * @param {object} stream - Node.js Readable stream containing CSV data
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If stream is invalid
+ * @throws {CsvFormatError} If CSV is malformed
+ * @category 1-Core API
+ * @example
+ * const fs = require('fs');
+ * const csvToJson = require('convert-csv-to-json');
+ * const stream = fs.createReadStream('large.csv');
+ * const data = await csvToJson.getJsonFromStreamAsync(stream);
+ * console.log(data);
+ */
+
+ /**
+ * Parse CSV from a file path using streaming for memory-efficient processing
+ * @param {string} filePath - Path to the CSV file
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If filePath is invalid
+ * @throws {FileOperationError} If file cannot be read
+ * @throws {CsvFormatError} If CSV is malformed
+ * @category 1-Core API
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const data = await csvToJson.getJsonFromFileStreamingAsync('large.csv');
+ * console.log(data);
+ */
+
  // Re-export all async API methods
  Object.assign(exports, {
  getJsonFromCsvAsync: function(input, options) {
@@ -266,6 +296,12 @@
  },
  generateJsonFileFromCsvAsync: function(input, output) {
  return csvToJsonAsync.generateJsonFileFromCsv(input, output);
+ },
+ getJsonFromStreamAsync: function(stream) {
+ return csvToJsonAsync.getJsonFromStreamAsync(stream);
+ },
+ getJsonFromFileStreamingAsync: function(filePath) {
+ return csvToJsonAsync.getJsonFromFileStreamingAsync(filePath);
  }
  });
 
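The re-exports above delegate straight to `csvToJsonAsync`, which validates its input before touching the stream. A sketch of what a caller sees on bad input; the exact error name and message come from the package's error classes and are assumed here:

```js
const csvToJson = require('convert-csv-to-json');

async function demoValidation() {
  try {
    // Anything without a .pipe() function fails _validateStream
    await csvToJson.getJsonFromStreamAsync({ not: 'a stream' });
  } catch (err) {
    console.error(`${err.name}: ${err.message}`);
  }
}
```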
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "convert-csv-to-json",
- "version": "4.35.0",
+ "version": "4.37.0",
  "description": "Convert CSV to JSON",
  "main": "index.js",
  "types": "index.d.ts",
@@ -9,7 +9,7 @@
  "test-debug": "node --inspect-brk node_modules/.bin/jest --runInBand --detectOpenHandles",
  "lint": "eslint .",
  "docs": "jsdoc -c jsdoc.json",
- "docs:api": "jsdoc -c jsdoc.json -d docs/api",
+ "docs:api": "jsdoc -c jsdoc.json -d demo/api",
  "prepublishOnly": "npm run docs:api",
  "version-patch": "npm version patch",
  "version-minor": "npm version minor",
@@ -52,6 +52,7 @@
  "@eslint/js": "^10.0.1",
  "@types/jest": "^30.0.0",
  "better-docs": "^2.7.3",
+ "browserify": "^17.0.1",
  "eslint": "^10.1.0",
  "eslint-plugin-jsdoc": "^62.8.0",
  "jest": "^30.2.0",
package/src/browserApi.js CHANGED
@@ -4,6 +4,7 @@
 
  const csvToJson = require('./csvToJson');
  const { InputValidationError, BrowserApiError } = require('./util/errors');
+ const StreamProcessor = require('./streamProcessor');
 
  /**
  * Browser-friendly CSV to JSON API
@@ -216,6 +217,203 @@ class BrowserApi {
  }
  });
  }
+
+ /**
+ * Parse CSV from a browser ReadableStream and return parsed data as JSON array
+ * Processes data in chunks for memory-efficient handling of large streams
+ * @param {object} stream - Browser ReadableStream containing CSV data
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If stream is invalid
+ * @throws {BrowserApiError} If streaming is not supported or parsing fails
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const response = await fetch('large-dataset.csv');
+ * const stream = response.body;
+ * const data = await csvToJson.browser.getJsonFromStreamAsync(stream);
+ * console.log(data);
+ */
+ async getJsonFromStreamAsync(stream) {
+ if (typeof ReadableStream === 'undefined') {
+ throw BrowserApiError.streamingNotSupported();
+ }
+
+ if (!stream || typeof stream.getReader !== 'function') {
+ throw new InputValidationError(
+ 'stream',
+ 'ReadableStream',
+ typeof stream,
+ 'Provide a valid browser ReadableStream.'
+ );
+ }
+
+ const streamProcessor = new StreamProcessor(this.csvToJson, { isBrowser: true });
+ return streamProcessor.processStream(stream);
+ }
+
+ /**
+ * Parse CSV from a File object using streaming for memory-efficient processing
+ * @param {File} file - File object containing CSV data
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If file is invalid
+ * @throws {BrowserApiError} If streaming is not supported or parsing fails
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const fileInput = document.querySelector('#csvfile').files[0];
+ * const data = await csvToJson.browser.getJsonFromFileStreamingAsync(fileInput);
+ * console.log(data);
+ */
+ async getJsonFromFileStreamingAsync(file) {
+ if (!file || !(file instanceof File)) {
+ throw new InputValidationError(
+ 'file',
+ 'File object',
+ typeof file,
+ 'Provide a valid File object.'
+ );
+ }
+
+ // Check if the file supports streaming
+ if (typeof file.stream === 'function') {
+ // Use native streaming if available
+ const stream = file.stream();
+ return this.getJsonFromStreamAsync(stream);
+ } else {
+ // Fallback to regular file parsing for older browsers
+ return this.parseFile(file);
+ }
+ }
+
+ /**
+ * Parse CSV from a File object using streaming with progress callbacks for large files
+ * Processes data in chunks to avoid memory issues with large datasets
+ * @param {File} file - File object containing CSV data
+ * @param {object} options - Processing options
+ * @param {function(Array<object>, number, number): void} options.onChunk - Callback for each chunk of processed rows
+ * @param {function(Array<object>): void} [options.onComplete] - Callback when processing is complete
+ * @param {function(Error): void} [options.onError] - Callback for errors
+ * @param {number} [options.chunkSize=1000] - Number of rows per chunk
+ * @returns {Promise<void>} Promise that resolves when streaming starts
+ * @throws {InputValidationError} If file or options are invalid
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const fileInput = document.querySelector('#csvfile').files[0];
+ *
+ * await csvToJson.browser.getJsonFromFileStreamingAsyncWithCallback(fileInput, {
+ * chunkSize: 500,
+ * onChunk: (rows, processed, total) => {
+ * console.log(`Processed ${processed}/${total} rows`);
+ * // Handle chunk of rows here
+ * },
+ * onComplete: (allRows) => {
+ * console.log('Processing complete!');
+ * },
+ * onError: (error) => {
+ * console.error('Error:', error);
+ * }
+ * });
+ */
+ async getJsonFromFileStreamingAsyncWithCallback(file, options = {}) {
+ if (!file || !(file instanceof File)) {
+ throw new InputValidationError(
+ 'file',
+ 'File object',
+ typeof file,
+ 'Provide a valid File object.'
+ );
+ }
+
+ if (!options.onChunk || typeof options.onChunk !== 'function') {
+ throw new InputValidationError(
+ 'options.onChunk',
+ 'function',
+ typeof options.onChunk,
+ 'Provide a callback function to handle processed chunks.'
+ );
+ }
+
+ const chunkSize = options.chunkSize || 1000;
+ const streamProcessor = new StreamProcessor(this.csvToJson, {
+ isBrowser: true,
+ chunkSize,
+ onChunk: options.onChunk,
+ onComplete: options.onComplete,
+ onError: options.onError
+ });
+
+ // Check if the file supports streaming
+ if (typeof file.stream === 'function') {
+ // Use native streaming if available
+ const stream = file.stream();
+ return streamProcessor.processStreamWithCallbacks(stream);
+ } else {
+ // Fallback to regular file parsing for older browsers
+ return this.parseFileWithCallbacks(file, options);
+ }
+ }
+
+ /**
+ * Parse a File object with progress callbacks (fallback for non-streaming browsers)
+ * @param {File} file - File object to parse
+ * @param {object} options - Processing options
+ * @private
+ */
+ async parseFileWithCallbacks(file, options) {
+ const chunkSize = options.chunkSize || 1000;
+ const onChunk = options.onChunk;
+ const onComplete = options.onComplete;
+ const onError = options.onError;
+
+ return new Promise((resolve, reject) => {
+ if (typeof FileReader === 'undefined') {
+ const error = BrowserApiError.fileReaderNotAvailable();
+ if (onError) onError(error);
+ reject(error);
+ return;
+ }
+
+ const reader = new FileReader();
+ reader.onerror = () => {
+ const error = BrowserApiError.parseFileError(
+ reader.error || new Error('Unknown file reading error')
+ );
+ if (onError) onError(error);
+ reject(error);
+ };
+
+ reader.onload = () => {
+ try {
+ const text = reader.result;
+ const allRows = this.csvToJson.csvToJson(String(text));
+
+ // Process in chunks
+ let processed = 0;
+ const total = allRows.length;
+
+ const processChunk = () => {
+ const chunk = allRows.slice(processed, processed + chunkSize);
+ if (chunk.length > 0) {
+ onChunk(chunk, processed + chunk.length, total);
+ processed += chunk.length;
+ // Use setTimeout to avoid blocking the UI
+ setTimeout(processChunk, 0);
+ } else {
+ if (onComplete) onComplete(allRows);
+ resolve();
+ }
+ };
+
+ processChunk();
+ } catch (err) {
+ const error = BrowserApiError.parseFileError(err);
+ if (onError) onError(error);
+ reject(error);
+ }
+ };
+
+ reader.readAsText(file);
+ });
+ }
+
  }
 
- module.exports = new BrowserApi();
+ module.exports = new BrowserApi();
package/src/csvToJsonAsync.js CHANGED
@@ -4,6 +4,7 @@
  const fileUtils = require('./util/fileUtils');
  const csvToJson = require('./csvToJson');
  const { InputValidationError } = require('./util/errors');
+ const StreamProcessor = require('./streamProcessor');
 
  /**
  * Asynchronous CSV to JSON converter
@@ -177,18 +178,69 @@ class CsvToJsonAsync {
  }
 
  /**
- * Parse CSV string to stringified JSON (async)
- * @param {string} csvString - CSV content as string
- * @returns {Promise<string>} JSON stringified array of objects
+ * Parse CSV from a Readable stream and return parsed data as JSON array
+ * Processes data in chunks for memory-efficient handling of large files
+ * @param {object} stream - Node.js Readable stream containing CSV data
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If stream is invalid
  * @throws {CsvFormatError} If CSV is malformed
  * @example
+ * const fs = require('fs');
  * const csvToJson = require('convert-csv-to-json');
- * const jsonString = await csvToJson.csvStringToJsonStringifiedAsync('name,age\nAlice,30');
- * console.log(jsonString);
+ * const stream = fs.createReadStream('large.csv');
+ * const data = await csvToJson.getJsonFromStreamAsync(stream);
+ * console.log(data);
  */
- async csvStringToJsonStringifiedAsync(csvString) {
- const json = await this.csvStringToJsonAsync(csvString);
- return JSON.stringify(json, undefined, 1);
+ async getJsonFromStreamAsync(stream) {
+ this._validateStream(stream);
+
+ const streamProcessor = new StreamProcessor(this.csvToJson, { isBrowser: false });
+ return streamProcessor.processStream(stream);
+ }
+
+ /**
+ * Validate that the provided stream is a valid Readable stream
+ * @param {object} stream - The stream to validate
+ * @throws {InputValidationError} If stream is invalid
+ * @private
+ */
+ _validateStream(stream) {
+ if (!stream || typeof stream.pipe !== 'function') {
+ throw new InputValidationError(
+ 'stream',
+ 'Readable stream',
+ typeof stream,
+ 'Provide a valid Node.js Readable stream.'
+ );
+ }
+ }
+
+ /**
+ * Parse CSV from a file path using streaming for memory-efficient processing
+ * @param {string} filePath - Path to the CSV file
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If filePath is invalid
+ * @throws {FileOperationError} If file cannot be read
+ * @throws {CsvFormatError} If CSV is malformed
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const data = await csvToJson.getJsonFromFileStreamingAsync('large.csv');
+ * console.log(data);
+ */
+ async getJsonFromFileStreamingAsync(filePath) {
+ if (!filePath || typeof filePath !== 'string') {
+ throw new InputValidationError(
+ 'filePath',
+ 'string (file path)',
+ typeof filePath,
+ 'Provide a valid file path as a string.'
+ );
+ }
+
+ const fs = require('fs');
+ const encoding = typeof this.csvToJson.encoding === 'string' ? this.csvToJson.encoding : 'utf8';
+ const stream = fs.createReadStream(filePath, { encoding });
+ return this.getJsonFromStreamAsync(stream);
  }
  }
 
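Note how `getJsonFromFileStreamingAsync` above forwards the configured encoding to `fs.createReadStream`, so the encoding helpers apply to streamed files as well. A sketch, assuming `latin1Encoding()` sets the `encoding` field this code reads:

```js
const csvToJson = require('convert-csv-to-json');

async function streamLatin1(filePath) {
  // The stream decodes latin1 before the parser ever sees the text
  return csvToJson
    .latin1Encoding()
    .getJsonFromFileStreamingAsync(filePath);
}
```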
package/src/streamProcessor.js ADDED
@@ -0,0 +1,469 @@
+ 'use strict';
+
+ const stringUtils = require('./util/stringUtils');
+ const { CsvFormatError } = require('./util/errors');
+
+ const QUOTE_CHAR = '"';
+ const CRLF = '\r\n';
+ const LF = '\n';
+ const CR = '\r';
+
+ /**
+ * Handles the processing of CSV data from a stream
+ * Encapsulates all stream processing logic following single responsibility principle
+ * Works with both Node.js streams and browser ReadableStream
+ * @private
+ */
+ class StreamProcessor {
+ /**
+ * Initialize the stream processor with CSV configuration
+ * @param {object} csvConfig - The CSV configuration object
+ * @param {object} options - Environment options
+ * @param {boolean} options.isBrowser - Whether running in browser environment
+ * @param {number} options.chunkSize - Number of rows per chunk for callback processing
+ * @param {function} options.onChunk - Callback for each chunk
+ * @param {function} options.onComplete - Callback when processing complete
+ * @param {function} options.onError - Callback for errors
+ */
+ constructor(csvConfig, options = {}) {
+ this.csvConfig = csvConfig;
+ this.isBrowser = options.isBrowser || (typeof window !== 'undefined' && typeof document !== 'undefined');
+ this.buffer = '';
+ this.isInsideQuotes = false;
+ this.headers = null;
+ this.headerRowIndex = csvConfig.getIndexHeader();
+ this.currentRecordIndex = 0;
+ this.parsedRecords = [];
+ this.dataRowIndex = 0;
+
+ // Chunked processing options
+ this.chunkSize = options.chunkSize || 1000;
+ this.onChunk = options.onChunk;
+ this.onComplete = options.onComplete;
+ this.onError = options.onError;
+ this.allRecords = []; // For collecting all records when using callbacks
+ }
+
+ /**
+ * Process a chunk of data from the stream
+ * @param {Buffer|string|Uint8Array} chunk - The data chunk to process
+ */
+ processChunk(chunk) {
+ // Convert chunk to string, handling both Node.js Buffers and browser Uint8Array
+ let chunkString;
+ if (typeof chunk === 'string') {
+ chunkString = chunk;
+ } else if (this.isBrowser && typeof globalThis.TextDecoder !== 'undefined') {
+ chunkString = new globalThis.TextDecoder().decode(chunk);
+ } else if (this.isBrowser) {
+ // Fallback for older browsers without TextDecoder
+ chunkString = String.fromCharCode.apply(null, new Uint8Array(chunk));
+ } else {
+ // Node.js environment
+ chunkString = chunk.toString();
+ }
+
+ this.buffer += chunkString;
+ this._processCompleteRecords();
+ }
+
+ /**
+ * Process a stream with chunked callbacks (for large files)
+ * @param {Readable|ReadableStream} stream - The stream to process
+ * @returns {Promise<void>} Promise that resolves when streaming starts
+ */
+ async processStreamWithCallbacks(stream) {
+ return new Promise((resolve, reject) => {
+ if (this.isBrowser) {
+ // Browser ReadableStream
+ if (!stream || typeof stream.getReader !== 'function') {
+ const error = new Error('Invalid ReadableStream provided');
+ if (this.onError) this.onError(error);
+ reject(error);
+ return;
+ }
+
+ const reader = stream.getReader();
+
+ const processChunk = async () => {
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+
+ if (done) {
+ this.finalizeProcessing();
+ this._sendRemainingChunks();
+ if (this.onComplete) this.onComplete(this.allRecords);
+ resolve();
+ return;
+ }
+
+ this.processChunk(value);
+ this._sendPendingChunks();
+ }
+ } catch (error) {
+ if (this.onError) this.onError(error);
+ reject(error);
+ }
+ };
+
+ processChunk();
+ } else {
+ // Node.js Readable stream
+ if (!stream || typeof stream.pipe !== 'function') {
+ const error = new Error('Invalid Readable stream provided');
+ if (this.onError) this.onError(error);
+ reject(error);
+ return;
+ }
+
+ stream.on('data', (chunk) => {
+ try {
+ this.processChunk(chunk);
+ this._sendPendingChunks();
+ } catch (error) {
+ if (this.onError) this.onError(error);
+ reject(error);
+ }
+ });
+
+ stream.on('end', () => {
+ try {
+ this.finalizeProcessing();
+ this._sendRemainingChunks();
+ if (this.onComplete) this.onComplete(this.allRecords);
+ resolve();
+ } catch (error) {
+ if (this.onError) this.onError(error);
+ reject(error);
+ }
+ });
+
+ stream.on('error', (error) => {
+ if (this.onError) this.onError(error);
+ reject(error);
+ });
+ }
+ });
+ }
+
+ /**
+ * Send pending chunks when they reach the chunk size
+ * @private
+ */
+ _sendPendingChunks() {
+ if (!this.onChunk) return;
+
+ while (this.parsedRecords.length >= this.chunkSize) {
+ const chunk = this.parsedRecords.splice(0, this.chunkSize);
+ this.allRecords.push(...chunk);
+ this.onChunk(chunk, this.allRecords.length, null); // null for total when streaming
+ }
+ }
+
+ /**
+ * Send any remaining chunks at the end of processing
+ * @private
+ */
+ _sendRemainingChunks() {
+ if (!this.onChunk || this.parsedRecords.length === 0) return;
+
+ const chunk = [...this.parsedRecords];
+ this.parsedRecords.length = 0; // Clear the array
+ this.allRecords.push(...chunk);
+ this.onChunk(chunk, this.allRecords.length, this.allRecords.length);
+ }
+
+ /**
+ * Process a stream directly (unified interface for both environments)
+ * @param {Readable|ReadableStream} stream - The stream to process
+ * @returns {Promise<Array<object>>} Promise resolving to parsed records
+ */
+ async processStream(stream) {
+ return new Promise((resolve, reject) => {
+ if (this.isBrowser) {
+ // Browser ReadableStream
+ if (!stream || typeof stream.getReader !== 'function') {
+ reject(new Error('Invalid ReadableStream provided'));
+ return;
+ }
+
+ const reader = stream.getReader();
+
+ const processChunk = async () => {
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+
+ if (done) {
+ this.finalizeProcessing();
+ resolve(this.getResult());
+ return;
+ }
+
+ this.processChunk(value);
+ }
+ } catch (error) {
+ reject(error);
+ }
+ };
+
+ processChunk();
+ } else {
+ // Node.js Readable stream
+ if (!stream || typeof stream.pipe !== 'function') {
+ reject(new Error('Invalid Readable stream provided'));
+ return;
+ }
+
+ stream.on('data', (chunk) => {
+ try {
+ this.processChunk(chunk);
+ } catch (error) {
+ reject(error);
+ }
+ });
+
+ stream.on('end', () => {
+ try {
+ this.finalizeProcessing();
+ resolve(this.getResult());
+ } catch (error) {
+ reject(error);
+ }
+ });
+
+ stream.on('error', (error) => {
+ reject(error);
+ });
+ }
+ });
+ }
+
+ /**
+ * Finalize processing when the stream ends
+ */
+ finalizeProcessing() {
+ this._processRemainingBuffer();
+ this._validateProcessingResult();
+ }
+
+ /**
+ * Get the final processed result
+ * @returns {Array<object>} Array of parsed JSON objects
+ */
+ getResult() {
+ return this.parsedRecords;
+ }
+
+ /**
+ * Process all complete records currently in the buffer
+ * @private
+ */
+ _processCompleteRecords() {
+ const parseResult = this._parseRecordsFromBuffer(this.buffer, this.isInsideQuotes);
+
+ this.buffer = parseResult.remainingBuffer;
+ this.isInsideQuotes = parseResult.isInsideQuotes;
+
+ for (const record of parseResult.completeRecords) {
+ this._processRecord(record);
+ this.currentRecordIndex++;
+ }
+ }
+
+ /**
+ * Process any remaining buffer content when stream ends
+ * @private
+ */
+ _processRemainingBuffer() {
+ if (this.buffer.length > 0) {
+ if (this.isInsideQuotes) {
+ throw CsvFormatError.mismatchedQuotes('CSV stream');
+ }
+
+ const parseResult = this._parseRecordsFromBuffer(this.buffer + '\n', false);
+
+ for (const record of parseResult.completeRecords) {
+ this._processRecord(record);
+ this.currentRecordIndex++;
+ }
+ }
+ }
+
+ /**
+ * Process a single CSV record
+ * @param {string} record - The CSV record to process
+ * @private
+ */
+ _processRecord(record) {
+ if (this.headers === null && this.currentRecordIndex === this.headerRowIndex) {
+ this._processHeaderRecord(record);
+ } else if (this.headers !== null) {
+ this._processDataRecord(record);
+ }
+ }
+
+ /**
+ * Process a header record
+ * @param {string} record - The header record
+ * @private
+ */
+ _processHeaderRecord(record) {
+ const headerFields = this._splitRecord(record);
+ if (stringUtils.hasContent(headerFields)) {
+ this.headers = headerFields;
+ }
+ }
+
+ /**
+ * Process a data record
+ * @param {string} record - The data record
+ * @private
+ */
+ _processDataRecord(record) {
+ const dataFields = this._splitRecord(record);
+ if (stringUtils.hasContent(dataFields)) {
+ const row = this.csvConfig.buildJsonResult(this.headers, dataFields);
+ const processedRow = this._applyRowMapper(row);
+ if (processedRow !== null) {
+ this.parsedRecords.push(processedRow);
+ }
+ }
+ }
+
+ /**
+ * Apply row mapper function if configured
+ * @param {object} row - The parsed row object
+ * @returns {object|null} The processed row or null if filtered out
+ * @private
+ */
+ _applyRowMapper(row) {
+ if (this.csvConfig.rowMapper) {
+ const mappedRow = this.csvConfig.rowMapper(row, this.dataRowIndex);
+ this.dataRowIndex++;
+ return mappedRow;
+ }
+ this.dataRowIndex++;
+ return row;
+ }
+
+ /**
+ * Split a CSV record into fields based on configuration
+ * @param {string} record - The record to split
+ * @returns {string[]} Array of field values
+ * @private
+ */
+ _splitRecord(record) {
+ if (this.csvConfig.isSupportQuotedField) {
+ return this.csvConfig.split(record);
+ }
+ return record.split(this.csvConfig.delimiter || ',');
+ }
+
+ /**
+ * Parse complete records from buffer, handling quoted fields across chunks
+ * @param {string} buffer - Current buffer content
+ * @param {boolean} insideQuotes - Whether we're currently inside quotes
+ * @returns {object} Object with completeRecords array and remaining buffer/quote state
+ * @private
+ */
+ _parseRecordsFromBuffer(buffer, insideQuotes) {
+ const completeRecords = [];
+ let currentRecord = '';
+ let i = 0;
+
+ while (i < buffer.length) {
+ const char = buffer[i];
+
+ if (char === QUOTE_CHAR) {
+ const escapedQuoteResult = this._handleEscapedQuote(buffer, i, insideQuotes);
+ if (escapedQuoteResult.wasEscaped) {
+ currentRecord += QUOTE_CHAR + QUOTE_CHAR;
+ i = escapedQuoteResult.newIndex;
+ continue;
+ } else {
+ insideQuotes = !insideQuotes;
+ }
+ } else if (!insideQuotes && this._isLineEnding(buffer, i)) {
+ const lineEndingLength = this._getLineEndingLength(buffer, i);
+ completeRecords.push(currentRecord);
+ currentRecord = '';
+ i += lineEndingLength;
+ continue;
+ }
+
+ currentRecord += char;
+ i++;
+ }
+
+ return {
+ completeRecords,
+ remainingBuffer: currentRecord,
+ isInsideQuotes: insideQuotes
+ };
+ }
+
+ /**
+ * Handle escaped quotes in quoted fields
+ * @param {string} buffer - The buffer content
+ * @param {number} index - Current index in buffer
+ * @param {boolean} insideQuotes - Whether currently inside quotes
+ * @returns {object} Result indicating if quote was escaped and new index
+ * @private
+ */
+ _handleEscapedQuote(buffer, index, insideQuotes) {
+ if (insideQuotes && index + 1 < buffer.length && buffer[index + 1] === QUOTE_CHAR) {
+ return { wasEscaped: true, newIndex: index + 2 };
+ }
+ return { wasEscaped: false, newIndex: index + 1 };
+ }
+
+ /**
+ * Check if character at index is a line ending
+ * @param {string} buffer - The buffer content
+ * @param {number} index - Current index
+ * @returns {boolean} True if line ending
+ * @private
+ */
+ _isLineEnding(buffer, index) {
+ return this._getLineEndingLength(buffer, index) > 0;
+ }
+
+ /**
+ * Get the length of line ending at current position
+ * @param {string} content - Content to check
+ * @param {number} index - Current index
+ * @returns {number} Length of line ending
+ * @private
+ */
+ _getLineEndingLength(content, index) {
+ if (content.slice(index, index + 2) === CRLF) {
+ return 2;
+ }
+ if (content[index] === LF) {
+ return 1;
+ }
+ if (content[index] === CR && content[index + 1] !== LF) {
+ return 1;
+ }
+ return 0;
+ }
+
+ /**
+ * Validate the final processing result
+ * @private
+ */
+ _validateProcessingResult() {
+ if (!this.headers && this.parsedRecords.length === 0) {
+ // Empty stream - this is OK
+ return;
+ }
+
+ if (!this.headers) {
+ throw CsvFormatError.missingHeader();
+ }
+ }
+ }
+
+ module.exports = StreamProcessor;
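
`_getLineEndingLength` accepts CRLF, LF, and lone CR, so Windows-, Unix-, and legacy-Mac-style files should split into identical records. A quick sketch through the public API:

```js
const { Readable } = require('stream');
const csvToJson = require('convert-csv-to-json');

async function lineEndingsParseAlike() {
  for (const eol of ['\r\n', '\n', '\r']) {
    const stream = Readable.from([`a,b${eol}1,2${eol}`]);
    const json = await csvToJson
      .fieldDelimiter(',')
      .getJsonFromStreamAsync(stream);
    console.log(json); // [{ a: '1', b: '2' }] for each EOL style
  }
}
```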
package/src/util/errors.js CHANGED
@@ -318,6 +318,24 @@ class BrowserApiError extends CsvParsingError {
  { originalError: originalError.message }
  );
  }
+
+ /**
+ * Create error for unsupported streaming API in browser
+ * Occurs when browser doesn't support ReadableStream
+ * @returns {BrowserApiError} Configured error instance
+ * @static
+ */
+ static streamingNotSupported() {
+ return new BrowserApiError(
+ `Browser compatibility error: ReadableStream API is not available.\n` +
+ `Your browser does not support the ReadableStream API required for streaming.\n\n` +
+ `Solutions:\n` +
+ ` 1. Use a modern browser that supports ReadableStream (Chrome 43+, Firefox 65+, Safari 10.1+)\n` +
+ ` 2. Use getJsonFromFileStreamingAsync() which falls back to regular file parsing\n` +
+ ` 3. Consider using parseFile() for non-streaming file parsing\n` +
+ ` 4. Implement a polyfill for ReadableStream support`
+ );
+ }
  }
 
  module.exports = {