convert-csv-to-json 4.36.0 → 4.37.0
- package/.github/workflows/ci-cd.yml +1 -1
- package/README.md +64 -3
- package/docs/ASYNC.md +108 -0
- package/docs/BROWSER.md +52 -0
- package/index.d.ts +25 -4
- package/index.js +36 -0
- package/package.json +3 -2
- package/src/browserApi.js +199 -1
- package/src/csvToJsonAsync.js +60 -8
- package/src/streamProcessor.js +469 -0
- package/src/util/errors.js +18 -0
package/README.md
CHANGED
@@ -16,14 +16,14 @@
 
 
 >
-Convert CSV files to JSON with **no dependencies**. Supports Node.js (Sync & Async) and Browser environments with full RFC 4180 compliance.
+Convert CSV files to JSON with **no dependencies**. Supports Node.js (Sync & Async) and Browser environments with full RFC 4180 compliance. **Memory-efficient streaming** processes large files without loading them entirely into memory.
 
 ## Overview
 
 Transform CSV data into JSON with a simple, chainable API. Choose your implementation style:
 
 - **[Synchronous API](docs/SYNC.md)** - Blocking operations for simple workflows
-- **[Asynchronous API](docs/ASYNC.md)** - Promise-based for modern async/await patterns
+- **[Asynchronous API](docs/ASYNC.md)** - Promise-based for modern async/await patterns, with **memory-efficient streaming** for large files
 - **[Browser API](docs/BROWSER.md)** - Client-side CSV parsing for web applications
 
 ## Demo and JSDoc
@@ -39,7 +39,7 @@ Transform CSV data into JSON with a simple, chainable API. Choose your implement
 ✅ **Full TypeScript Support** - Included type definitions for all APIs
 ✅ **Flexible Configuration** - Custom delimiters, encoding, trimming, and more
 ✅ **Method Chaining** - Fluent API for readable code
-✅ **
+✅ **Memory-Efficient Streaming** - Process large files without loading them entirely into memory
 ✅ **Comprehensive Error Handling** - Detailed, actionable error messages with solutions (see [ERROR_HANDLING.md](docs/ERROR_HANDLING.md))
 
 ## RFC 4180 Standard
@@ -149,6 +149,9 @@ All APIs (Sync, Async and Browser) support the same configuration methods:
 - `trimHeaderFieldWhiteSpace(bool)` - Remove spaces from headers
 - `parseSubArray(delim, sep)` - Parse delimited arrays
 - `mapRows(fn)` - Transform, filter, or enrich each row
+- `getJsonFromStreamAsync(stream)` - Process CSV from Readable streams (Node.js and Browser)
+- `getJsonFromFileStreamingAsync(filePath)` - Stream processing for large files (Node.js and Browser)
+- `getJsonFromFileStreamingAsyncWithCallback(file, options = {})` - Parse CSV from a File using streaming with progress callbacks for large files
 - `utf8Encoding()`, `latin1Encoding()`, etc. - Set file encoding
 
 ### Examples
@@ -232,6 +235,64 @@ csvToJson.latin1Encoding().getJsonFromCsv('data.csv');
 csvToJson.customEncoding('ucs2').getJsonFromCsv('data.csv');
 ```
 
+#### `getJsonFromStreamAsync(stream)` - Process CSV from Readable streams
+```js
+const fs = require('fs');
+const csvToJson = require('convert-csv-to-json');
+
+// Process large files without loading them entirely into memory
+async function processLargeCSV() {
+  const stream = fs.createReadStream('large-dataset.csv');
+  const jsonData = await csvToJson
+    .fieldDelimiter(';')
+    .supportQuotedField(true)
+    .getJsonFromStreamAsync(stream);
+
+  console.log(`Processed ${jsonData.length} records efficiently`);
+  return jsonData;
+}
+```
+
+#### `getJsonFromFileStreamingAsync(filePath)` - Stream processing for large files
+```js
+const csvToJson = require('convert-csv-to-json');
+
+// Most efficient way to process large CSV files
+async function processLargeCSV(filePath) {
+  const jsonData = await csvToJson
+    .fieldDelimiter(',')
+    .formatValueByType()
+    .getJsonFromFileStreamingAsync(filePath);
+
+  console.log(`Streamed and processed ${jsonData.length} records`);
+  return jsonData;
+}
+
+// Usage - handles large files without reading them into memory all at once
+const data = await processLargeCSV('massive-dataset.csv');
+```
+
+#### `getJsonFromFileStreamingAsyncWithCallback(file, options = {})` - Parse CSV from a File object using streaming with progress callbacks for large files
+
+```js
+const csvToJson = require('convert-csv-to-json');
+const fileInput = document.querySelector('#csvfile').files[0];
+
+csvToJson.browser.getJsonFromFileStreamingAsyncWithCallback(fileInput, {
+  chunkSize: 500,
+  onChunk: (rows, processed, total) => {
+    console.log(`Processed ${processed}/${total} rows`);
+    // Handle chunk of rows here
+  },
+  onComplete: (allRows) => {
+    console.log('Processing complete!');
+  },
+  onError: (error) => {
+    console.error('Error:', error);
+  }
+});
+```
+
 See [SYNC.md](docs/SYNC.md), [ASYNC.md](docs/ASYNC.md) or [BROWSER.md](docs/BROWSER.md) for complete configuration details.
 
 ## Example: Complete Workflow
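The new streaming methods honor the same chainable configuration as the rest of the API, including `mapRows`. A minimal sketch (the file name and the `age` filter are illustrative; it assumes that `mapRows` returning `null` drops a row, which is what the stream processor in this release does):

```js
const csvToJson = require('convert-csv-to-json');

// Stream a large file and filter rows on the fly; rows mapped to null are dropped.
async function loadAdults(filePath) {
  return csvToJson
    .formatValueByType()
    .mapRows((row) => (row.age >= 18 ? row : null))
    .getJsonFromFileStreamingAsync(filePath);
}

loadAdults('people.csv').then((rows) => console.log(`${rows.length} adults`));
```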
package/docs/ASYNC.md
CHANGED
@@ -7,6 +7,8 @@ Promise-based async/await API for modern Node.js applications. Perfect for handl
 - [File Operations](#file-operations)
 - [Working with Raw CSV Data](#working-with-raw-csv-data)
 - [Processing Large Files](#processing-large-files)
+- [Stream Processing](#stream-processing)
+- [File Streaming](#file-streaming)
 - [Batch Processing](#batch-processing)
 - [Error Handling](#error-handling)
 - [Method Chaining](#method-chaining)
@@ -158,6 +160,112 @@ async function processChunk(records) {
 }
 ```
 
+## Stream Processing
+
+For true memory-efficient processing of large CSV files, use the stream API, which processes data in chunks without loading the entire file into memory.
+
+### Basic Stream Usage
+
+```js
+const fs = require('fs');
+const csvToJson = require('convert-csv-to-json');
+
+async function processLargeCSV(filePath) {
+  const stream = fs.createReadStream(filePath);
+  const jsonData = await csvToJson.getJsonFromStreamAsync(stream);
+
+  console.log(`Processed ${jsonData.length} records`);
+  return jsonData;
+}
+
+// Usage
+const data = await processLargeCSV('large-dataset.csv');
+```
+
+### Stream with Configuration
+
+```js
+const fs = require('fs');
+const csvToJson = require('convert-csv-to-json');
+
+async function processConfiguredStream(filePath) {
+  const stream = fs.createReadStream(filePath, { encoding: 'utf8' });
+
+  const jsonData = await csvToJson
+    .fieldDelimiter(';')
+    .supportQuotedField(true)
+    .getJsonFromStreamAsync(stream);
+
+  return jsonData;
+}
+```
+
+### File Streaming with getJsonFromFileStreamingAsync
+
+For simplified file streaming without manually creating streams, use `getJsonFromFileStreamingAsync`:
+
+```js
+const csvToJson = require('convert-csv-to-json');
+
+async function processLargeCSV(filePath) {
+  const jsonData = await csvToJson
+    .fieldDelimiter(';')
+    .supportQuotedField(true)
+    .getJsonFromFileStreamingAsync(filePath);
+
+  console.log(`Processed ${jsonData.length} records efficiently`);
+  return jsonData;
+}
+
+// Usage - processes large files without loading them entirely into memory
+const data = await processLargeCSV('large-dataset.csv');
+```
+
+### Stream from Other Sources
+
+```js
+const { Readable } = require('stream');
+const csvToJson = require('convert-csv-to-json');
+
+// Create a stream from a string
+function createCSVStream(csvString) {
+  const stream = new Readable({ read() {} }); // no-op read(): data is pushed manually
+  stream.push(csvString);
+  stream.push(null); // End the stream
+  return stream;
+}
+
+async function processStringAsStream() {
+  const csvData = 'name,age\nAlice,30\nBob,25';
+  const stream = createCSVStream(csvData);
+
+  const json = await csvToJson.getJsonFromStreamAsync(stream);
+  console.log(json);
+  // Output: [{ name: 'Alice', age: '30' }, { name: 'Bob', age: '25' }]
+}
+```
+
+### File Streaming
+
+For the most efficient processing of large CSV files, use the built-in file streaming API, which handles all the complexity of chunked reading and parsing:
+
+```js
+const csvToJson = require('convert-csv-to-json');
+
+async function processLargeCSV(filePath) {
+  const jsonData = await csvToJson
    .fieldDelimiter(';')
+    .supportQuotedField(true)
+    .getJsonFromFileStreamingAsync(filePath);
+
+  console.log(`Processed ${jsonData.length} records`);
+  return jsonData;
+}
+
+// Usage
+const data = await processLargeCSV('large-dataset.csv');
+```
+
 ## Batch Processing
 
 ### Sequential Processing
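Anything exposing the Node `Readable` interface can be fed to `getJsonFromStreamAsync`, so CSV can also be generated lazily. A sketch (names illustrative) using `Readable.from` over a generator:

```js
const { Readable } = require('stream');
const csvToJson = require('convert-csv-to-json');

// Yield CSV lines one at a time instead of building one big string.
function* csvLines(count) {
  yield 'id,value\n';
  for (let i = 0; i < count; i++) {
    yield `${i},${i * 2}\n`;
  }
}

async function main() {
  const stream = Readable.from(csvLines(10000)); // accepts any (async) iterable
  const json = await csvToJson.getJsonFromStreamAsync(stream);
  console.log(json.length); // 10000
}

main();
```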
package/docs/BROWSER.md
CHANGED
@@ -6,6 +6,7 @@ Client-side CSV parsing for web browsers. Supports parsing CSV strings and file/
 - [Basic Usage](#basic-usage)
 - [Parsing CSV Strings](#parsing-csv-strings)
 - [Parsing Files and Blobs](#parsing-files-and-blobs)
+- [Streaming Large Files](#streaming-large-files)
 - [Configuration Options](#configuration-options)
 - [File Upload Examples](#file-upload-examples)
 - [TypeScript Support](#typescript-support)
@@ -122,6 +123,57 @@ async function parseWithEncoding(file) {
 }
 ```
 
+## Streaming Large Files
+
+For memory-efficient processing of large CSV files in browsers, use the streaming API, which processes data in chunks without loading the entire file into memory.
+
+### Stream from ReadableStream
+
+```js
+const convert = require('convert-csv-to-json');
+
+async function processStream(stream) {
+  const jsonData = await convert.browser
+    .fieldDelimiter(';')
+    .supportQuotedField(true)
+    .getJsonFromStreamAsync(stream);
+
+  console.log(`Processed ${jsonData.length} records`);
+  return jsonData;
+}
+
+// Usage with fetch
+const response = await fetch('large-dataset.csv');
+const stream = response.body;
+const data = await processStream(stream);
+```
+
+### Stream from File Object
+
+```js
+const convert = require('convert-csv-to-json');
+
+async function processLargeFile(file) {
+  const jsonData = await convert.browser
+    .fieldDelimiter(',')
+    .formatValueByType()
+    .getJsonFromFileStreamingAsync(file);
+
+  console.log(`Streamed and processed ${jsonData.length} records`);
+  return jsonData;
+}
+
+// Usage with file input
+const fileInput = document.querySelector('#csvfile');
+fileInput.addEventListener('change', async (event) => {
+  const file = event.target.files[0];
+  const data = await processLargeFile(file);
+  console.log(data);
+});
+```
+
+**Note:** Streaming requires modern browsers that support the `ReadableStream` API (Chrome 43+, Firefox 65+, Safari 10.1+). For older browsers, the method falls back to regular file parsing.
+
 ## Configuration Options
 
 All configuration methods from the [Sync API](SYNC.md) are available:
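The streaming path can also be exercised without a real upload (for example in tests) by constructing a `File` in memory. A sketch, assuming a browser context where the `File` constructor and `file.stream()` are available:

```js
const convert = require('convert-csv-to-json');

// Build an in-memory File; modern browsers expose file.stream() on it.
const csvText = 'name;age\n"Doe, Jane";42\n';
const file = new File([csvText], 'sample.csv', { type: 'text/csv' });

convert.browser
  .fieldDelimiter(';')
  .supportQuotedField(true)
  .getJsonFromFileStreamingAsync(file)
  .then((rows) => console.log(rows)); // e.g. [{ name: 'Doe, Jane', age: '42' }]
```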
package/index.d.ts
CHANGED
@@ -99,10 +99,21 @@ declare module 'convert-csv-to-json' {
    */
   getJsonFromCsv(inputFileName: string): any[];
 
-
-
-
-
+  /**
+   * Async version of getJsonFromCsv. When options.raw is true the input is treated as a CSV string
+   */
+  getJsonFromCsvAsync(inputFileNameOrCsv: string, options?: { raw?: boolean }): Promise<any[]>;
+
+  /**
+   * Parse CSV from a Readable stream and return parsed data as JSON array
+   * Processes data in chunks for memory-efficient handling of large files
+   */
+  getJsonFromStreamAsync(stream: NodeJS.ReadableStream): Promise<any[]>;
+
+  /**
+   * Parse CSV from a file path using streaming for memory-efficient processing
+   */
+  getJsonFromFileStreamingAsync(filePath: string): Promise<any[]>;
 
   csvStringToJson(csvString: string): any[];
 
@@ -139,6 +150,16 @@ declare module 'convert-csv-to-json' {
    * Parse a File or Blob and return a Promise that resolves to the JSON array
    */
   parseFile(file: Blob | File, options?: { encoding?: string }): Promise<any[]>;
+
+  /**
+   * Parse CSV from a ReadableStream and return parsed data as JSON array
+   */
+  getJsonFromStreamAsync(stream: any): Promise<any[]>;
+
+  /**
+   * Parse CSV from a File object using streaming for memory-efficient processing
+   */
+  getJsonFromFileStreamingAsync(file: File): Promise<any[]>;
 }
 
 export const browser: BrowserApi;
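The `options.raw` flag declared above means `getJsonFromCsvAsync` accepts either a file path or literal CSV text. A quick sketch of the second form:

```js
const csvToJson = require('convert-csv-to-json');

async function demo() {
  // With raw: true the first argument is treated as CSV content, not a path.
  const json = await csvToJson.getJsonFromCsvAsync('a,b\n1,2', { raw: true });
  console.log(json); // [{ a: '1', b: '2' }]
}

demo();
```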
package/index.js
CHANGED
@@ -253,6 +253,36 @@ exports.getJsonFromCsv = function(inputFileName) {
  */
 const csvToJsonAsync = require('./src/csvToJsonAsync');
 
+/**
+ * Parse CSV from a Readable stream and return parsed data as JSON array
+ * Processes data in chunks for memory-efficient handling of large files
+ * @param {object} stream - Node.js Readable stream containing CSV data
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If stream is invalid
+ * @throws {CsvFormatError} If CSV is malformed
+ * @category 1-Core API
+ * @example
+ * const fs = require('fs');
+ * const csvToJson = require('convert-csv-to-json');
+ * const stream = fs.createReadStream('large.csv');
+ * const data = await csvToJson.getJsonFromStreamAsync(stream);
+ * console.log(data);
+ */
+
+/**
+ * Parse CSV from a file path using streaming for memory-efficient processing
+ * @param {string} filePath - Path to the CSV file
+ * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+ * @throws {InputValidationError} If filePath is invalid
+ * @throws {FileOperationError} If file cannot be read
+ * @throws {CsvFormatError} If CSV is malformed
+ * @category 1-Core API
+ * @example
+ * const csvToJson = require('convert-csv-to-json');
+ * const data = await csvToJson.getJsonFromFileStreamingAsync('large.csv');
+ * console.log(data);
+ */
+
 // Re-export all async API methods
 Object.assign(exports, {
   getJsonFromCsvAsync: function(input, options) {
@@ -266,6 +296,12 @@ Object.assign(exports, {
   },
   generateJsonFileFromCsvAsync: function(input, output) {
     return csvToJsonAsync.generateJsonFileFromCsv(input, output);
+  },
+  getJsonFromStreamAsync: function(stream) {
+    return csvToJsonAsync.getJsonFromStreamAsync(stream);
+  },
+  getJsonFromFileStreamingAsync: function(filePath) {
+    return csvToJsonAsync.getJsonFromFileStreamingAsync(filePath);
   }
 });
 
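Since the re-exported `getJsonFromStreamAsync` only requires a Node `Readable` (the validation checks for a `pipe` function), `process.stdin` works too. A sketch of a small pipe-through script (file name illustrative):

```js
// pipe-csv.js (usage: cat data.csv | node pipe-csv.js)
const csvToJson = require('convert-csv-to-json');

csvToJson
  .getJsonFromStreamAsync(process.stdin)
  .then((rows) => process.stdout.write(JSON.stringify(rows, null, 2)))
  .catch((err) => {
    console.error(err.message);
    process.exit(1);
  });
```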
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "convert-csv-to-json",
-  "version": "4.36.0",
+  "version": "4.37.0",
   "description": "Convert CSV to JSON",
   "main": "index.js",
   "types": "index.d.ts",
@@ -9,7 +9,7 @@
     "test-debug": "node --inspect-brk node_modules/.bin/jest --runInBand --detectOpenHandles",
     "lint": "eslint .",
     "docs": "jsdoc -c jsdoc.json",
-    "docs:api": "jsdoc -c jsdoc.json -d
+    "docs:api": "jsdoc -c jsdoc.json -d demo/api",
     "prepublishOnly": "npm run docs:api",
     "version-patch": "npm version patch",
    "version-minor": "npm version minor",
@@ -52,6 +52,7 @@
     "@eslint/js": "^10.0.1",
     "@types/jest": "^30.0.0",
     "better-docs": "^2.7.3",
+    "browserify": "^17.0.1",
     "eslint": "^10.1.0",
     "eslint-plugin-jsdoc": "^62.8.0",
     "jest": "^30.2.0",
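`browserify` joins the devDependencies here, presumably in support of a browser bundle. A sketch of its standard programmatic API (the output path and standalone name are assumptions, not taken from this package's scripts):

```js
const fs = require('fs');
const browserify = require('browserify');

// Bundle the library for direct <script> use, exposed as window.csvToJson.
browserify('index.js', { standalone: 'csvToJson' })
  .bundle()
  .pipe(fs.createWriteStream('dist/csv-to-json.bundle.js'));
```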
package/src/browserApi.js
CHANGED
@@ -4,6 +4,7 @@
 
 const csvToJson = require('./csvToJson');
 const { InputValidationError, BrowserApiError } = require('./util/errors');
+const StreamProcessor = require('./streamProcessor');
 
 /**
  * Browser-friendly CSV to JSON API
@@ -216,6 +217,203 @@ class BrowserApi {
       }
     });
   }
+
+  /**
+   * Parse CSV from a browser ReadableStream and return parsed data as JSON array
+   * Processes data in chunks for memory-efficient handling of large streams
+   * @param {object} stream - Browser ReadableStream containing CSV data
+   * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+   * @throws {InputValidationError} If stream is invalid
+   * @throws {BrowserApiError} If streaming is not supported or parsing fails
+   * @example
+   * const csvToJson = require('convert-csv-to-json');
+   * const response = await fetch('large-dataset.csv');
+   * const stream = response.body;
+   * const data = await csvToJson.browser.getJsonFromStreamAsync(stream);
+   * console.log(data);
+   */
+  async getJsonFromStreamAsync(stream) {
+    if (typeof ReadableStream === 'undefined') {
+      throw BrowserApiError.streamingNotSupported();
+    }
+
+    if (!stream || typeof stream.getReader !== 'function') {
+      throw new InputValidationError(
+        'stream',
+        'ReadableStream',
+        typeof stream,
+        'Provide a valid browser ReadableStream.'
+      );
+    }
+
+    const streamProcessor = new StreamProcessor(this.csvToJson, { isBrowser: true });
+    return streamProcessor.processStream(stream);
+  }
+
+  /**
+   * Parse CSV from a File object using streaming for memory-efficient processing
+   * @param {File} file - File object containing CSV data
+   * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+   * @throws {InputValidationError} If file is invalid
+   * @throws {BrowserApiError} If streaming is not supported or parsing fails
+   * @example
+   * const csvToJson = require('convert-csv-to-json');
+   * const fileInput = document.querySelector('#csvfile').files[0];
+   * const data = await csvToJson.browser.getJsonFromFileStreamingAsync(fileInput);
+   * console.log(data);
+   */
+  async getJsonFromFileStreamingAsync(file) {
+    if (!file || !(file instanceof File)) {
+      throw new InputValidationError(
+        'file',
+        'File object',
+        typeof file,
+        'Provide a valid File object.'
+      );
+    }
+
+    // Check if the file supports streaming
+    if (typeof file.stream === 'function') {
+      // Use native streaming if available
+      const stream = file.stream();
+      return this.getJsonFromStreamAsync(stream);
+    } else {
+      // Fallback to regular file parsing for older browsers
+      return this.parseFile(file);
+    }
+  }
+
+  /**
+   * Parse CSV from a File object using streaming with progress callbacks for large files
+   * Processes data in chunks to avoid memory issues with large datasets
+   * @param {File} file - File object containing CSV data
+   * @param {object} options - Processing options
+   * @param {function(Array<object>, number, number): void} options.onChunk - Callback for each chunk of processed rows
+   * @param {function(Array<object>): void} [options.onComplete] - Callback when processing is complete
+   * @param {function(Error): void} [options.onError] - Callback for errors
+   * @param {number} [options.chunkSize=1000] - Number of rows per chunk
+   * @returns {Promise<void>} Promise that resolves when streaming starts
+   * @throws {InputValidationError} If file or options are invalid
+   * @example
+   * const csvToJson = require('convert-csv-to-json');
+   * const fileInput = document.querySelector('#csvfile').files[0];
+   *
+   * await csvToJson.browser.getJsonFromFileStreamingAsyncWithCallback(fileInput, {
+   *   chunkSize: 500,
+   *   onChunk: (rows, processed, total) => {
+   *     console.log(`Processed ${processed}/${total} rows`);
+   *     // Handle chunk of rows here
+   *   },
+   *   onComplete: (allRows) => {
+   *     console.log('Processing complete!');
+   *   },
+   *   onError: (error) => {
+   *     console.error('Error:', error);
+   *   }
+   * });
+   */
+  async getJsonFromFileStreamingAsyncWithCallback(file, options = {}) {
+    if (!file || !(file instanceof File)) {
+      throw new InputValidationError(
+        'file',
+        'File object',
+        typeof file,
+        'Provide a valid File object.'
+      );
+    }
+
+    if (!options.onChunk || typeof options.onChunk !== 'function') {
+      throw new InputValidationError(
+        'options.onChunk',
+        'function',
+        typeof options.onChunk,
+        'Provide a callback function to handle processed chunks.'
+      );
+    }
+
+    const chunkSize = options.chunkSize || 1000;
+    const streamProcessor = new StreamProcessor(this.csvToJson, {
+      isBrowser: true,
+      chunkSize,
+      onChunk: options.onChunk,
+      onComplete: options.onComplete,
+      onError: options.onError
+    });
+
+    // Check if the file supports streaming
+    if (typeof file.stream === 'function') {
+      // Use native streaming if available
+      const stream = file.stream();
+      return streamProcessor.processStreamWithCallbacks(stream);
+    } else {
+      // Fallback to regular file parsing for older browsers
+      return this.parseFileWithCallbacks(file, options);
+    }
+  }
+
+  /**
+   * Parse a File object with progress callbacks (fallback for non-streaming browsers)
+   * @param {File} file - File object to parse
+   * @param {object} options - Processing options
+   * @private
+   */
+  async parseFileWithCallbacks(file, options) {
+    const chunkSize = options.chunkSize || 1000;
+    const onChunk = options.onChunk;
+    const onComplete = options.onComplete;
+    const onError = options.onError;
+
+    return new Promise((resolve, reject) => {
+      if (typeof FileReader === 'undefined') {
+        const error = BrowserApiError.fileReaderNotAvailable();
+        if (onError) onError(error);
+        reject(error);
+        return;
+      }
+
+      const reader = new FileReader();
+      reader.onerror = () => {
+        const error = BrowserApiError.parseFileError(
+          reader.error || new Error('Unknown file reading error')
+        );
+        if (onError) onError(error);
+        reject(error);
+      };
+
+      reader.onload = () => {
+        try {
+          const text = reader.result;
+          const allRows = this.csvToJson.csvToJson(String(text));
+
+          // Process in chunks
+          let processed = 0;
+          const total = allRows.length;
+
+          const processChunk = () => {
+            const chunk = allRows.slice(processed, processed + chunkSize);
+            if (chunk.length > 0) {
+              onChunk(chunk, processed + chunk.length, total);
+              processed += chunk.length;
+              // Use setTimeout to avoid blocking the UI
+              setTimeout(processChunk, 0);
+            } else {
+              if (onComplete) onComplete(allRows);
+              resolve();
+            }
+          };
+
+          processChunk();
+        } catch (err) {
+          const error = BrowserApiError.parseFileError(err);
+          if (onError) onError(error);
+          reject(error);
+        }
+      };
+
+      reader.readAsText(file);
+    });
+  }
+
 }
 
-module.exports = new BrowserApi();
+module.exports = new BrowserApi();
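The `setTimeout(processChunk, 0)` fallback above yields to the event loop between chunks, so a page can keep rendering progress. A sketch of wiring the callback API to a progress element (element IDs and the percentage math are illustrative; `total` may be `null` while native streaming is active, as noted in `_sendPendingChunks`):

```js
const convert = require('convert-csv-to-json');

async function uploadWithProgress(file) {
  const bar = document.querySelector('#progress');
  await convert.browser.getJsonFromFileStreamingAsyncWithCallback(file, {
    chunkSize: 1000,
    onChunk: (rows, processed, total) => {
      // total is only known in the non-streaming fallback path
      bar.textContent = total ? `${Math.round((processed / total) * 100)}%`
                              : `${processed} rows...`;
    },
    onComplete: (allRows) => {
      bar.textContent = `Done: ${allRows.length} rows`;
    },
    onError: (error) => {
      bar.textContent = `Failed: ${error.message}`;
    }
  });
}
```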
package/src/csvToJsonAsync.js
CHANGED
@@ -4,6 +4,7 @@
 const fileUtils = require('./util/fileUtils');
 const csvToJson = require('./csvToJson');
 const { InputValidationError } = require('./util/errors');
+const StreamProcessor = require('./streamProcessor');
 
 /**
  * Asynchronous CSV to JSON converter
@@ -177,18 +178,69 @@ class CsvToJsonAsync {
   }
 
   /**
-   * Parse CSV
-   *
-   * @
+   * Parse CSV from a Readable stream and return parsed data as JSON array
+   * Processes data in chunks for memory-efficient handling of large files
+   * @param {object} stream - Node.js Readable stream containing CSV data
+   * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+   * @throws {InputValidationError} If stream is invalid
    * @throws {CsvFormatError} If CSV is malformed
    * @example
+   * const fs = require('fs');
    * const csvToJson = require('convert-csv-to-json');
-   * const
-   *
+   * const stream = fs.createReadStream('large.csv');
+   * const data = await csvToJson.getJsonFromStreamAsync(stream);
+   * console.log(data);
    */
-  async
-
-
+  async getJsonFromStreamAsync(stream) {
+    this._validateStream(stream);
+
+    const streamProcessor = new StreamProcessor(this.csvToJson, { isBrowser: false });
+    return streamProcessor.processStream(stream);
+  }
+
+  /**
+   * Validate that the provided stream is a valid Readable stream
+   * @param {object} stream - The stream to validate
+   * @throws {InputValidationError} If stream is invalid
+   * @private
+   */
+  _validateStream(stream) {
+    if (!stream || typeof stream.pipe !== 'function') {
+      throw new InputValidationError(
+        'stream',
+        'Readable stream',
+        typeof stream,
+        'Provide a valid Node.js Readable stream.'
+      );
+    }
+  }
+
+  /**
+   * Parse CSV from a file path using streaming for memory-efficient processing
+   * @param {string} filePath - Path to the CSV file
+   * @returns {Promise<Array<object>>} Promise resolving to array of objects representing CSV rows
+   * @throws {InputValidationError} If filePath is invalid
+   * @throws {FileOperationError} If file cannot be read
+   * @throws {CsvFormatError} If CSV is malformed
+   * @example
+   * const csvToJson = require('convert-csv-to-json');
+   * const data = await csvToJson.getJsonFromFileStreamingAsync('large.csv');
+   * console.log(data);
+   */
+  async getJsonFromFileStreamingAsync(filePath) {
+    if (!filePath || typeof filePath !== 'string') {
+      throw new InputValidationError(
+        'filePath',
+        'string (file path)',
+        typeof filePath,
+        'Provide a valid file path as a string.'
+      );
+    }
+
+    const fs = require('fs');
+    const encoding = typeof this.csvToJson.encoding === 'string' ? this.csvToJson.encoding : 'utf8';
+    const stream = fs.createReadStream(filePath, { encoding });
+    return this.getJsonFromStreamAsync(stream);
   }
 }
 
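Note how `getJsonFromFileStreamingAsync` forwards the configured encoding to `fs.createReadStream`. A sketch (file name illustrative, and assuming `latin1Encoding()` stores the string `'latin1'` on the shared config, as the `typeof this.csvToJson.encoding === 'string'` check suggests):

```js
const csvToJson = require('convert-csv-to-json');

// Stream a non-UTF-8 file; the encoding set on the chain is passed through
// to fs.createReadStream({ encoding }).
async function loadLatin1(filePath) {
  return csvToJson
    .latin1Encoding()
    .getJsonFromFileStreamingAsync(filePath);
}

loadLatin1('legacy-export.csv').then((rows) => console.log(rows.length));
```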
package/src/streamProcessor.js
ADDED
@@ -0,0 +1,469 @@
+/* globals CsvFormatError */
+'use strict';
+
+const stringUtils = require('./util/stringUtils');
+
+const QUOTE_CHAR = '"';
+const CRLF = '\r\n';
+const LF = '\n';
+const CR = '\r';
+
+/**
+ * Handles the processing of CSV data from a stream
+ * Encapsulates all stream processing logic following single responsibility principle
+ * Works with both Node.js streams and browser ReadableStream
+ * @private
+ */
+class StreamProcessor {
+  /**
+   * Initialize the stream processor with CSV configuration
+   * @param {object} csvConfig - The CSV configuration object
+   * @param {object} options - Environment options
+   * @param {boolean} options.isBrowser - Whether running in browser environment
+   * @param {number} options.chunkSize - Number of rows per chunk for callback processing
+   * @param {function} options.onChunk - Callback for each chunk
+   * @param {function} options.onComplete - Callback when processing complete
+   * @param {function} options.onError - Callback for errors
+   */
+  constructor(csvConfig, options = {}) {
+    this.csvConfig = csvConfig;
+    this.isBrowser = options.isBrowser || (typeof window !== 'undefined' && typeof document !== 'undefined');
+    this.buffer = '';
+    this.isInsideQuotes = false;
+    this.headers = null;
+    this.headerRowIndex = csvConfig.getIndexHeader();
+    this.currentRecordIndex = 0;
+    this.parsedRecords = [];
+    this.dataRowIndex = 0;
+
+    // Chunked processing options
+    this.chunkSize = options.chunkSize || 1000;
+    this.onChunk = options.onChunk;
+    this.onComplete = options.onComplete;
+    this.onError = options.onError;
+    this.allRecords = []; // For collecting all records when using callbacks
+  }
+
+  /**
+   * Process a chunk of data from the stream
+   * @param {Buffer|string|Uint8Array} chunk - The data chunk to process
+   */
+  processChunk(chunk) {
+    // Convert chunk to string, handling both Node.js Buffers and browser Uint8Array
+    let chunkString;
+    if (typeof chunk === 'string') {
+      chunkString = chunk;
+    } else if (this.isBrowser && typeof globalThis.TextDecoder !== 'undefined') {
+      chunkString = new globalThis.TextDecoder().decode(chunk);
+    } else if (this.isBrowser) {
+      // Fallback for older browsers without TextDecoder
+      chunkString = String.fromCharCode.apply(null, new Uint8Array(chunk));
+    } else {
+      // Node.js environment
+      chunkString = chunk.toString();
+    }
+
+    this.buffer += chunkString;
+    this._processCompleteRecords();
+  }
+
+  /**
+   * Process a stream with chunked callbacks (for large files)
+   * @param {Readable|ReadableStream} stream - The stream to process
+   * @returns {Promise<void>} Promise that resolves when streaming starts
+   */
+  async processStreamWithCallbacks(stream) {
+    return new Promise((resolve, reject) => {
+      if (this.isBrowser) {
+        // Browser ReadableStream
+        if (!stream || typeof stream.getReader !== 'function') {
+          const error = new Error('Invalid ReadableStream provided');
+          if (this.onError) this.onError(error);
+          reject(error);
+          return;
+        }
+
+        const reader = stream.getReader();
+
+        const processChunk = async () => {
+          try {
+            while (true) {
+              const { done, value } = await reader.read();
+
+              if (done) {
+                this.finalizeProcessing();
+                this._sendRemainingChunks();
+                if (this.onComplete) this.onComplete(this.allRecords);
+                resolve();
+                return;
+              }
+
+              this.processChunk(value);
+              this._sendPendingChunks();
+            }
+          } catch (error) {
+            if (this.onError) this.onError(error);
+            reject(error);
+          }
+        };
+
+        processChunk();
+      } else {
+        // Node.js Readable stream
+        if (!stream || typeof stream.pipe !== 'function') {
+          const error = new Error('Invalid Readable stream provided');
+          if (this.onError) this.onError(error);
+          reject(error);
+          return;
+        }
+
+        stream.on('data', (chunk) => {
+          try {
+            this.processChunk(chunk);
+            this._sendPendingChunks();
+          } catch (error) {
+            if (this.onError) this.onError(error);
+            reject(error);
+          }
+        });
+
+        stream.on('end', () => {
+          try {
+            this.finalizeProcessing();
+            this._sendRemainingChunks();
+            if (this.onComplete) this.onComplete(this.allRecords);
+            resolve();
+          } catch (error) {
+            if (this.onError) this.onError(error);
+            reject(error);
+          }
+        });
+
+        stream.on('error', (error) => {
+          if (this.onError) this.onError(error);
+          reject(error);
+        });
+      }
+    });
+  }
+
+  /**
+   * Send pending chunks when they reach the chunk size
+   * @private
+   */
+  _sendPendingChunks() {
+    if (!this.onChunk) return;
+
+    while (this.parsedRecords.length >= this.chunkSize) {
+      const chunk = this.parsedRecords.splice(0, this.chunkSize);
+      this.allRecords.push(...chunk);
+      this.onChunk(chunk, this.allRecords.length, null); // null for total when streaming
+    }
+  }
+
+  /**
+   * Send any remaining chunks at the end of processing
+   * @private
+   */
+  _sendRemainingChunks() {
+    if (!this.onChunk || this.parsedRecords.length === 0) return;
+
+    const chunk = [...this.parsedRecords];
+    this.parsedRecords.length = 0; // Clear the array
+    this.allRecords.push(...chunk);
+    this.onChunk(chunk, this.allRecords.length, this.allRecords.length);
+  }
+
+  /**
+   * Process a stream directly (unified interface for both environments)
+   * @param {Readable|ReadableStream} stream - The stream to process
+   * @returns {Promise<Array<object>>} Promise resolving to parsed records
+   */
+  async processStream(stream) {
+    return new Promise((resolve, reject) => {
+      if (this.isBrowser) {
+        // Browser ReadableStream
+        if (!stream || typeof stream.getReader !== 'function') {
+          reject(new Error('Invalid ReadableStream provided'));
+          return;
+        }
+
+        const reader = stream.getReader();
+
+        const processChunk = async () => {
+          try {
+            while (true) {
+              const { done, value } = await reader.read();
+
+              if (done) {
+                this.finalizeProcessing();
+                resolve(this.getResult());
+                return;
+              }
+
+              this.processChunk(value);
+            }
+          } catch (error) {
+            reject(error);
+          }
+        };
+
+        processChunk();
+      } else {
+        // Node.js Readable stream
+        if (!stream || typeof stream.pipe !== 'function') {
+          reject(new Error('Invalid Readable stream provided'));
+          return;
+        }
+
+        stream.on('data', (chunk) => {
+          try {
+            this.processChunk(chunk);
+          } catch (error) {
+            reject(error);
+          }
+        });
+
+        stream.on('end', () => {
+          try {
+            this.finalizeProcessing();
+            resolve(this.getResult());
+          } catch (error) {
+            reject(error);
+          }
+        });
+
+        stream.on('error', (error) => {
+          reject(error);
+        });
+      }
+    });
+  }
+
+  /**
+   * Finalize processing when the stream ends
+   */
+  finalizeProcessing() {
+    this._processRemainingBuffer();
+    this._validateProcessingResult();
+  }
+
+  /**
+   * Get the final processed result
+   * @returns {Array<object>} Array of parsed JSON objects
+   */
+  getResult() {
+    return this.parsedRecords;
+  }
+
+  /**
+   * Process all complete records currently in the buffer
+   * @private
+   */
+  _processCompleteRecords() {
+    const parseResult = this._parseRecordsFromBuffer(this.buffer, this.isInsideQuotes);
+
+    this.buffer = parseResult.remainingBuffer;
+    this.isInsideQuotes = parseResult.isInsideQuotes;
+
+    for (const record of parseResult.completeRecords) {
+      this._processRecord(record);
+      this.currentRecordIndex++;
+    }
+  }
+
+  /**
+   * Process any remaining buffer content when stream ends
+   * @private
+   */
+  _processRemainingBuffer() {
+    if (this.buffer.length > 0) {
+      if (this.isInsideQuotes) {
+        throw CsvFormatError.mismatchedQuotes('CSV stream');
+      }
+
+      const parseResult = this._parseRecordsFromBuffer(this.buffer + '\n', false);
+
+      for (const record of parseResult.completeRecords) {
+        this._processRecord(record);
+        this.currentRecordIndex++;
+      }
+    }
+  }
+
+  /**
+   * Process a single CSV record
+   * @param {string} record - The CSV record to process
+   * @private
+   */
+  _processRecord(record) {
+    if (this.headers === null && this.currentRecordIndex === this.headerRowIndex) {
+      this._processHeaderRecord(record);
+    } else if (this.headers !== null) {
+      this._processDataRecord(record);
+    }
+  }
+
+  /**
+   * Process a header record
+   * @param {string} record - The header record
+   * @private
+   */
+  _processHeaderRecord(record) {
+    const headerFields = this._splitRecord(record);
+    if (stringUtils.hasContent(headerFields)) {
+      this.headers = headerFields;
+    }
+  }
+
+  /**
+   * Process a data record
+   * @param {string} record - The data record
+   * @private
+   */
+  _processDataRecord(record) {
+    const dataFields = this._splitRecord(record);
+    if (stringUtils.hasContent(dataFields)) {
+      const row = this.csvConfig.buildJsonResult(this.headers, dataFields);
+      const processedRow = this._applyRowMapper(row);
+      if (processedRow !== null) {
+        this.parsedRecords.push(processedRow);
+      }
+    }
+  }
+
+  /**
+   * Apply row mapper function if configured
+   * @param {object} row - The parsed row object
+   * @returns {object|null} The processed row or null if filtered out
+   * @private
+   */
+  _applyRowMapper(row) {
+    if (this.csvConfig.rowMapper) {
+      const mappedRow = this.csvConfig.rowMapper(row, this.dataRowIndex);
+      this.dataRowIndex++;
+      return mappedRow;
+    }
+    this.dataRowIndex++;
+    return row;
+  }
+
+  /**
+   * Split a CSV record into fields based on configuration
+   * @param {string} record - The record to split
+   * @returns {string[]} Array of field values
+   * @private
+   */
+  _splitRecord(record) {
+    if (this.csvConfig.isSupportQuotedField) {
+      return this.csvConfig.split(record);
+    }
+    return record.split(this.csvConfig.delimiter || ',');
+  }
+
+  /**
+   * Parse complete records from buffer, handling quoted fields across chunks
+   * @param {string} buffer - Current buffer content
+   * @param {boolean} insideQuotes - Whether we're currently inside quotes
+   * @returns {object} Object with completeRecords array and remaining buffer/quote state
+   * @private
+   */
+  _parseRecordsFromBuffer(buffer, insideQuotes) {
+    const completeRecords = [];
+    let currentRecord = '';
+    let i = 0;
+
+    while (i < buffer.length) {
+      const char = buffer[i];
+
+      if (char === QUOTE_CHAR) {
+        const escapedQuoteResult = this._handleEscapedQuote(buffer, i, insideQuotes);
+        if (escapedQuoteResult.wasEscaped) {
+          currentRecord += QUOTE_CHAR + QUOTE_CHAR;
+          i = escapedQuoteResult.newIndex;
+          continue;
+        } else {
+          insideQuotes = !insideQuotes;
+        }
+      } else if (!insideQuotes && this._isLineEnding(buffer, i)) {
+        const lineEndingLength = this._getLineEndingLength(buffer, i);
+        completeRecords.push(currentRecord);
+        currentRecord = '';
+        i += lineEndingLength;
+        continue;
+      }
+
+      currentRecord += char;
+      i++;
+    }
+
+    return {
+      completeRecords,
+      remainingBuffer: currentRecord,
+      isInsideQuotes: insideQuotes
+    };
+  }
+
+  /**
+   * Handle escaped quotes in quoted fields
+   * @param {string} buffer - The buffer content
+   * @param {number} index - Current index in buffer
+   * @param {boolean} insideQuotes - Whether currently inside quotes
+   * @returns {object} Result indicating if quote was escaped and new index
+   * @private
+   */
+  _handleEscapedQuote(buffer, index, insideQuotes) {
+    if (insideQuotes && index + 1 < buffer.length && buffer[index + 1] === QUOTE_CHAR) {
+      return { wasEscaped: true, newIndex: index + 2 };
+    }
+    return { wasEscaped: false, newIndex: index + 1 };
+  }
+
+  /**
+   * Check if character at index is a line ending
+   * @param {string} buffer - The buffer content
+   * @param {number} index - Current index
+   * @returns {boolean} True if line ending
+   * @private
+   */
+  _isLineEnding(buffer, index) {
+    return this._getLineEndingLength(buffer, index) > 0;
+  }
+
+  /**
+   * Get the length of line ending at current position
+   * @param {string} content - Content to check
+   * @param {number} index - Current index
+   * @returns {number} Length of line ending
+   * @private
+   */
+  _getLineEndingLength(content, index) {
+    if (content.slice(index, index + 2) === CRLF) {
+      return 2;
+    }
+    if (content[index] === LF) {
+      return 1;
+    }
+    if (content[index] === CR && content[index + 1] !== LF) {
+      return 1;
+    }
+    return 0;
+  }
+
+  /**
+   * Validate the final processing result
+   * @private
+   */
+  _validateProcessingResult() {
+    if (!this.headers && this.parsedRecords.length === 0) {
+      // Empty stream - this is OK
+      return;
+    }
+
+    if (!this.headers) {
+      throw CsvFormatError.missingHeader();
+    }
+  }
+}
+
+module.exports = StreamProcessor;
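The core idea in `_parseRecordsFromBuffer` is that quote state and the partial record must survive chunk boundaries: a quoted field containing a newline can be split across two `data` events, and the record must be held back until the closing quote arrives. A simplified standalone sketch of that carry-over (escaped `""` handling omitted; this is not the class itself):

```js
// Returns a feed() function that keeps quote state and the partial record
// between calls, emitting only records whose closing quote has been seen.
function makeLineSplitter() {
  let insideQuotes = false;
  let current = '';
  return function feed(chunk) {
    const records = [];
    for (const ch of chunk) {
      if (ch === '"') {
        insideQuotes = !insideQuotes;
      } else if (ch === '\n' && !insideQuotes) {
        records.push(current);
        current = '';
        continue;
      }
      current += ch;
    }
    return records;
  };
}

const feed = makeLineSplitter();
console.log(feed('name,notes\nAlice,"line one\nline'));
// ['name,notes'] (Alice's row is held back: its quote is still open)
console.log(feed(' two"\nBob,ok\n'));
// ['Alice,"line one\nline two"', 'Bob,ok']
```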
package/src/util/errors.js
CHANGED
@@ -318,6 +318,24 @@ class BrowserApiError extends CsvParsingError {
       { originalError: originalError.message }
     );
   }
+
+  /**
+   * Create error for unsupported streaming API in browser
+   * Occurs when browser doesn't support ReadableStream
+   * @returns {BrowserApiError} Configured error instance
+   * @static
+   */
+  static streamingNotSupported() {
+    return new BrowserApiError(
+      `Browser compatibility error: ReadableStream API is not available.\n` +
+      `Your browser does not support the ReadableStream API required for streaming.\n\n` +
+      `Solutions:\n` +
+      `  1. Use a modern browser that supports ReadableStream (Chrome 43+, Firefox 65+, Safari 10.1+)\n` +
+      `  2. Use getJsonFromFileStreamingAsync() which falls back to regular file parsing\n` +
+      `  3. Consider using parseFile() for non-streaming file parsing\n` +
+      `  4. Implement a polyfill for ReadableStream support`
+    );
+  }
 }
 
 module.exports = {
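Callers can branch on these error types when deciding whether to fall back. A sketch (assuming, as is conventional, that the custom error classes set `name` to their class name, and using `csvStringToJson` from the main module as the buffered fallback):

```js
const convert = require('convert-csv-to-json');

async function parseResponse(response) {
  try {
    // Throws a BrowserApiError when the ReadableStream API is unavailable.
    return await convert.browser.getJsonFromStreamAsync(response.body);
  } catch (err) {
    if (err.name === 'BrowserApiError') {
      // Fall back to buffering the whole body as text.
      console.warn('Streaming unavailable, buffering instead:', err.message);
      return convert.csvStringToJson(await response.text());
    }
    throw err; // InputValidationError etc. indicate a caller bug, so rethrow
  }
}
```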