@loaders.gl/csv 4.3.2 → 4.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/csv-arrow-loader.d.ts +37 -0
  2. package/dist/csv-arrow-loader.d.ts.map +1 -0
  3. package/dist/csv-arrow-loader.js +23 -0
  4. package/dist/csv-format.d.ts +10 -0
  5. package/dist/csv-format.d.ts.map +1 -0
  6. package/dist/csv-format.js +12 -0
  7. package/dist/csv-loader.d.ts +6 -6
  8. package/dist/csv-loader.d.ts.map +1 -1
  9. package/dist/csv-loader.js +53 -20
  10. package/dist/csv-writer.d.ts +6 -5
  11. package/dist/csv-writer.d.ts.map +1 -1
  12. package/dist/csv-writer.js +2 -5
  13. package/dist/dist.dev.js +13318 -449
  14. package/dist/dist.min.js +23 -20
  15. package/dist/index.cjs +317 -262
  16. package/dist/index.cjs.map +4 -4
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +1 -0
  20. package/dist/lib/encoders/encode-csv.d.ts +1 -1
  21. package/dist/lib/encoders/encode-csv.d.ts.map +1 -1
  22. package/dist/lib/encoders/encode-csv.js +1 -1
  23. package/dist/papaparse/async-iterator-streamer.d.ts +1 -21
  24. package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
  25. package/dist/papaparse/async-iterator-streamer.js +6 -6
  26. package/dist/papaparse/papa-constants.d.ts +12 -0
  27. package/dist/papaparse/papa-constants.d.ts.map +1 -0
  28. package/dist/papaparse/papa-constants.js +19 -0
  29. package/dist/papaparse/papa-parser.d.ts +110 -0
  30. package/dist/papaparse/papa-parser.d.ts.map +1 -0
  31. package/dist/papaparse/papa-parser.js +733 -0
  32. package/dist/papaparse/papa-writer.d.ts +22 -0
  33. package/dist/papaparse/papa-writer.d.ts.map +1 -0
  34. package/dist/papaparse/papa-writer.js +166 -0
  35. package/dist/papaparse/papaparse.d.ts +9 -113
  36. package/dist/papaparse/papaparse.d.ts.map +1 -1
  37. package/dist/papaparse/papaparse.js +13 -882
  38. package/package.json +5 -5
  39. package/src/csv-arrow-loader.ts +41 -0
  40. package/src/csv-format.ts +15 -0
  41. package/src/csv-loader.ts +58 -25
  42. package/src/csv-writer.ts +2 -5
  43. package/src/index.ts +3 -0
  44. package/src/lib/encoders/encode-csv.ts +2 -1
  45. package/src/papaparse/async-iterator-streamer.ts +6 -6
  46. package/src/papaparse/papa-constants.ts +23 -0
  47. package/src/papaparse/papa-parser.ts +872 -0
  48. package/src/papaparse/papa-writer.ts +219 -0
  49. package/src/papaparse/papaparse.ts +17 -1048
@@ -1,11 +1,11 @@
1
- // This is a fork of papaparse
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
+
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
2
7
  // https://github.com/mholt/PapaParse
3
- /* @license
4
- Papa Parse
5
- v5.0.0-beta.0
6
- https://github.com/mholt/PapaParse
7
- License: MIT
8
- */
8
+
9
9
  // FORK SUMMARY:
10
10
  // - Adopt ES6 exports
11
11
  // - Implement new AsyncIteratorStreamer
@@ -13,1053 +13,22 @@ License: MIT
13
13
  // - Remove unused Worker support (loaders.gl worker system used instead)
14
14
  // - Remove unused jQuery plugin support
15
15
 
16
- export type CSVParserConfig = {
17
- dynamicTyping?: boolean | Function | {};
18
- dynamicTypingFunction?: Function;
19
- chunk?: boolean;
20
- chunkSize?: number | null;
21
- step?: Function;
22
- transform?: boolean;
23
- preview?: number;
24
- newline?: string;
25
- comments?: boolean;
26
- skipEmptyLines?: boolean | 'greedy';
27
- delimitersToGuess?: string[];
28
- quoteChar?: string;
29
- escapeChar?: string;
30
- delimiter?: string;
31
- // Convert numbers and boolean values in rows from strings
32
- fastMode?: boolean;
33
- };
34
-
35
- // const defaultConfig: Required<CSVParserConfig> = {
36
- // dynamicTyping: false,
37
- // dynamicTypingFunction: undefined!,
38
- // transform: false
39
- // };
40
-
41
- /* eslint-disable */
42
- const BYTE_ORDER_MARK = '\ufeff';
43
-
44
- function CsvToJson(_input, _config: CSVParserConfig = {}, Streamer: any = StringStreamer) {
45
- _config = _config || {};
46
- var dynamicTyping = _config.dynamicTyping || false;
47
- if (isFunction(dynamicTyping)) {
48
- _config.dynamicTypingFunction = dynamicTyping;
49
- // Will be filled on first row call
50
- dynamicTyping = {};
51
- }
52
- _config.dynamicTyping = dynamicTyping;
53
-
54
- _config.transform = isFunction(_config.transform) ? _config.transform : false;
55
-
56
- var streamer = new Streamer(_config);
57
-
58
- return streamer.stream(_input);
59
- }
60
-
61
- function JsonToCsv(_input, _config) {
62
- // Default configuration
63
-
64
- /** whether to surround every datum with quotes */
65
- var _quotes = false;
66
-
67
- /** whether to write headers */
68
- var _writeHeader = true;
69
-
70
- /** delimiting character(s) */
71
- var _delimiter = ',';
72
-
73
- /** newline character(s) */
74
- var _newline = '\r\n';
75
-
76
- /** quote character */
77
- var _quoteChar = '"';
78
-
79
- /** escaped quote character, either "" or <config.escapeChar>" */
80
- var _escapedQuote = _quoteChar + _quoteChar;
81
-
82
- /** whether to skip empty lines */
83
- var _skipEmptyLines = false;
84
-
85
- /** the columns (keys) we expect when we unparse objects */
86
- var _columns = null;
87
-
88
- unpackConfig();
89
-
90
- var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
91
-
92
- if (typeof _input === 'string') _input = JSON.parse(_input);
93
-
94
- if (Array.isArray(_input)) {
95
- if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
96
- else if (typeof _input[0] === 'object')
97
- return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
98
- } else if (typeof _input === 'object') {
99
- if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
100
-
101
- if (Array.isArray(_input.data)) {
102
- if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
103
-
104
- if (!_input.fields)
105
- _input.fields = Array.isArray(_input.data[0]) ? _input.fields : Object.keys(_input.data[0]);
106
-
107
- if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
108
- _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
109
- }
110
-
111
- return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
112
- }
113
-
114
- // Default (any valid paths should return before this)
115
- throw new Error('Unable to serialize unrecognized input');
116
-
117
- function unpackConfig() {
118
- if (typeof _config !== 'object') return;
119
-
120
- if (
121
- typeof _config.delimiter === 'string' &&
122
- !Papa.BAD_DELIMITERS.filter(function (value) {
123
- return _config.delimiter.indexOf(value) !== -1;
124
- }).length
125
- ) {
126
- _delimiter = _config.delimiter;
127
- }
128
-
129
- if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
130
- _quotes = _config.quotes;
131
-
132
- if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
133
- _skipEmptyLines = _config.skipEmptyLines;
134
-
135
- if (typeof _config.newline === 'string') _newline = _config.newline;
136
-
137
- if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
138
-
139
- if (typeof _config.header === 'boolean') _writeHeader = _config.header;
140
-
141
- if (Array.isArray(_config.columns)) {
142
- if (_config.columns.length === 0) throw new Error('Option columns is empty');
143
-
144
- _columns = _config.columns;
145
- }
146
-
147
- if (_config.escapeChar !== undefined) {
148
- _escapedQuote = _config.escapeChar + _quoteChar;
149
- }
150
- }
151
-
152
- /** The double for loop that iterates the data and writes out a CSV string including header row */
153
- function serialize(fields, data, skipEmptyLines) {
154
- var csv = '';
155
-
156
- if (typeof fields === 'string') fields = JSON.parse(fields);
157
- if (typeof data === 'string') data = JSON.parse(data);
158
-
159
- var hasHeader = Array.isArray(fields) && fields.length > 0;
160
- var dataKeyedByField = !Array.isArray(data[0]);
161
-
162
- // If there a header row, write it first
163
- if (hasHeader && _writeHeader) {
164
- for (var i = 0; i < fields.length; i++) {
165
- if (i > 0) csv += _delimiter;
166
- csv += safe(fields[i], i);
167
- }
168
- if (data.length > 0) csv += _newline;
169
- }
170
-
171
- // Then write out the data
172
- for (var row = 0; row < data.length; row++) {
173
- var maxCol = hasHeader ? fields.length : data[row].length;
174
-
175
- var emptyLine = false;
176
- var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
177
- if (skipEmptyLines && !hasHeader) {
178
- emptyLine =
179
- skipEmptyLines === 'greedy'
180
- ? data[row].join('').trim() === ''
181
- : data[row].length === 1 && data[row][0].length === 0;
182
- }
183
- if (skipEmptyLines === 'greedy' && hasHeader) {
184
- var line: string[] = [];
185
- for (var c = 0; c < maxCol; c++) {
186
- var cx = dataKeyedByField ? fields[c] : c;
187
- line.push(data[row][cx]);
188
- }
189
- emptyLine = line.join('').trim() === '';
190
- }
191
- if (!emptyLine) {
192
- for (var col = 0; col < maxCol; col++) {
193
- if (col > 0 && !nullLine) csv += _delimiter;
194
- var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
195
- csv += safe(data[row][colIdx], col);
196
- }
197
- if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
198
- csv += _newline;
199
- }
200
- }
201
- }
202
- return csv;
203
- }
204
-
205
- /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
206
- function safe(str, col) {
207
- if (typeof str === 'undefined' || str === null) return '';
208
-
209
- if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
210
-
211
- str = str.toString().replace(quoteCharRegex, _escapedQuote);
212
-
213
- var needsQuotes =
214
- (typeof _quotes === 'boolean' && _quotes) ||
215
- (Array.isArray(_quotes) && _quotes[col]) ||
216
- hasAny(str, Papa.BAD_DELIMITERS) ||
217
- str.indexOf(_delimiter) > -1 ||
218
- str.charAt(0) === ' ' ||
219
- str.charAt(str.length - 1) === ' ';
220
-
221
- return needsQuotes ? _quoteChar + str + _quoteChar : str;
222
- }
223
-
224
- function hasAny(str, substrings) {
225
- for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
226
- return false;
227
- }
228
- }
229
-
230
- /** ChunkStreamer is the base prototype for various streamer implementations. */
231
- class ChunkStreamer {
232
- _handle;
233
- _config;
234
-
235
- _finished = false;
236
- _completed = false;
237
- _input = null;
238
- _baseIndex = 0;
239
- _partialLine = '';
240
- _rowCount = 0;
241
- _start = 0;
242
- isFirstChunk = true;
243
- _completeResults = {
244
- data: [],
245
- errors: [],
246
- meta: {}
247
- };
248
-
249
- constructor(config: CSVParserConfig) {
250
- // Deep-copy the config so we can edit it
251
- var configCopy = {...config};
252
- // @ts-expect-error
253
- configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
254
- if (!config.step && !config.chunk) {
255
- configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
256
- }
257
- this._handle = new ParserHandle(configCopy);
258
- this._handle.streamer = this;
259
- this._config = configCopy; // persist the copy to the caller
260
- }
261
-
262
- parseChunk(chunk, isFakeChunk?: boolean) {
263
- // First chunk pre-processing
264
- if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
265
- var modifiedChunk = this._config.beforeFirstChunk(chunk);
266
- if (modifiedChunk !== undefined) chunk = modifiedChunk;
267
- }
268
- this.isFirstChunk = false;
269
-
270
- // Rejoin the line we likely just split in two by chunking the file
271
- var aggregate = this._partialLine + chunk;
272
- this._partialLine = '';
273
-
274
- var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
275
-
276
- if (this._handle.paused() || this._handle.aborted()) return;
277
-
278
- var lastIndex = results.meta.cursor;
279
-
280
- if (!this._finished) {
281
- this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
282
- this._baseIndex = lastIndex;
283
- }
284
-
285
- if (results && results.data) this._rowCount += results.data.length;
286
-
287
- var finishedIncludingPreview =
288
- this._finished || (this._config.preview && this._rowCount >= this._config.preview);
289
-
290
- if (isFunction(this._config.chunk) && !isFakeChunk) {
291
- this._config.chunk(results, this._handle);
292
- if (this._handle.paused() || this._handle.aborted()) return;
293
- results = undefined;
294
- // @ts-expect-error
295
- this._completeResults = undefined;
296
- }
297
-
298
- if (!this._config.step && !this._config.chunk) {
299
- this._completeResults.data = this._completeResults.data.concat(results.data);
300
- this._completeResults.errors = this._completeResults.errors.concat(results.errors);
301
- this._completeResults.meta = results.meta;
302
- }
303
-
304
- if (
305
- !this._completed &&
306
- finishedIncludingPreview &&
307
- isFunction(this._config.complete) &&
308
- (!results || !results.meta.aborted)
309
- ) {
310
- this._config.complete(this._completeResults, this._input);
311
- this._completed = true;
312
- }
313
-
314
- // if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
315
-
316
- return results;
317
- }
318
-
319
- _sendError(error) {
320
- if (isFunction(this._config.error)) this._config.error(error);
321
- }
322
- }
323
-
324
- class StringStreamer extends ChunkStreamer {
325
- remaining;
326
-
327
- constructor(config = {}) {
328
- super(config);
329
- }
330
-
331
- stream(s) {
332
- this.remaining = s;
333
- return this._nextChunk();
334
- }
335
-
336
- _nextChunk() {
337
- if (this._finished) return;
338
- var size = this._config.chunkSize;
339
- var chunk = size ? this.remaining.substr(0, size) : this.remaining;
340
- this.remaining = size ? this.remaining.substr(size) : '';
341
- this._finished = !this.remaining;
342
- return this.parseChunk(chunk);
343
- }
344
- }
345
-
346
- const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
347
- const ISO_DATE =
348
- /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
349
-
350
- // Use one ParserHandle per entire CSV file or string
351
- class ParserHandle {
352
- _config;
353
-
354
- /** Number of times step was called (number of rows parsed) */
355
- _stepCounter = 0;
356
- /** Number of rows that have been parsed so far */
357
- _rowCounter = 0;
358
- /** The input being parsed */
359
- _input;
360
- /** The core parser being used */
361
- _parser;
362
- /** Whether we are paused or not */
363
- _paused = false;
364
- /** Whether the parser has aborted or not */
365
- _aborted = false;
366
- /** Temporary state between delimiter detection and processing results */
367
- _delimiterError: boolean = false;
368
- /** Fields are from the header row of the input, if there is one */
369
- _fields: string[] = [];
370
- /** The last results returned from the parser */
371
- _results: {
372
- data: any[][] | Record<string, any>[];
373
- errors: any[];
374
- meta: Record<string, any>;
375
- } = {
376
- data: [],
377
- errors: [],
378
- meta: {}
379
- };
380
-
381
- constructor(_config: CSVParserConfig) {
382
- // One goal is to minimize the use of regular expressions...
383
-
384
- if (isFunction(_config.step)) {
385
- var userStep = _config.step;
386
- _config.step = (results) => {
387
- this._results = results;
388
-
389
- if (this.needsHeaderRow()) {
390
- this.processResults();
391
- }
392
- // only call user's step function after header row
393
- else {
394
- this.processResults();
395
-
396
- // It's possbile that this line was empty and there's no row here after all
397
- if (!this._results.data || this._results.data.length === 0) return;
398
-
399
- this._stepCounter += results.data.length;
400
- if (_config.preview && this._stepCounter > _config.preview) {
401
- this._parser.abort();
402
- } else {
403
- userStep(this._results, this);
404
- }
405
- }
406
- };
407
- }
408
- this._config = _config;
409
- }
410
-
411
- /**
412
- * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
413
- * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
414
- * when an input comes in multiple chunks, like from a file.
415
- */
416
- parse(input, baseIndex, ignoreLastRow) {
417
- var quoteChar = this._config.quoteChar || '"';
418
- if (!this._config.newline) this._config.newline = guessLineEndings(input, quoteChar);
419
-
420
- this._delimiterError = false;
421
- if (!this._config.delimiter) {
422
- var delimGuess = this.guessDelimiter(
423
- input,
424
- this._config.newline,
425
- this._config.skipEmptyLines,
426
- this._config.comments,
427
- this._config.delimitersToGuess
428
- );
429
- if (delimGuess.successful) {
430
- this._config.delimiter = delimGuess.bestDelimiter;
431
- } else {
432
- this._delimiterError = true; // add error after parsing (otherwise it would be overwritten)
433
- this._config.delimiter = Papa.DefaultDelimiter;
434
- }
435
- this._results.meta.delimiter = this._config.delimiter;
436
- } else if (isFunction(this._config.delimiter)) {
437
- this._config.delimiter = this._config.delimiter(input);
438
- this._results.meta.delimiter = this._config.delimiter;
439
- }
440
-
441
- var parserConfig = copy(this._config);
442
- if (this._config.preview && this._config.header) parserConfig.preview++; // to compensate for header row
443
-
444
- this._input = input;
445
- this._parser = new Parser(parserConfig);
446
- this._results = this._parser.parse(this._input, baseIndex, ignoreLastRow);
447
- this.processResults();
448
- return this._paused ? {meta: {paused: true}} : this._results || {meta: {paused: false}};
449
- }
450
-
451
- paused() {
452
- return this._paused;
453
- }
454
-
455
- pause() {
456
- this._paused = true;
457
- this._parser.abort();
458
- this._input = this._input.substr(this._parser.getCharIndex());
459
- }
460
-
461
- resume() {
462
- this._paused = false;
463
- // @ts-expect-error
464
- this.streamer.parseChunk(this._input, true);
465
- }
466
-
467
- aborted() {
468
- return this._aborted;
469
- }
470
-
471
- abort() {
472
- this._aborted = true;
473
- this._parser.abort();
474
- this._results.meta.aborted = true;
475
- if (isFunction(this._config.complete)) {
476
- this._config.complete(this._results);
477
- }
478
- this._input = '';
479
- }
480
-
481
- testEmptyLine(s) {
482
- return this._config.skipEmptyLines === 'greedy'
483
- ? s.join('').trim() === ''
484
- : s.length === 1 && s[0].length === 0;
485
- }
486
-
487
- processResults() {
488
- if (this._results && this._delimiterError) {
489
- this.addError(
490
- 'Delimiter',
491
- 'UndetectableDelimiter',
492
- "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
493
- );
494
- this._delimiterError = false;
495
- }
496
-
497
- if (this._config.skipEmptyLines) {
498
- for (var i = 0; i < this._results.data.length; i++)
499
- if (this.testEmptyLine(this._results.data[i])) this._results.data.splice(i--, 1);
500
- }
16
+ import {CsvToJson, Parser, ParserHandle, ChunkStreamer} from './papa-parser';
17
+ import {JsonToCsv} from './papa-writer';
18
+ import {Papa} from './papa-constants';
501
19
 
502
- if (this.needsHeaderRow()) {
503
- this.fillHeaderFields();
504
- }
20
+ export type {CSVParserConfig} from './papa-parser';
21
+ export type {CSVWriterConfig} from './papa-writer';
505
22
 
506
- return this.applyHeaderAndDynamicTypingAndTransformation();
507
- }
23
+ export default {
24
+ ...Papa,
508
25
 
509
- needsHeaderRow() {
510
- return this._config.header && this._fields.length === 0;
511
- }
512
-
513
- fillHeaderFields() {
514
- if (!this._results) return;
515
-
516
- const addHeder = (header) => {
517
- if (isFunction(this._config.transformHeader)) header = this._config.transformHeader(header);
518
- this._fields.push(header);
519
- };
520
-
521
- if (Array.isArray(this._results.data[0])) {
522
- for (var i = 0; this.needsHeaderRow() && i < this._results.data.length; i++)
523
- this._results.data[i].forEach(addHeder);
524
-
525
- this._results.data.splice(0, 1);
526
- }
527
- // if _results.data[0] is not an array, we are in a step where _results.data is the row.
528
- else {
529
- this._results.data.forEach(addHeder);
530
- }
531
- }
532
-
533
- shouldApplyDynamicTyping(field) {
534
- // Cache function values to avoid calling it for each row
535
- if (this._config.dynamicTypingFunction && this._config.dynamicTyping[field] === undefined) {
536
- this._config.dynamicTyping[field] = this._config.dynamicTypingFunction(field);
537
- }
538
- return (this._config.dynamicTyping[field] || this._config.dynamicTyping) === true;
539
- }
540
-
541
- parseDynamic(field, value) {
542
- if (this.shouldApplyDynamicTyping(field)) {
543
- if (value === 'true' || value === 'TRUE') return true;
544
- else if (value === 'false' || value === 'FALSE') return false;
545
- else if (FLOAT.test(value)) return parseFloat(value);
546
- else if (ISO_DATE.test(value)) return new Date(value);
547
- else return value === '' ? null : value;
548
- }
549
- return value;
550
- }
551
-
552
- applyHeaderAndDynamicTypingAndTransformation() {
553
- if (
554
- !this._results ||
555
- !this._results.data ||
556
- (!this._config.header && !this._config.dynamicTyping && !this._config.transform)
557
- ) {
558
- return this._results;
559
- }
560
-
561
- var incrementBy = 1;
562
- if (!this._results.data[0] || Array.isArray(this._results.data[0])) {
563
- this._results.data = this._results.data.map(this.processRow.bind(this));
564
- incrementBy = this._results.data.length;
565
- } else {
566
- // @ts-expect-error
567
- this._results.data = this.processRow(this._results.data, 0);
568
- }
569
-
570
- if (this._config.header && this._results.meta) this._results.meta.fields = this._fields;
571
-
572
- this._rowCounter += incrementBy;
573
- return this._results;
574
- }
575
-
576
- processRow(rowSource, i): any[] | Record<string, any> {
577
- var row = this._config.header ? {} : [];
578
-
579
- var j;
580
- for (j = 0; j < rowSource.length; j++) {
581
- var field = j;
582
- var value = rowSource[j];
583
-
584
- if (this._config.header)
585
- field = j >= this._fields.length ? '__parsed_extra' : this._fields[j];
586
-
587
- if (this._config.transform) value = this._config.transform(value, field);
588
-
589
- value = this.parseDynamic(field, value);
590
-
591
- if (field === '__parsed_extra') {
592
- row[field] = row[field] || [];
593
- row[field].push(value);
594
- } else row[field] = value;
595
- }
596
-
597
- if (this._config.header) {
598
- if (j > this._fields.length)
599
- this.addError(
600
- 'FieldMismatch',
601
- 'TooManyFields',
602
- 'Too many fields: expected ' + this._fields.length + ' fields but parsed ' + j,
603
- this._rowCounter + i
604
- );
605
- else if (j < this._fields.length)
606
- this.addError(
607
- 'FieldMismatch',
608
- 'TooFewFields',
609
- 'Too few fields: expected ' + this._fields.length + ' fields but parsed ' + j,
610
- this._rowCounter + i
611
- );
612
- }
613
-
614
- return row;
615
- }
616
-
617
- guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
618
- var bestDelim, bestDelta, fieldCountPrevRow;
619
-
620
- delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
621
-
622
- for (var i = 0; i < delimitersToGuess.length; i++) {
623
- var delim = delimitersToGuess[i];
624
- var delta = 0,
625
- avgFieldCount = 0,
626
- emptyLinesCount = 0;
627
- fieldCountPrevRow = undefined;
628
-
629
- var preview = new Parser({
630
- comments: comments,
631
- delimiter: delim,
632
- newline: newline,
633
- preview: 10
634
- }).parse(input);
635
-
636
- for (var j = 0; j < preview.data.length; j++) {
637
- if (skipEmptyLines && this.testEmptyLine(preview.data[j])) {
638
- emptyLinesCount++;
639
- continue;
640
- }
641
- var fieldCount = preview.data[j].length;
642
- avgFieldCount += fieldCount;
643
-
644
- if (typeof fieldCountPrevRow === 'undefined') {
645
- fieldCountPrevRow = 0;
646
- continue;
647
- } else if (fieldCount > 1) {
648
- delta += Math.abs(fieldCount - fieldCountPrevRow);
649
- fieldCountPrevRow = fieldCount;
650
- }
651
- }
652
-
653
- if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;
654
-
655
- if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
656
- bestDelta = delta;
657
- bestDelim = delim;
658
- }
659
- }
660
-
661
- this._config.delimiter = bestDelim;
662
-
663
- return {
664
- successful: !!bestDelim,
665
- bestDelimiter: bestDelim
666
- };
667
- }
668
-
669
- addError(type, code, msg, row?) {
670
- this._results.errors.push({
671
- type: type,
672
- code: code,
673
- message: msg,
674
- row: row
675
- });
676
- }
677
- }
678
-
679
- function guessLineEndings(input, quoteChar) {
680
- input = input.substr(0, 1024 * 1024); // max length 1 MB
681
- // Replace all the text inside quotes
682
- var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
683
- input = input.replace(re, '');
684
-
685
- var r = input.split('\r');
686
-
687
- var n = input.split('\n');
688
-
689
- var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
690
-
691
- if (r.length === 1 || nAppearsFirst) return '\n';
692
-
693
- var numWithN = 0;
694
- for (var i = 0; i < r.length; i++) {
695
- if (r[i][0] === '\n') numWithN++;
696
- }
697
-
698
- return numWithN >= r.length / 2 ? '\r\n' : '\r';
699
- }
700
-
701
- /** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
702
- function escapeRegExp(string) {
703
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
704
- }
705
-
706
- /** The core parser implements speedy and correct CSV parsing */
707
- function Parser(config) {
708
- // Unpack the config object
709
- config = config || {};
710
- var delim = config.delimiter;
711
- var newline = config.newline;
712
- var comments = config.comments;
713
- var step = config.step;
714
- var preview = config.preview;
715
- var fastMode = config.fastMode;
716
- var quoteChar;
717
- /** Allows for no quoteChar by setting quoteChar to undefined in config */
718
- if (config.quoteChar === undefined) {
719
- quoteChar = '"';
720
- } else {
721
- quoteChar = config.quoteChar;
722
- }
723
- var escapeChar = quoteChar;
724
- if (config.escapeChar !== undefined) {
725
- escapeChar = config.escapeChar;
726
- }
727
-
728
- // Delimiter must be valid
729
- if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
730
-
731
- // Comment character must be valid
732
- if (comments === delim) throw new Error('Comment character same as delimiter');
733
- else if (comments === true) comments = '#';
734
- else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
735
- comments = false;
736
-
737
- // Newline must be valid: \r, \n, or \r\n
738
- if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
739
-
740
- // We're gonna need these at the Parser scope
741
- var cursor = 0;
742
- var aborted = false;
743
-
744
- // @ts-expect-error
745
- this.parse = function (input, baseIndex, ignoreLastRow) {
746
- // For some reason, in Chrome, this speeds things up (!?)
747
- if (typeof input !== 'string') throw new Error('Input must be a string');
748
-
749
- // We don't need to compute some of these every time parse() is called,
750
- // but having them in a more local scope seems to perform better
751
- var inputLen = input.length,
752
- delimLen = delim.length,
753
- newlineLen = newline.length,
754
- commentsLen = comments.length;
755
- var stepIsFunction = isFunction(step);
756
-
757
- // Establish starting state
758
- cursor = 0;
759
- var data: any[][] | Record<string, any> = [],
760
- errors: any[] = [],
761
- row: any[] | Record<string, any> = [],
762
- lastCursor: number = 0;
763
-
764
- if (!input) return returnable();
765
-
766
- if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
767
- var rows = input.split(newline);
768
- for (var i = 0; i < rows.length; i++) {
769
- const row = rows[i];
770
- cursor += row.length;
771
- if (i !== rows.length - 1) cursor += newline.length;
772
- else if (ignoreLastRow) return returnable();
773
- if (comments && row.substr(0, commentsLen) === comments) continue;
774
- if (stepIsFunction) {
775
- data = [];
776
- pushRow(row.split(delim));
777
- doStep();
778
- if (aborted) return returnable();
779
- } else pushRow(row.split(delim));
780
- if (preview && i >= preview) {
781
- data = data.slice(0, preview);
782
- return returnable(true);
783
- }
784
- }
785
- return returnable();
786
- }
787
-
788
- var nextDelim = input.indexOf(delim, cursor);
789
- var nextNewline = input.indexOf(newline, cursor);
790
- var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
791
- var quoteSearch;
792
-
793
- // Parser loop
794
- for (;;) {
795
- // Field has opening quote
796
- if (input[cursor] === quoteChar) {
797
- // Start our search for the closing quote where the cursor is
798
- quoteSearch = cursor;
799
-
800
- // Skip the opening quote
801
- cursor++;
802
-
803
- for (;;) {
804
- // Find closing quote
805
- quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
806
-
807
- //No other quotes are found - no other delimiters
808
- if (quoteSearch === -1) {
809
- if (!ignoreLastRow) {
810
- // No closing quote... what a pity
811
- errors.push({
812
- type: 'Quotes',
813
- code: 'MissingQuotes',
814
- message: 'Quoted field unterminated',
815
- row: data.length, // row has yet to be inserted
816
- index: cursor
817
- });
818
- }
819
- return finish();
820
- }
821
-
822
- // Closing quote at EOF
823
- if (quoteSearch === inputLen - 1) {
824
- var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
825
- return finish(value);
826
- }
827
-
828
- // If this quote is escaped, it's part of the data; skip it
829
- // If the quote character is the escape character, then check if the next character is the escape character
830
- if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
831
- quoteSearch++;
832
- continue;
833
- }
834
-
835
- // If the quote character is not the escape character, then check if the previous character was the escape character
836
- if (
837
- quoteChar !== escapeChar &&
838
- quoteSearch !== 0 &&
839
- input[quoteSearch - 1] === escapeChar
840
- ) {
841
- continue;
842
- }
843
-
844
- // Check up to nextDelim or nextNewline, whichever is closest
845
- var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
846
- var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
847
-
848
- // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
849
- if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
850
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
851
- cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
852
- nextDelim = input.indexOf(delim, cursor);
853
- nextNewline = input.indexOf(newline, cursor);
854
-
855
- if (stepIsFunction) {
856
- doStep();
857
- if (aborted) return returnable();
858
- }
859
-
860
- if (preview && data.length >= preview) return returnable(true);
861
-
862
- break;
863
- }
864
-
865
- var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
866
-
867
- // Closing quote followed by newline or 'unnecessary spaces + newLine'
868
- if (
869
- input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
870
- ) {
871
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
872
- saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
873
- nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
874
-
875
- if (stepIsFunction) {
876
- doStep();
877
- if (aborted) return returnable();
878
- }
879
-
880
- if (preview && data.length >= preview) return returnable(true);
881
-
882
- break;
883
- }
884
-
885
- // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
886
- errors.push({
887
- type: 'Quotes',
888
- code: 'InvalidQuotes',
889
- message: 'Trailing quote on quoted field is malformed',
890
- row: data.length, // row has yet to be inserted
891
- index: cursor
892
- });
893
-
894
- quoteSearch++;
895
- continue;
896
- }
897
-
898
- if (stepIsFunction) {
899
- doStep();
900
- if (aborted) return returnable();
901
- }
902
-
903
- if (preview && data.length >= preview) return returnable(true);
904
- continue;
905
- }
906
-
907
- // Comment found at start of new line
908
- if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
909
- if (nextNewline === -1)
910
- // Comment ends at EOF
911
- return returnable();
912
- cursor = nextNewline + newlineLen;
913
- nextNewline = input.indexOf(newline, cursor);
914
- nextDelim = input.indexOf(delim, cursor);
915
- continue;
916
- }
917
-
918
- // Next delimiter comes before next newline, so we've reached end of field
919
- if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
920
- row.push(input.substring(cursor, nextDelim));
921
- cursor = nextDelim + delimLen;
922
- nextDelim = input.indexOf(delim, cursor);
923
- continue;
924
- }
925
-
926
- // End of row
927
- if (nextNewline !== -1) {
928
- row.push(input.substring(cursor, nextNewline));
929
- saveRow(nextNewline + newlineLen);
930
-
931
- if (stepIsFunction) {
932
- doStep();
933
- if (aborted) return returnable();
934
- }
935
-
936
- if (preview && data.length >= preview) return returnable(true);
937
-
938
- continue;
939
- }
940
-
941
- break;
942
- }
943
-
944
- return finish();
945
-
946
- function pushRow(row) {
947
- data.push(row);
948
- lastCursor = cursor;
949
- }
950
-
951
- /**
952
- * checks if there are extra spaces after closing quote and given index without any text
953
- * if Yes, returns the number of spaces
954
- */
955
- function extraSpaces(index) {
956
- var spaceLength = 0;
957
- if (index !== -1) {
958
- var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
959
- if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
960
- spaceLength = textBetweenClosingQuoteAndIndex.length;
961
- }
962
- }
963
- return spaceLength;
964
- }
965
-
966
- /**
967
- * Appends the remaining input from cursor to the end into
968
- * row, saves the row, calls step, and returns the results.
969
- */
970
- function finish(value?: any) {
971
- if (ignoreLastRow) return returnable();
972
- if (typeof value === 'undefined') value = input.substr(cursor);
973
- row.push(value);
974
- cursor = inputLen; // important in case parsing is paused
975
- pushRow(row);
976
- if (stepIsFunction) doStep();
977
- return returnable();
978
- }
979
-
980
- /**
981
- * Appends the current row to the results. It sets the cursor
982
- * to newCursor and finds the nextNewline. The caller should
983
- * take care to execute user's step function and check for
984
- * preview and end parsing if necessary.
985
- */
986
- function saveRow(newCursor) {
987
- cursor = newCursor;
988
- pushRow(row);
989
- row = [];
990
- nextNewline = input.indexOf(newline, cursor);
991
- }
992
-
993
- /** Returns an object with the results, errors, and meta. */
994
- function returnable(stopped?: boolean, step?) {
995
- var isStep = step || false;
996
- return {
997
- data: isStep ? data[0] : data,
998
- errors: errors,
999
- meta: {
1000
- delimiter: delim,
1001
- linebreak: newline,
1002
- aborted: aborted,
1003
- truncated: !!stopped,
1004
- cursor: lastCursor + (baseIndex || 0)
1005
- }
1006
- };
1007
- }
1008
-
1009
- /** Executes the user's step function and resets data & errors. */
1010
- function doStep() {
1011
- step(returnable(undefined, true));
1012
- data = [];
1013
- errors = [];
1014
- }
1015
- };
1016
-
1017
- /** Sets the abort flag */
1018
- // @ts-expect-error
1019
- this.abort = function () {
1020
- aborted = true;
1021
- };
1022
-
1023
- /** Gets the cursor position */
1024
- // @ts-expect-error
1025
- this.getCharIndex = function () {
1026
- return cursor;
1027
- };
1028
- }
1029
-
1030
- /** Makes a deep copy of an array or object (mostly) */
1031
- function copy(obj) {
1032
- if (typeof obj !== 'object' || obj === null) return obj;
1033
- var cpy = Array.isArray(obj) ? [] : {};
1034
- for (var key in obj) cpy[key] = copy(obj[key]);
1035
- return cpy;
1036
- }
1037
-
1038
- function isFunction(func: unknown): func is Function {
1039
- return typeof func === 'function';
1040
- }
1041
-
1042
- const Papa = {
1043
26
  parse: CsvToJson,
1044
27
  unparse: JsonToCsv,
1045
28
 
1046
- RECORD_SEP: String.fromCharCode(30),
1047
- UNIT_SEP: String.fromCharCode(31),
1048
- BYTE_ORDER_MARK,
1049
- BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
1050
- WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
1051
- NODE_STREAM_INPUT: 1,
1052
-
1053
- // Configurable chunk sizes for local and remote files, respectively
1054
- LocalChunkSize: 1024 * 1024 * 10, // 10 M,
1055
- RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
1056
- DefaultDelimiter: ',', // Used if not specified and detection fail,
29
+ ChunkStreamer,
1057
30
 
1058
31
  // Exposed for testing and development only
1059
- Parser: Parser,
1060
- ParserHandle: ParserHandle,
1061
-
1062
- // BEGIN FORK
1063
- ChunkStreamer: ChunkStreamer
32
+ Parser,
33
+ ParserHandle
1064
34
  };
1065
- export default Papa;