@loaders.gl/csv 4.3.1 → 4.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/dist/csv-arrow-loader.d.ts +37 -0
  2. package/dist/csv-arrow-loader.d.ts.map +1 -0
  3. package/dist/csv-arrow-loader.js +23 -0
  4. package/dist/csv-format.d.ts +10 -0
  5. package/dist/csv-format.d.ts.map +1 -0
  6. package/dist/csv-format.js +12 -0
  7. package/dist/csv-loader.d.ts +6 -6
  8. package/dist/csv-loader.d.ts.map +1 -1
  9. package/dist/csv-loader.js +53 -20
  10. package/dist/csv-writer.d.ts +6 -5
  11. package/dist/csv-writer.d.ts.map +1 -1
  12. package/dist/csv-writer.js +2 -5
  13. package/dist/dist.dev.js +13318 -449
  14. package/dist/dist.min.js +23 -20
  15. package/dist/index.cjs +317 -262
  16. package/dist/index.cjs.map +4 -4
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +1 -0
  20. package/dist/lib/encoders/encode-csv.d.ts +1 -1
  21. package/dist/lib/encoders/encode-csv.d.ts.map +1 -1
  22. package/dist/lib/encoders/encode-csv.js +1 -1
  23. package/dist/papaparse/async-iterator-streamer.d.ts +1 -21
  24. package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
  25. package/dist/papaparse/async-iterator-streamer.js +6 -6
  26. package/dist/papaparse/papa-constants.d.ts +12 -0
  27. package/dist/papaparse/papa-constants.d.ts.map +1 -0
  28. package/dist/papaparse/papa-constants.js +19 -0
  29. package/dist/papaparse/papa-parser.d.ts +110 -0
  30. package/dist/papaparse/papa-parser.d.ts.map +1 -0
  31. package/dist/papaparse/papa-parser.js +733 -0
  32. package/dist/papaparse/papa-writer.d.ts +22 -0
  33. package/dist/papaparse/papa-writer.d.ts.map +1 -0
  34. package/dist/papaparse/papa-writer.js +166 -0
  35. package/dist/papaparse/papaparse.d.ts +9 -113
  36. package/dist/papaparse/papaparse.d.ts.map +1 -1
  37. package/dist/papaparse/papaparse.js +13 -882
  38. package/package.json +5 -5
  39. package/src/csv-arrow-loader.ts +41 -0
  40. package/src/csv-format.ts +15 -0
  41. package/src/csv-loader.ts +58 -25
  42. package/src/csv-writer.ts +2 -5
  43. package/src/index.ts +3 -0
  44. package/src/lib/encoders/encode-csv.ts +2 -1
  45. package/src/papaparse/async-iterator-streamer.ts +6 -6
  46. package/src/papaparse/papa-constants.ts +23 -0
  47. package/src/papaparse/papa-parser.ts +872 -0
  48. package/src/papaparse/papa-writer.ts +219 -0
  49. package/src/papaparse/papaparse.ts +17 -1048
@@ -0,0 +1,872 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
+
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
7
+ // https://github.com/mholt/PapaParse
8
+
9
+ /* eslint-disable no-continue, max-depth */
10
+
11
+ import {Papa} from './papa-constants';
12
+
13
/**
 * Options understood by the CSV streamers, `ParserHandle` and the core `Parser`.
 * All fields are optional; defaults noted below are the ones applied in this file.
 */
export type CSVParserConfig = {
  /**
   * Per-chunk callback, invoked by the streamer as `chunk(results, handle)`.
   * A plain boolean is still accepted for backward compatibility; any truthy value
   * enables streaming mode (results are not accumulated by the streamer).
   */
  chunk?: boolean | Function;
  /** Number of characters per chunk when streaming a string; ignored (reset to null) unless `step` or `chunk` is set */
  chunkSize?: number | null;
  /** Maximum number of rows to parse */
  preview?: number;
  /** Line terminator: '\n', '\r' or '\r\n'. Auto-detected from the input when omitted */
  newline?: string;
  /** Comment prefix; `true` selects '#'. Lines starting with the prefix are skipped */
  comments?: boolean | string;
  /** Drop empty rows; 'greedy' also drops rows that contain only whitespace */
  skipEmptyLines?: boolean | 'greedy';
  /** Candidate delimiters tried when `delimiter` is not supplied */
  delimitersToGuess?: string[];
  /** NOTE(review): not read by the parser in this file - presumably consumed by the CSV writer; confirm */
  quotes?: string[] | boolean;
  /** Character that opens and closes a quoted field (default '"') */
  quoteChar?: string;
  /** Character that escapes `quoteChar` inside a quoted field (defaults to `quoteChar`) */
  escapeChar?: string;
  /** Field delimiter, or a function that receives the input and returns the delimiter. Guessed when omitted */
  delimiter?: string | Function;
  /** Force (true) or forbid (false) the split-based fast path; when omitted it is used only if the input contains no quoteChar */
  fastMode?: boolean;

  /** Convert numbers and boolean values in rows from strings; `true` for all fields or a per-field map */
  dynamicTyping?: boolean | {};
  /** Called once per field name to decide whether dynamic typing applies; the answer is cached in `dynamicTyping` */
  dynamicTypingFunction?: Function;
  /** Per-row callback, invoked as `step(results, handle)` */
  step?: Function;
  /** Value transform, invoked as `transform(value, field)` before dynamic typing */
  transform?: Function;
  /** Called when parsing finishes or is aborted */
  complete?: Function;
};
34
+
35
+ // const defaultConfig: Required<CSVParserConfig> = {
36
+ // dynamicTyping: false,
37
+ // dynamicTypingFunction: undefined!,
38
+ // transform: false
39
+ // };
40
+
41
/**
 * Parses `_input` by handing it to a freshly constructed streamer.
 * @param _input - the data passed to the streamer's `stream()` method
 * @param _config - parser configuration forwarded to the streamer constructor
 * @param Streamer - streamer class to instantiate (defaults to `StringStreamer`)
 * @returns whatever the streamer's `stream()` call returns
 */
export function CsvToJson(_input, _config: CSVParserConfig = {}, Streamer: any = StringStreamer) {
  return new Streamer(_config).stream(_input);
}
46
+
47
/**
 * ChunkStreamer is the base prototype for various streamer implementations.
 *
 * It owns one `ParserHandle`, feeds it chunks through `parseChunk()`, stitches
 * together rows that were split across chunk boundaries and, when neither `step`
 * nor `chunk` is configured, accumulates all results in `_completeResults`.
 * This class does not pull further chunks itself; callers/subclasses decide when
 * to call `parseChunk()` again.
 */
export class ChunkStreamer {
  _handle;
  _config;

  /** Set by the subclass/caller once the final chunk is being delivered */
  _finished = false;
  /** True once the `complete` callback has been invoked */
  _completed = false;
  _input = null;
  /** Absolute character offset of the start of the current aggregate */
  _baseIndex = 0;
  /** Tail of the previous chunk that did not yet form a complete row */
  _partialLine = '';
  /** Rows returned by the handle so far (used for `preview`) */
  _rowCount = 0;
  _start = 0;
  isFirstChunk = true;
  _completeResults = {
    data: [],
    errors: [],
    meta: {}
  };

  /**
   * @param config - parser configuration; it is shallow-copied, so the caller's
   *   object is not modified (nested objects are still shared).
   */
  constructor(config: CSVParserConfig) {
    // Shallow copy so the normalisations below do not leak into the caller's object
    const configCopy = {...config};
    if (configCopy.dynamicTypingFunction) {
      // Start with an empty per-field cache that the handle fills lazily
      configCopy.dynamicTyping = {};
    }

    // Chunking only makes sense when results are streamed out through `step`/`chunk`.
    // Coerce to an integer (chunkSize may arrive as a string) and never persist NaN.
    const isStreaming = Boolean(config.step || config.chunk);
    const parsedChunkSize = Number.parseInt(String(configCopy.chunkSize), 10);
    configCopy.chunkSize =
      isStreaming && Number.isFinite(parsedChunkSize) ? parsedChunkSize : null;

    this._handle = new ParserHandle(configCopy);
    this._handle.streamer = this;
    this._config = configCopy; // persist the copy to the caller
  }

  /**
   * Parses one chunk of text.
   * @param chunk - the next piece of input
   * @param isFakeChunk - true when re-parsing after `resume()`; suppresses the `chunk` callback
   * @returns the handle's results, or undefined when parsing was paused/aborted or
   *   the results were handed to the `chunk` callback
   */
  // eslint-disable-next-line complexity, max-statements
  parseChunk(chunk, isFakeChunk?: boolean) {
    // First chunk pre-processing
    if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
      const modifiedChunk = this._config.beforeFirstChunk(chunk);
      if (modifiedChunk !== undefined) chunk = modifiedChunk;
    }
    this.isFirstChunk = false;

    // Rejoin the line we likely just split in two by chunking the file
    const aggregate = this._partialLine + chunk;
    this._partialLine = '';

    // While more chunks are expected, the last (possibly incomplete) row is ignored
    let results = this._handle.parse(aggregate, this._baseIndex, !this._finished);

    if (this._handle.paused() || this._handle.aborted()) return;

    const lastIndex = results.meta.cursor;

    if (!this._finished) {
      // Keep everything after the last fully parsed row for the next chunk
      this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
      this._baseIndex = lastIndex;
    }

    if (results && results.data) this._rowCount += results.data.length;

    const finishedIncludingPreview =
      this._finished || (this._config.preview && this._rowCount >= this._config.preview);

    if (isFunction(this._config.chunk) && !isFakeChunk) {
      this._config.chunk(results, this._handle);
      if (this._handle.paused() || this._handle.aborted()) return;
      // Results were consumed by the callback; nothing is accumulated or returned
      results = undefined;
      // @ts-expect-error
      this._completeResults = undefined;
    }

    if (!this._config.step && !this._config.chunk) {
      this._completeResults.data = this._completeResults.data.concat(results.data);
      this._completeResults.errors = this._completeResults.errors.concat(results.errors);
      this._completeResults.meta = results.meta;
    }

    if (
      !this._completed &&
      finishedIncludingPreview &&
      isFunction(this._config.complete) &&
      (!results || !results.meta.aborted)
    ) {
      this._config.complete(this._completeResults, this._input);
      this._completed = true;
    }

    // if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();

    // eslint-disable-next-line consistent-return
    return results;
  }

  /** Forwards `error` to the configured `error` callback, if there is one. */
  _sendError(error) {
    if (isFunction(this._config.error)) this._config.error(error);
  }
}
145
+
146
/**
 * Streamer for input that is already fully available as a string.
 *
 * When a positive `chunkSize` is configured the string is parsed in slices of
 * that many characters, otherwise it is parsed in one go. All chunks are drained
 * synchronously by `stream()`; the base class does not advance on its own.
 */
class StringStreamer extends ChunkStreamer {
  /** The part of the input that has not been handed to `parseChunk()` yet */
  remaining;

  constructor(config = {}) {
    super(config);
  }

  /**
   * Parses the string `s`.
   * @returns the results of the first chunk. Without `step`/`chunk` there is only
   *   one chunk, so this is the complete result; in streaming mode the rows are
   *   delivered through the callbacks instead.
   */
  stream(s) {
    this.remaining = s;
    const firstResults = this._nextChunk();
    // Keep feeding chunks; previously only the first chunk was ever parsed and
    // the rest of the input was silently dropped.
    while (this._hasMoreToParse()) {
      this._nextChunk();
    }
    return firstResults;
  }

  /** Cuts the next chunk off `remaining` and parses it. Returns undefined once finished. */
  _nextChunk() {
    if (this._finished) return;
    const size = this._config.chunkSize;
    // Only a positive size chunks the input; null/NaN/0/negative mean "all at once"
    // (a negative size would never shrink `remaining`).
    const isChunked = size > 0;
    const chunk = isChunked ? this.remaining.slice(0, size) : this.remaining;
    this.remaining = isChunked ? this.remaining.slice(size) : '';
    this._finished = !this.remaining;
    // eslint-disable-next-line consistent-return
    return this.parseChunk(chunk);
  }

  /**
   * True while there is unparsed input and parsing was neither paused, aborted
   * nor stopped by `preview`.
   * NOTE(review): after `pause()`/`resume()` only the paused chunk is re-parsed;
   * later chunks are not pulled automatically.
   */
  _hasMoreToParse() {
    if (this._finished || this._handle.paused() || this._handle.aborted()) return false;
    const preview = this._config.preview;
    return !(preview && this._rowCount >= preview);
  }
}
168
+
169
+ const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i; // Whole-string test used by parseDynamic before parseFloat: optional surrounding whitespace, optional minus sign, digits with an optional fraction, optional exponent (case-insensitive)
170
+ const ISO_DATE = // Used by parseDynamic before `new Date(value)`. Three alternatives: with fractional seconds, with seconds, without seconds; each requires the 'T' separator and a 'Z' or +hh:mm / -hh:mm offset. The pattern is not anchored, so it matches anywhere inside the value.
171
+ /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
172
+
173
// Use one ParserHandle per entire CSV file or string
/**
 * Wraps the core `Parser` with everything that needs state across chunks:
 * newline/delimiter detection, header handling, empty-line skipping, dynamic
 * typing, value transforms, `preview` accounting in step mode and pause/abort.
 */
export class ParserHandle {
  _config;
  /** The streamer driving this handle; assigned by the streamer and used by `resume()` */
  streamer?: {parseChunk(chunk: string, isFakeChunk?: boolean): unknown};

  /** Number of rows delivered to the user's step function */
  _stepCounter = 0;
  /** Number of rows that have been parsed so far */
  _rowCounter = 0;
  /** The input being parsed */
  _input;
  /** The core parser being used */
  _parser;
  /** Whether we are paused or not */
  _paused = false;
  /** Whether the parser has aborted or not */
  _aborted = false;
  /** Temporary state between delimiter detection and processing results */
  _delimiterError: boolean = false;
  /** Fields are from the header row of the input, if there is one */
  _fields: string[] = [];
  /** The last results returned from the parser */
  _results: {
    data: any[][] | Record<string, any>[];
    errors: any[];
    meta: Record<string, any>;
  } = {
    data: [],
    errors: [],
    meta: {}
  };

  /**
   * @param _config - parser configuration. NOTE: when `step` is a function it is
   *   replaced on this object by a wrapper that post-processes every row before
   *   forwarding it to the user's callback.
   */
  constructor(_config: CSVParserConfig) {
    // One goal is to minimize the use of regular expressions...

    if (isFunction(_config.step)) {
      const userStep = _config.step;
      _config.step = (results) => {
        // In step mode the parser delivers ONE row per call: `results.data` is the row itself
        this._results = results;

        const isHeaderRow = this.needsHeaderRow();
        this.processResults();
        // only call user's step function after header row
        if (isHeaderRow) return;

        // It's possible that this line was empty and there's no row here after all
        if (!this._results.data || this._results.data.length === 0) return;

        // Count rows, not fields: `data` is a single row here, so its length is the
        // field count (and undefined once the row was converted to an object).
        this._stepCounter += 1;
        if (_config.preview && this._stepCounter > _config.preview) {
          this._parser.abort();
        } else {
          userStep(this._results, this);
        }
      };
    }
    this._config = _config;
  }

  /**
   * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
   * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
   * when an input comes in multiple chunks, like from a file.
   * @returns the processed results, or `{meta: {paused: true}}` when parsing was paused
   */
  parse(input, baseIndex, ignoreLastRow) {
    const quoteChar = this._config.quoteChar || '"';
    if (!this._config.newline) this._config.newline = guessLineEndings(input, quoteChar);

    this._delimiterError = false;
    if (!this._config.delimiter) {
      const delimGuess = this.guessDelimiter(
        input,
        this._config.newline,
        this._config.skipEmptyLines,
        this._config.comments,
        this._config.delimitersToGuess
      );
      if (delimGuess.successful) {
        this._config.delimiter = delimGuess.bestDelimiter;
      } else {
        this._delimiterError = true; // add error after parsing (otherwise it would be overwritten)
        this._config.delimiter = Papa.DefaultDelimiter;
      }
      this._results.meta.delimiter = this._config.delimiter;
    } else if (isFunction(this._config.delimiter)) {
      this._config.delimiter = this._config.delimiter(input);
      this._results.meta.delimiter = this._config.delimiter;
    }

    const parserConfig = copy(this._config);
    if (this._config.preview && this._config.header) parserConfig.preview++; // to compensate for header row

    this._input = input;
    this._parser = new Parser(parserConfig);
    this._results = this._parser.parse(this._input, baseIndex, ignoreLastRow);
    this.processResults();
    return this._paused ? {meta: {paused: true}} : this._results || {meta: {paused: false}};
  }

  /** @returns true while parsing is paused */
  paused() {
    return this._paused;
  }

  /** Stops the current parser run and keeps the unparsed remainder of the input for `resume()`. */
  pause() {
    this._paused = true;
    this._parser.abort();
    this._input = this._input.substr(this._parser.getCharIndex());
  }

  /** Continues after `pause()` by re-submitting the remaining input to the owning streamer, if any. */
  resume() {
    this._paused = false;
    this.streamer?.parseChunk(this._input, true);
  }

  /** @returns true once `abort()` has been called */
  aborted() {
    return this._aborted;
  }

  /** Aborts parsing, flags the results as aborted and notifies the `complete` callback. */
  abort() {
    this._aborted = true;
    this._parser.abort();
    this._results.meta.aborted = true;
    if (isFunction(this._config.complete)) {
      this._config.complete(this._results);
    }
    this._input = '';
  }

  /**
   * @param s - one parsed row (array of field strings)
   * @returns true if the row counts as empty: a single empty field, or - in
   *   'greedy' mode - only whitespace across all fields
   */
  testEmptyLine(s) {
    return this._config.skipEmptyLines === 'greedy'
      ? s.join('').trim() === ''
      : s.length === 1 && s[0].length === 0;
  }

  /**
   * Post-processes `_results` in place: records a pending delimiter-detection
   * error, removes empty rows, captures the header row and applies
   * header/dynamic-typing/transform conversions.
   */
  processResults() {
    if (this._results && this._delimiterError) {
      this.addError(
        'Delimiter',
        'UndetectableDelimiter',
        `Unable to auto-detect delimiting character; defaulted to '${Papa.DefaultDelimiter}'`
      );
      this._delimiterError = false;
    }

    const data = this._results?.data;
    if (this._config.skipEmptyLines && Array.isArray(data)) {
      // In step mode `data` is a single row (array of strings), otherwise an array of rows
      const isSingleRow = data.length > 0 && !Array.isArray(data[0]);
      if (isSingleRow) {
        // An empty row becomes "no data", which the step wrapper treats as nothing to deliver
        if (this.testEmptyLine(data)) this._results.data = [];
      } else {
        this._results.data = data.filter((row) => !this.testEmptyLine(row));
      }
    }

    if (this.needsHeaderRow()) {
      this.fillHeaderFields();
    }

    return this.applyHeaderAndDynamicTypingAndTransformation();
  }

  /** @returns truthy while `header` is enabled and no header fields have been captured yet */
  needsHeaderRow() {
    return this._config.header && this._fields.length === 0;
  }

  /** Captures the header names (passed through `transformHeader` if configured) from the first row. */
  fillHeaderFields() {
    if (!this._results) return;

    const addHeader = (header) => {
      if (isFunction(this._config.transformHeader)) header = this._config.transformHeader(header);
      this._fields.push(header);
    };

    if (Array.isArray(this._results.data[0])) {
      for (let i = 0; this.needsHeaderRow() && i < this._results.data.length; i++)
        this._results.data[i].forEach(addHeader);

      // The header row is not data
      this._results.data.splice(0, 1);
    }
    // if _results.data[0] is not an array, we are in a step where _results.data is the row.
    else {
      this._results.data.forEach(addHeader);
    }
  }

  /** @returns true if values of `field` should be converted by `parseDynamic` */
  shouldApplyDynamicTyping(field) {
    // Cache function values to avoid calling it for each row
    if (this._config.dynamicTypingFunction && this._config.dynamicTyping?.[field] === undefined) {
      this._config.dynamicTyping[field] = this._config.dynamicTypingFunction(field);
    }
    return (this._config.dynamicTyping?.[field] || this._config.dynamicTyping) === true;
  }

  /**
   * Converts `value` to boolean, number, Date or null (for the empty string) when
   * dynamic typing applies to `field`; otherwise returns it unchanged.
   */
  parseDynamic(field, value) {
    if (this.shouldApplyDynamicTyping(field)) {
      if (value === 'true' || value === 'TRUE') return true;
      else if (value === 'false' || value === 'FALSE') return false;
      else if (FLOAT.test(value)) return parseFloat(value);
      else if (ISO_DATE.test(value)) return new Date(value);
      return value === '' ? null : value;
    }
    return value;
  }

  /** Converts the rows in `_results` via `processRow` when header, dynamic typing or transform is configured. */
  applyHeaderAndDynamicTypingAndTransformation() {
    if (
      !this._results ||
      !this._results.data ||
      (!this._config.header && !this._config.dynamicTyping && !this._config.transform)
    ) {
      return this._results;
    }

    let incrementBy = 1;
    if (!this._results.data[0] || Array.isArray(this._results.data[0])) {
      this._results.data = this._results.data.map(this.processRow.bind(this));
      incrementBy = this._results.data.length;
    } else {
      // Step mode: `data` is a single row
      // @ts-expect-error
      this._results.data = this.processRow(this._results.data, 0);
    }

    if (this._config.header && this._results.meta) this._results.meta.fields = this._fields;

    this._rowCounter += incrementBy;
    return this._results;
  }

  /**
   * Converts one raw row.
   * @param rowSource - array of field strings
   * @param i - index of the row within the current results (used for error reporting)
   * @returns an object keyed by header name when `header` is enabled (surplus
   *   values are collected under `__parsed_extra`), otherwise an array
   */
  processRow(rowSource, i): any[] | Record<string, any> {
    const row = this._config.header ? {} : [];

    let j;
    for (j = 0; j < rowSource.length; j++) {
      let field = j;
      let value = rowSource[j];

      if (this._config.header)
        field = j >= this._fields.length ? '__parsed_extra' : this._fields[j];

      if (this._config.transform) value = this._config.transform(value, field);

      value = this.parseDynamic(field, value);

      if (field === '__parsed_extra') {
        row[field] = row[field] || [];
        row[field].push(value);
      } else row[field] = value;
    }

    if (this._config.header) {
      if (j > this._fields.length)
        this.addError(
          'FieldMismatch',
          'TooManyFields',
          `Too many fields: expected ${this._fields.length} fields but parsed ${j}`,
          this._rowCounter + i
        );
      else if (j < this._fields.length)
        this.addError(
          'FieldMismatch',
          'TooFewFields',
          `Too few fields: expected ${this._fields.length} fields but parsed ${j}`,
          this._rowCounter + i
        );
    }

    return row;
  }

  /**
   * Tries each candidate delimiter on the first 10 rows and scores it by how the
   * field count varies between rows.
   * Side effect: stores the winner (possibly undefined) in `_config.delimiter`.
   * @returns `{successful, bestDelimiter}`
   */
  // eslint-disable-next-line complexity, max-statements
  guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
    let bestDelim;
    let bestDelta;
    let fieldCountPrevRow;

    delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];

    for (let i = 0; i < delimitersToGuess.length; i++) {
      const delim = delimitersToGuess[i];
      let avgFieldCount = 0;
      let delta = 0;
      let emptyLinesCount = 0;
      fieldCountPrevRow = undefined;

      const preview = new Parser({
        comments,
        delimiter: delim,
        newline,
        preview: 10
      }).parse(input);

      for (let j = 0; j < preview.data.length; j++) {
        if (skipEmptyLines && this.testEmptyLine(preview.data[j])) {
          emptyLinesCount++;
          continue;
        }
        const fieldCount = preview.data[j].length;
        avgFieldCount += fieldCount;

        if (typeof fieldCountPrevRow === 'undefined') {
          fieldCountPrevRow = 0;
          continue;
        } else if (fieldCount > 1) {
          delta += Math.abs(fieldCount - fieldCountPrevRow);
          fieldCountPrevRow = fieldCount;
        }
      }

      if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;

      // A candidate must split rows into (on average) at least two fields
      if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
        bestDelta = delta;
        bestDelim = delim;
      }
    }

    this._config.delimiter = bestDelim;

    return {
      successful: Boolean(bestDelim),
      bestDelimiter: bestDelim
    };
  }

  /** Appends an error record to the current results. */
  addError(type, code, msg, row?) {
    this._results.errors.push({
      type,
      code,
      message: msg,
      row
    });
  }
}
504
+
505
/**
 * Guesses the line terminator used by `input`.
 * Only the first megabyte is inspected and quoted sections are ignored.
 * @param input - the CSV text
 * @param quoteChar - the quote character delimiting quoted sections
 * @returns '\n', '\r' or '\r\n'
 */
function guessLineEndings(input, quoteChar) {
  const sample = input.substr(0, 1024 * 1024); // max length 1 MB

  // Drop everything between a pair of quotes so embedded newlines do not count
  const quote = escapeRegExp(quoteChar);
  const unquoted = sample.replace(new RegExp(`${quote}([^]*?)${quote}`, 'gm'), '');

  const byCarriageReturn = unquoted.split('\r');
  const byLineFeed = unquoted.split('\n');

  // No '\r' at all -> plain line feeds
  if (byCarriageReturn.length === 1) return '\n';

  // A '\n' shows up before the first '\r'
  if (byLineFeed.length > 1 && byLineFeed[0].length < byCarriageReturn[0].length) return '\n';

  // Count the '\r'-separated pieces that begin with '\n'
  const piecesStartingWithLineFeed = byCarriageReturn.filter((piece) => piece[0] === '\n').length;

  return piecesStartingWithLineFeed >= byCarriageReturn.length / 2 ? '\r\n' : '\r';
}
526
+
527
/**
 * Escapes every regular-expression metacharacter in `string` so the result can be
 * embedded in a RegExp and match the text literally.
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
 */
function escapeRegExp(string) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // $& means the whole matched string, so each metacharacter gets a backslash prefix
  const backslashPrefixed = '\\$&';
  return string.replace(metaCharacters, backslashPrefixed);
}
531
+
532
/**
 * The core parser implements speedy and correct CSV parsing.
 *
 * Constructor function (call with `new`). The instance exposes:
 * - `parse(input, baseIndex, ignoreLastRow)` - parses a string and returns
 *   `{data, errors, meta}`; `meta.cursor` is the absolute offset just past the last
 *   complete row. With `ignoreLastRow` the trailing (possibly partial) row is left unparsed.
 * - `abort()` - makes the running `parse()` return after the current row.
 * - `getCharIndex()` - current cursor position within the input.
 *
 * When `config.step` is a function it is called once per completed row with
 * `data` set to that single row.
 *
 * @throws Error if the comment string equals the delimiter
 */
// eslint-disable-next-line complexity, max-statements
export function Parser(config: CSVParserConfig = {}) {
  // Unpack the config object
  let delim = config.delimiter;
  let newline = config.newline;
  let comments = config.comments;
  const step = config.step;
  const preview = config.preview;
  const fastMode = config.fastMode;
  let quoteChar;
  // Fall back to a double quote when no quoteChar is configured
  if (config.quoteChar === undefined) {
    quoteChar = '"';
  } else {
    quoteChar = config.quoteChar;
  }
  // By default a quote is escaped by doubling it
  let escapeChar = quoteChar;
  if (config.escapeChar !== undefined) {
    escapeChar = config.escapeChar;
  }

  // Delimiter must be valid
  if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';

  // Comment character must be valid
  if (comments === delim) {
    throw new Error('Comment character same as delimiter');
  } else if (comments === true) {
    comments = '#';
  } else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) {
    comments = false;
  }

  // Newline must be valid: \r, \n, or \r\n
  if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';

  // We're gonna need these at the Parser scope
  let cursor = 0;
  let aborted = false;

  // @ts-expect-error
  // eslint-disable-next-line complexity, max-statements
  this.parse = function (input, baseIndex, ignoreLastRow) {
    // For some reason, in Chrome, this speeds things up (!?)
    if (typeof input !== 'string') throw new Error('Input must be a string');

    // We don't need to compute some of these every time parse() is called,
    // but having them in a more local scope seems to perform better
    const inputLen = input.length;
    const delimLen = delim.length;
    const newlineLen = newline.length;
    // @ts-expect-error
    const commentsLen = comments.length;
    const stepIsFunction = isFunction(step);

    // Establish starting state
    cursor = 0;
    let data: any[][] | Record<string, any> = [];
    let errors: any[] = [];
    let row: any[] | Record<string, any> = [];
    let lastCursor: number = 0;

    if (!input) return returnable();

    // Fast path: without quotes every row/field boundary can be found with split()
    if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
      const lines = input.split(newline);
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        cursor += line.length;
        if (i !== lines.length - 1) cursor += newline.length;
        else if (ignoreLastRow) return returnable();
        if (comments && line.substr(0, commentsLen) === comments) continue;
        if (stepIsFunction) {
          data = [];
          pushRow(line.split(delim));
          doStep();
          if (aborted) return returnable();
        } else pushRow(line.split(delim));
        if (preview && i >= preview) {
          data = data.slice(0, preview);
          return returnable(true);
        }
      }
      return returnable();
    }

    let nextDelim = input.indexOf(delim, cursor);
    let nextNewline = input.indexOf(newline, cursor);
    // Matches an escaped quote inside a quoted field
    const quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
    let quoteSearch;

    // Parser loop
    for (;;) {
      // Field has opening quote
      if (input[cursor] === quoteChar) {
        // Start our search for the closing quote where the cursor is
        quoteSearch = cursor;

        // Skip the opening quote
        cursor++;

        for (;;) {
          // Find closing quote
          quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);

          // No other quotes are found - no other delimiters
          if (quoteSearch === -1) {
            if (!ignoreLastRow) {
              // No closing quote... what a pity
              errors.push({
                type: 'Quotes',
                code: 'MissingQuotes',
                message: 'Quoted field unterminated',
                row: data.length, // row has yet to be inserted
                index: cursor
              });
            }
            return finish();
          }

          // Closing quote at EOF
          if (quoteSearch === inputLen - 1) {
            const value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
            return finish(value);
          }

          // If this quote is escaped, it's part of the data; skip it
          // If the quote character is the escape character, then check if the next character is the escape character
          if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
            quoteSearch++;
            continue;
          }

          // If the quote character is not the escape character, then check if the previous character was the escape character
          if (
            quoteChar !== escapeChar &&
            quoteSearch !== 0 &&
            input[quoteSearch - 1] === escapeChar
          ) {
            continue;
          }

          // Check up to nextDelim or nextNewline, whichever is closest
          const checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
          const spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);

          // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'.
          // Compare delimLen characters so multi-character delimiters are recognised too.
          if (
            input.substr(quoteSearch + 1 + spacesBetweenQuoteAndDelimiter, delimLen) === delim
          ) {
            row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
            cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
            nextDelim = input.indexOf(delim, cursor);
            nextNewline = input.indexOf(newline, cursor);

            if (stepIsFunction) {
              doStep();
              if (aborted) return returnable();
            }

            if (preview && data.length >= preview) return returnable(true);

            break;
          }

          const spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);

          // Closing quote followed by newline or 'unnecessary spaces + newLine'
          if (
            input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
          ) {
            row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
            saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
            nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field

            if (stepIsFunction) {
              doStep();
              if (aborted) return returnable();
            }

            if (preview && data.length >= preview) return returnable(true);

            break;
          }

          // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
          errors.push({
            type: 'Quotes',
            code: 'InvalidQuotes',
            message: 'Trailing quote on quoted field is malformed',
            row: data.length, // row has yet to be inserted
            index: cursor
          });

          quoteSearch++;
          continue;
        }

        if (stepIsFunction) {
          doStep();
          if (aborted) return returnable();
        }

        if (preview && data.length >= preview) return returnable(true);
        continue;
      }

      // Comment found at start of new line
      if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
        if (nextNewline === -1)
          // Comment ends at EOF
          return returnable();
        cursor = nextNewline + newlineLen;
        nextNewline = input.indexOf(newline, cursor);
        nextDelim = input.indexOf(delim, cursor);
        continue;
      }

      // Next delimiter comes before next newline, so we've reached end of field
      if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
        row.push(input.substring(cursor, nextDelim));
        cursor = nextDelim + delimLen;
        nextDelim = input.indexOf(delim, cursor);
        continue;
      }

      // End of row
      if (nextNewline !== -1) {
        row.push(input.substring(cursor, nextNewline));
        saveRow(nextNewline + newlineLen);

        if (stepIsFunction) {
          doStep();
          if (aborted) return returnable();
        }

        if (preview && data.length >= preview) return returnable(true);

        continue;
      }

      break;
    }

    return finish();

    /** Appends a finished row to the results and remembers the cursor just past it. */
    function pushRow(finishedRow) {
      data.push(finishedRow);
      lastCursor = cursor;
    }

    /**
     * checks if there are extra spaces after closing quote and given index without any text
     * if Yes, returns the number of spaces
     */
    function extraSpaces(index) {
      let spaceLength = 0;
      if (index !== -1) {
        const textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
        if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
          spaceLength = textBetweenClosingQuoteAndIndex.length;
        }
      }
      return spaceLength;
    }

    /**
     * Appends the remaining input from cursor to the end into
     * row, saves the row, calls step, and returns the results.
     */
    function finish(value?: any) {
      if (ignoreLastRow) return returnable();
      if (typeof value === 'undefined') value = input.substr(cursor);
      row.push(value);
      cursor = inputLen; // important in case parsing is paused
      pushRow(row);
      if (stepIsFunction) doStep();
      return returnable();
    }

    /**
     * Appends the current row to the results. It sets the cursor
     * to newCursor and finds the nextNewline. The caller should
     * take care to execute user's step function and check for
     * preview and end parsing if necessary.
     */
    function saveRow(newCursor) {
      cursor = newCursor;
      pushRow(row);
      row = [];
      nextNewline = input.indexOf(newline, cursor);
    }

    /**
     * Returns an object with the results, errors, and meta.
     * @param stopped - true when parsing stopped early because of `preview`
     * @param isStep - true to return only the first row (step mode)
     */
    function returnable(stopped?: boolean, isStep?: boolean) {
      return {
        data: isStep ? data[0] : data,
        errors,
        meta: {
          delimiter: delim,
          linebreak: newline,
          aborted,
          truncated: Boolean(stopped),
          cursor: lastCursor + (baseIndex || 0)
        }
      };
    }

    /**
     * Executes the user's step function and resets data & errors.
     * Does nothing while no complete row is pending (e.g. when called after a
     * quoted field in the middle of a row), so `step` never receives
     * `data: undefined` and collected errors stay attached to the next row.
     */
    function doStep() {
      if (data.length === 0) return;
      // @ts-expect-error
      step(returnable(undefined, true));
      data = [];
      errors = [];
    }
  };

  /** Sets the abort flag */
  // @ts-expect-error
  this.abort = function () {
    aborted = true;
  };

  /** Gets the cursor position */
  // @ts-expect-error
  this.getCharIndex = function () {
    return cursor;
  };
}
861
+
862
/**
 * Recursively clones arrays and objects; every other value (primitives, null,
 * functions) is returned as is. The clone is a plain array or plain object built
 * from the enumerable keys of the source.
 */
function copy(obj) {
  const isContainer = typeof obj === 'object' && obj !== null;
  if (!isContainer) {
    return obj;
  }

  const clone = Array.isArray(obj) ? [] : {};
  for (const key in obj) {
    clone[key] = copy(obj[key]);
  }
  return clone;
}
868
+ }
869
+
870
/** Type guard: true when `func` is callable according to `typeof`. */
function isFunction(func: unknown): func is Function {
  const kind = typeof func;
  return kind === 'function';
}
872
+ }