@loaders.gl/csv 4.3.0-alpha.8 → 4.3.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv-loader.d.ts.map +1 -1
- package/dist/csv-loader.js +1 -2
- package/dist/csv-writer.d.ts +0 -2
- package/dist/csv-writer.d.ts.map +1 -1
- package/dist/dist.dev.js +304 -311
- package/dist/dist.min.js +12 -12
- package/dist/index.cjs +285 -292
- package/dist/index.cjs.map +3 -3
- package/dist/papaparse/async-iterator-streamer.d.ts +28 -4
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
- package/dist/papaparse/async-iterator-streamer.js +11 -12
- package/dist/papaparse/papaparse.d.ts +108 -12
- package/dist/papaparse/papaparse.d.ts.map +1 -1
- package/dist/papaparse/papaparse.js +274 -314
- package/package.json +4 -4
- package/src/csv-loader.ts +0 -1
- package/src/csv-writer.ts +0 -2
- package/src/papaparse/async-iterator-streamer.ts +11 -14
- package/src/papaparse/papaparse.ts +330 -342
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
1
|
// This is a fork of papaparse
|
|
3
2
|
// https://github.com/mholt/PapaParse
|
|
4
3
|
/* @license
|
|
@@ -14,53 +13,35 @@ License: MIT
|
|
|
14
13
|
// - Remove unused Worker support (loaders.gl worker system used instead)
|
|
15
14
|
// - Remove unused jQuery plugin support
|
|
16
15
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
16
|
+
export type CSVParserConfig = {
|
|
17
|
+
dynamicTyping?: boolean | Function | {};
|
|
18
|
+
dynamicTypingFunction?: Function;
|
|
19
|
+
chunk?: boolean;
|
|
20
|
+
chunkSize?: number | null;
|
|
21
|
+
step?: Function;
|
|
22
|
+
transform?: boolean;
|
|
23
|
+
preview?: number;
|
|
24
|
+
newline?: string;
|
|
25
|
+
comments?: boolean;
|
|
26
|
+
skipEmptyLines?: boolean | 'greedy';
|
|
27
|
+
delimitersToGuess?: string[];
|
|
28
|
+
quoteChar?: string;
|
|
29
|
+
escapeChar?: string;
|
|
30
|
+
delimiter?: string;
|
|
31
|
+
// Convert numbers and boolean values in rows from strings
|
|
32
|
+
fastMode?: boolean;
|
|
33
|
+
};
|
|
30
34
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
+
// const defaultConfig: Required<CSVParserConfig> = {
|
|
36
|
+
// dynamicTyping: false,
|
|
37
|
+
// dynamicTypingFunction: undefined!,
|
|
38
|
+
// transform: false
|
|
39
|
+
// };
|
|
35
40
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
ParserHandle: ParserHandle,
|
|
39
|
-
|
|
40
|
-
// BEGIN FORK
|
|
41
|
-
ChunkStreamer: ChunkStreamer,
|
|
42
|
-
StringStreamer: StringStreamer
|
|
43
|
-
};
|
|
44
|
-
export default Papa;
|
|
41
|
+
/* eslint-disable */
|
|
42
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
45
43
|
|
|
46
|
-
|
|
47
|
-
Papa.NetworkStreamer = NetworkStreamer;
|
|
48
|
-
Papa.FileStreamer = FileStreamer;
|
|
49
|
-
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
|
|
50
|
-
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
51
|
-
Papa.DuplexStreamStreamer = DuplexStreamStreamer;
|
|
52
|
-
}
|
|
53
|
-
*/
|
|
54
|
-
// END FORK
|
|
55
|
-
|
|
56
|
-
// BEGIN FORK
|
|
57
|
-
// Adds an argument to papa.parse
|
|
58
|
-
// function CsvToJson(_input, _config)
|
|
59
|
-
function CsvToJson(
|
|
60
|
-
_input,
|
|
61
|
-
_config,
|
|
62
|
-
UserDefinedStreamer? // BEGIN FORK
|
|
63
|
-
) {
|
|
44
|
+
function CsvToJson(_input, _config: CSVParserConfig = {}, Streamer: any = StringStreamer) {
|
|
64
45
|
_config = _config || {};
|
|
65
46
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
66
47
|
if (isFunction(dynamicTyping)) {
|
|
@@ -72,56 +53,7 @@ function CsvToJson(
|
|
|
72
53
|
|
|
73
54
|
_config.transform = isFunction(_config.transform) ? _config.transform : false;
|
|
74
55
|
|
|
75
|
-
|
|
76
|
-
var w = newWorker();
|
|
77
|
-
|
|
78
|
-
w.userStep = _config.step;
|
|
79
|
-
w.userChunk = _config.chunk;
|
|
80
|
-
w.userComplete = _config.complete;
|
|
81
|
-
w.userError = _config.error;
|
|
82
|
-
|
|
83
|
-
_config.step = isFunction(_config.step);
|
|
84
|
-
_config.chunk = isFunction(_config.chunk);
|
|
85
|
-
_config.complete = isFunction(_config.complete);
|
|
86
|
-
_config.error = isFunction(_config.error);
|
|
87
|
-
delete _config.worker; // prevent infinite loop
|
|
88
|
-
|
|
89
|
-
w.postMessage({
|
|
90
|
-
input: _input,
|
|
91
|
-
config: _config,
|
|
92
|
-
workerId: w.id
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
return;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
var streamer = null;
|
|
99
|
-
/*
|
|
100
|
-
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
101
|
-
// create a node Duplex stream for use
|
|
102
|
-
// with .pipe
|
|
103
|
-
streamer = new DuplexStreamStreamer(_config);
|
|
104
|
-
return streamer.getStream();
|
|
105
|
-
} else
|
|
106
|
-
*/
|
|
107
|
-
if (typeof _input === 'string') {
|
|
108
|
-
// if (_config.download) streamer = new NetworkStreamer(_config);
|
|
109
|
-
// else
|
|
110
|
-
streamer = new StringStreamer(_config);
|
|
111
|
-
}
|
|
112
|
-
/*
|
|
113
|
-
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
114
|
-
streamer = new ReadableStreamStreamer(_config);
|
|
115
|
-
} else if ((globalThis.File && _input instanceof File) || _input instanceof Object)
|
|
116
|
-
// ...Safari. (see issue #106)
|
|
117
|
-
streamer = new FileStreamer(_config);
|
|
118
|
-
*/
|
|
119
|
-
|
|
120
|
-
// BEGIN FORK
|
|
121
|
-
if (!streamer) {
|
|
122
|
-
streamer = new UserDefinedStreamer(_config);
|
|
123
|
-
}
|
|
124
|
-
// END FORK
|
|
56
|
+
var streamer = new Streamer(_config);
|
|
125
57
|
|
|
126
58
|
return streamer.stream(_input);
|
|
127
59
|
}
|
|
@@ -162,7 +94,7 @@ function JsonToCsv(_input, _config) {
|
|
|
162
94
|
if (Array.isArray(_input)) {
|
|
163
95
|
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
|
|
164
96
|
else if (typeof _input[0] === 'object')
|
|
165
|
-
return serialize(_columns ||
|
|
97
|
+
return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
|
|
166
98
|
} else if (typeof _input === 'object') {
|
|
167
99
|
if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
|
|
168
100
|
|
|
@@ -170,7 +102,7 @@ function JsonToCsv(_input, _config) {
|
|
|
170
102
|
if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
|
|
171
103
|
|
|
172
104
|
if (!_input.fields)
|
|
173
|
-
_input.fields = Array.isArray(_input.data[0]) ? _input.fields :
|
|
105
|
+
_input.fields = Array.isArray(_input.data[0]) ? _input.fields : Object.keys(_input.data[0]);
|
|
174
106
|
|
|
175
107
|
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
|
|
176
108
|
_input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
|
|
@@ -217,14 +149,6 @@ function JsonToCsv(_input, _config) {
|
|
|
217
149
|
}
|
|
218
150
|
}
|
|
219
151
|
|
|
220
|
-
/** Turns an object's keys into an array */
|
|
221
|
-
function objectKeys(obj) {
|
|
222
|
-
if (typeof obj !== 'object') return [];
|
|
223
|
-
var keys = [];
|
|
224
|
-
for (var key in obj) keys.push(key);
|
|
225
|
-
return keys;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
152
|
/** The double for loop that iterates the data and writes out a CSV string including header row */
|
|
229
153
|
function serialize(fields, data, skipEmptyLines) {
|
|
230
154
|
var csv = '';
|
|
@@ -257,7 +181,7 @@ function JsonToCsv(_input, _config) {
|
|
|
257
181
|
: data[row].length === 1 && data[row][0].length === 0;
|
|
258
182
|
}
|
|
259
183
|
if (skipEmptyLines === 'greedy' && hasHeader) {
|
|
260
|
-
var line = [];
|
|
184
|
+
var line: string[] = [];
|
|
261
185
|
for (var c = 0; c < maxCol; c++) {
|
|
262
186
|
var cx = dataKeyedByField ? fields[c] : c;
|
|
263
187
|
line.push(data[row][cx]);
|
|
@@ -304,25 +228,38 @@ function JsonToCsv(_input, _config) {
|
|
|
304
228
|
}
|
|
305
229
|
|
|
306
230
|
/** ChunkStreamer is the base prototype for various streamer implementations. */
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
231
|
+
class ChunkStreamer {
|
|
232
|
+
_handle;
|
|
233
|
+
_config;
|
|
234
|
+
|
|
235
|
+
_finished = false;
|
|
236
|
+
_completed = false;
|
|
237
|
+
_input = null;
|
|
238
|
+
_baseIndex = 0;
|
|
239
|
+
_partialLine = '';
|
|
240
|
+
_rowCount = 0;
|
|
241
|
+
_start = 0;
|
|
242
|
+
isFirstChunk = true;
|
|
243
|
+
_completeResults = {
|
|
319
244
|
data: [],
|
|
320
245
|
errors: [],
|
|
321
246
|
meta: {}
|
|
322
247
|
};
|
|
323
|
-
replaceConfig.call(this, config);
|
|
324
248
|
|
|
325
|
-
|
|
249
|
+
constructor(config: CSVParserConfig) {
|
|
250
|
+
// Deep-copy the config so we can edit it
|
|
251
|
+
var configCopy = {...config};
|
|
252
|
+
// @ts-expect-error
|
|
253
|
+
configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
|
|
254
|
+
if (!config.step && !config.chunk) {
|
|
255
|
+
configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
|
|
256
|
+
}
|
|
257
|
+
this._handle = new ParserHandle(configCopy);
|
|
258
|
+
this._handle.streamer = this;
|
|
259
|
+
this._config = configCopy; // persist the copy to the caller
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
parseChunk(chunk, isFakeChunk?: boolean) {
|
|
326
263
|
// First chunk pre-processing
|
|
327
264
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
328
265
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
@@ -354,6 +291,7 @@ function ChunkStreamer(config) {
|
|
|
354
291
|
this._config.chunk(results, this._handle);
|
|
355
292
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
356
293
|
results = undefined;
|
|
294
|
+
// @ts-expect-error
|
|
357
295
|
this._completeResults = undefined;
|
|
358
296
|
}
|
|
359
297
|
|
|
@@ -373,87 +311,101 @@ function ChunkStreamer(config) {
|
|
|
373
311
|
this._completed = true;
|
|
374
312
|
}
|
|
375
313
|
|
|
376
|
-
if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
314
|
+
// if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
377
315
|
|
|
378
316
|
return results;
|
|
379
|
-
}
|
|
317
|
+
}
|
|
380
318
|
|
|
381
|
-
|
|
319
|
+
_sendError(error) {
|
|
382
320
|
if (isFunction(this._config.error)) this._config.error(error);
|
|
383
|
-
};
|
|
384
|
-
|
|
385
|
-
function replaceConfig(config) {
|
|
386
|
-
// Deep-copy the config so we can edit it
|
|
387
|
-
var configCopy = copy(config);
|
|
388
|
-
configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
|
|
389
|
-
if (!config.step && !config.chunk) configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
|
|
390
|
-
this._handle = new ParserHandle(configCopy);
|
|
391
|
-
this._handle.streamer = this;
|
|
392
|
-
this._config = configCopy; // persist the copy to the caller
|
|
393
321
|
}
|
|
394
322
|
}
|
|
395
|
-
function StringStreamer(config) {
|
|
396
|
-
config = config || {};
|
|
397
|
-
ChunkStreamer.call(this, config);
|
|
398
323
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
324
|
+
class StringStreamer extends ChunkStreamer {
|
|
325
|
+
remaining;
|
|
326
|
+
|
|
327
|
+
constructor(config = {}) {
|
|
328
|
+
super(config);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
stream(s) {
|
|
332
|
+
this.remaining = s;
|
|
402
333
|
return this._nextChunk();
|
|
403
|
-
}
|
|
404
|
-
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
_nextChunk() {
|
|
405
337
|
if (this._finished) return;
|
|
406
338
|
var size = this._config.chunkSize;
|
|
407
|
-
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
408
|
-
remaining = size ? remaining.substr(size) : '';
|
|
409
|
-
this._finished = !remaining;
|
|
339
|
+
var chunk = size ? this.remaining.substr(0, size) : this.remaining;
|
|
340
|
+
this.remaining = size ? this.remaining.substr(size) : '';
|
|
341
|
+
this._finished = !this.remaining;
|
|
410
342
|
return this.parseChunk(chunk);
|
|
411
|
-
}
|
|
343
|
+
}
|
|
412
344
|
}
|
|
413
|
-
|
|
414
|
-
|
|
345
|
+
|
|
346
|
+
const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
347
|
+
const ISO_DATE =
|
|
348
|
+
/(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
415
349
|
|
|
416
350
|
// Use one ParserHandle per entire CSV file or string
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
351
|
+
class ParserHandle {
|
|
352
|
+
_config;
|
|
353
|
+
|
|
354
|
+
/** Number of times step was called (number of rows parsed) */
|
|
355
|
+
_stepCounter = 0;
|
|
356
|
+
/** Number of rows that have been parsed so far */
|
|
357
|
+
_rowCounter = 0;
|
|
358
|
+
/** The input being parsed */
|
|
359
|
+
_input;
|
|
360
|
+
/** The core parser being used */
|
|
361
|
+
_parser;
|
|
362
|
+
/** Whether we are paused or not */
|
|
363
|
+
_paused = false;
|
|
364
|
+
/** Whether the parser has aborted or not */
|
|
365
|
+
_aborted = false;
|
|
366
|
+
/** Temporary state between delimiter detection and processing results */
|
|
367
|
+
_delimiterError: boolean = false;
|
|
368
|
+
/** Fields are from the header row of the input, if there is one */
|
|
369
|
+
_fields: string[] = [];
|
|
370
|
+
/** The last results returned from the parser */
|
|
371
|
+
_results: {
|
|
372
|
+
data: any[][] | Record<string, any>[];
|
|
373
|
+
errors: any[];
|
|
374
|
+
meta: Record<string, any>;
|
|
375
|
+
} = {
|
|
434
376
|
data: [],
|
|
435
377
|
errors: [],
|
|
436
378
|
meta: {}
|
|
437
379
|
};
|
|
438
380
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
_config.step = function (results) {
|
|
442
|
-
_results = results;
|
|
443
|
-
|
|
444
|
-
if (needsHeaderRow()) processResults();
|
|
445
|
-
// only call user's step function after header row
|
|
446
|
-
else {
|
|
447
|
-
processResults();
|
|
381
|
+
constructor(_config: CSVParserConfig) {
|
|
382
|
+
// One goal is to minimize the use of regular expressions...
|
|
448
383
|
|
|
449
|
-
|
|
450
|
-
|
|
384
|
+
if (isFunction(_config.step)) {
|
|
385
|
+
var userStep = _config.step;
|
|
386
|
+
_config.step = (results) => {
|
|
387
|
+
this._results = results;
|
|
451
388
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
389
|
+
if (this.needsHeaderRow()) {
|
|
390
|
+
this.processResults();
|
|
391
|
+
}
|
|
392
|
+
// only call user's step function after header row
|
|
393
|
+
else {
|
|
394
|
+
this.processResults();
|
|
395
|
+
|
|
396
|
+
// It's possbile that this line was empty and there's no row here after all
|
|
397
|
+
if (!this._results.data || this._results.data.length === 0) return;
|
|
398
|
+
|
|
399
|
+
this._stepCounter += results.data.length;
|
|
400
|
+
if (_config.preview && this._stepCounter > _config.preview) {
|
|
401
|
+
this._parser.abort();
|
|
402
|
+
} else {
|
|
403
|
+
userStep(this._results, this);
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
};
|
|
407
|
+
}
|
|
408
|
+
this._config = _config;
|
|
457
409
|
}
|
|
458
410
|
|
|
459
411
|
/**
|
|
@@ -461,126 +413,133 @@ function ParserHandle(_config) {
|
|
|
461
413
|
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
462
414
|
* when an input comes in multiple chunks, like from a file.
|
|
463
415
|
*/
|
|
464
|
-
|
|
465
|
-
var quoteChar = _config.quoteChar || '"';
|
|
466
|
-
if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
|
|
416
|
+
parse(input, baseIndex, ignoreLastRow) {
|
|
417
|
+
var quoteChar = this._config.quoteChar || '"';
|
|
418
|
+
if (!this._config.newline) this._config.newline = guessLineEndings(input, quoteChar);
|
|
467
419
|
|
|
468
|
-
_delimiterError = false;
|
|
469
|
-
if (!_config.delimiter) {
|
|
470
|
-
var delimGuess = guessDelimiter(
|
|
420
|
+
this._delimiterError = false;
|
|
421
|
+
if (!this._config.delimiter) {
|
|
422
|
+
var delimGuess = this.guessDelimiter(
|
|
471
423
|
input,
|
|
472
|
-
_config.newline,
|
|
473
|
-
_config.skipEmptyLines,
|
|
474
|
-
_config.comments,
|
|
475
|
-
_config.delimitersToGuess
|
|
424
|
+
this._config.newline,
|
|
425
|
+
this._config.skipEmptyLines,
|
|
426
|
+
this._config.comments,
|
|
427
|
+
this._config.delimitersToGuess
|
|
476
428
|
);
|
|
477
|
-
if (delimGuess.successful)
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
429
|
+
if (delimGuess.successful) {
|
|
430
|
+
this._config.delimiter = delimGuess.bestDelimiter;
|
|
431
|
+
} else {
|
|
432
|
+
this._delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
433
|
+
this._config.delimiter = Papa.DefaultDelimiter;
|
|
481
434
|
}
|
|
482
|
-
_results.meta.delimiter = _config.delimiter;
|
|
483
|
-
} else if (isFunction(_config.delimiter)) {
|
|
484
|
-
_config.delimiter = _config.delimiter(input);
|
|
485
|
-
_results.meta.delimiter = _config.delimiter;
|
|
435
|
+
this._results.meta.delimiter = this._config.delimiter;
|
|
436
|
+
} else if (isFunction(this._config.delimiter)) {
|
|
437
|
+
this._config.delimiter = this._config.delimiter(input);
|
|
438
|
+
this._results.meta.delimiter = this._config.delimiter;
|
|
486
439
|
}
|
|
487
440
|
|
|
488
|
-
var parserConfig = copy(_config);
|
|
489
|
-
if (_config.preview && _config.header) parserConfig.preview++; // to compensate for header row
|
|
441
|
+
var parserConfig = copy(this._config);
|
|
442
|
+
if (this._config.preview && this._config.header) parserConfig.preview++; // to compensate for header row
|
|
490
443
|
|
|
491
|
-
_input = input;
|
|
492
|
-
_parser = new Parser(parserConfig);
|
|
493
|
-
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
|
|
494
|
-
processResults();
|
|
495
|
-
return _paused ? {meta: {paused: true}} : _results || {meta: {paused: false}};
|
|
496
|
-
}
|
|
444
|
+
this._input = input;
|
|
445
|
+
this._parser = new Parser(parserConfig);
|
|
446
|
+
this._results = this._parser.parse(this._input, baseIndex, ignoreLastRow);
|
|
447
|
+
this.processResults();
|
|
448
|
+
return this._paused ? {meta: {paused: true}} : this._results || {meta: {paused: false}};
|
|
449
|
+
}
|
|
497
450
|
|
|
498
|
-
|
|
499
|
-
return _paused;
|
|
500
|
-
}
|
|
451
|
+
paused() {
|
|
452
|
+
return this._paused;
|
|
453
|
+
}
|
|
501
454
|
|
|
502
|
-
|
|
503
|
-
_paused = true;
|
|
504
|
-
_parser.abort();
|
|
505
|
-
_input = _input.substr(_parser.getCharIndex());
|
|
506
|
-
}
|
|
455
|
+
pause() {
|
|
456
|
+
this._paused = true;
|
|
457
|
+
this._parser.abort();
|
|
458
|
+
this._input = this._input.substr(this._parser.getCharIndex());
|
|
459
|
+
}
|
|
507
460
|
|
|
508
|
-
|
|
509
|
-
_paused = false;
|
|
510
|
-
|
|
511
|
-
|
|
461
|
+
resume() {
|
|
462
|
+
this._paused = false;
|
|
463
|
+
// @ts-expect-error
|
|
464
|
+
this.streamer.parseChunk(this._input, true);
|
|
465
|
+
}
|
|
512
466
|
|
|
513
|
-
|
|
514
|
-
return _aborted;
|
|
515
|
-
}
|
|
467
|
+
aborted() {
|
|
468
|
+
return this._aborted;
|
|
469
|
+
}
|
|
516
470
|
|
|
517
|
-
|
|
518
|
-
_aborted = true;
|
|
519
|
-
_parser.abort();
|
|
520
|
-
_results.meta.aborted = true;
|
|
521
|
-
if (isFunction(_config.complete))
|
|
522
|
-
|
|
523
|
-
|
|
471
|
+
abort() {
|
|
472
|
+
this._aborted = true;
|
|
473
|
+
this._parser.abort();
|
|
474
|
+
this._results.meta.aborted = true;
|
|
475
|
+
if (isFunction(this._config.complete)) {
|
|
476
|
+
this._config.complete(this._results);
|
|
477
|
+
}
|
|
478
|
+
this._input = '';
|
|
479
|
+
}
|
|
524
480
|
|
|
525
|
-
|
|
526
|
-
return _config.skipEmptyLines === 'greedy'
|
|
481
|
+
testEmptyLine(s) {
|
|
482
|
+
return this._config.skipEmptyLines === 'greedy'
|
|
527
483
|
? s.join('').trim() === ''
|
|
528
484
|
: s.length === 1 && s[0].length === 0;
|
|
529
485
|
}
|
|
530
486
|
|
|
531
|
-
|
|
532
|
-
if (_results && _delimiterError) {
|
|
533
|
-
addError(
|
|
487
|
+
processResults() {
|
|
488
|
+
if (this._results && this._delimiterError) {
|
|
489
|
+
this.addError(
|
|
534
490
|
'Delimiter',
|
|
535
491
|
'UndetectableDelimiter',
|
|
536
492
|
"Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
|
|
537
493
|
);
|
|
538
|
-
_delimiterError = false;
|
|
494
|
+
this._delimiterError = false;
|
|
539
495
|
}
|
|
540
496
|
|
|
541
|
-
if (_config.skipEmptyLines) {
|
|
542
|
-
for (var i = 0; i < _results.data.length; i++)
|
|
543
|
-
if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
497
|
+
if (this._config.skipEmptyLines) {
|
|
498
|
+
for (var i = 0; i < this._results.data.length; i++)
|
|
499
|
+
if (this.testEmptyLine(this._results.data[i])) this._results.data.splice(i--, 1);
|
|
544
500
|
}
|
|
545
501
|
|
|
546
|
-
if (needsHeaderRow())
|
|
502
|
+
if (this.needsHeaderRow()) {
|
|
503
|
+
this.fillHeaderFields();
|
|
504
|
+
}
|
|
547
505
|
|
|
548
|
-
return applyHeaderAndDynamicTypingAndTransformation();
|
|
506
|
+
return this.applyHeaderAndDynamicTypingAndTransformation();
|
|
549
507
|
}
|
|
550
508
|
|
|
551
|
-
|
|
552
|
-
return _config.header && _fields.length === 0;
|
|
509
|
+
needsHeaderRow() {
|
|
510
|
+
return this._config.header && this._fields.length === 0;
|
|
553
511
|
}
|
|
554
512
|
|
|
555
|
-
|
|
556
|
-
if (!_results) return;
|
|
513
|
+
fillHeaderFields() {
|
|
514
|
+
if (!this._results) return;
|
|
557
515
|
|
|
558
|
-
|
|
559
|
-
if (isFunction(_config.transformHeader)) header = _config.transformHeader(header);
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
}
|
|
516
|
+
const addHeder = (header) => {
|
|
517
|
+
if (isFunction(this._config.transformHeader)) header = this._config.transformHeader(header);
|
|
518
|
+
this._fields.push(header);
|
|
519
|
+
};
|
|
563
520
|
|
|
564
|
-
if (Array.isArray(_results.data[0])) {
|
|
565
|
-
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
|
|
566
|
-
_results.data[i].forEach(addHeder);
|
|
521
|
+
if (Array.isArray(this._results.data[0])) {
|
|
522
|
+
for (var i = 0; this.needsHeaderRow() && i < this._results.data.length; i++)
|
|
523
|
+
this._results.data[i].forEach(addHeder);
|
|
567
524
|
|
|
568
|
-
_results.data.splice(0, 1);
|
|
525
|
+
this._results.data.splice(0, 1);
|
|
569
526
|
}
|
|
570
527
|
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
571
|
-
else
|
|
528
|
+
else {
|
|
529
|
+
this._results.data.forEach(addHeder);
|
|
530
|
+
}
|
|
572
531
|
}
|
|
573
532
|
|
|
574
|
-
|
|
533
|
+
shouldApplyDynamicTyping(field) {
|
|
575
534
|
// Cache function values to avoid calling it for each row
|
|
576
|
-
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
|
|
577
|
-
_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
|
|
535
|
+
if (this._config.dynamicTypingFunction && this._config.dynamicTyping[field] === undefined) {
|
|
536
|
+
this._config.dynamicTyping[field] = this._config.dynamicTypingFunction(field);
|
|
578
537
|
}
|
|
579
|
-
return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
|
|
538
|
+
return (this._config.dynamicTyping[field] || this._config.dynamicTyping) === true;
|
|
580
539
|
}
|
|
581
540
|
|
|
582
|
-
|
|
583
|
-
if (shouldApplyDynamicTyping(field)) {
|
|
541
|
+
parseDynamic(field, value) {
|
|
542
|
+
if (this.shouldApplyDynamicTyping(field)) {
|
|
584
543
|
if (value === 'true' || value === 'TRUE') return true;
|
|
585
544
|
else if (value === 'false' || value === 'FALSE') return false;
|
|
586
545
|
else if (FLOAT.test(value)) return parseFloat(value);
|
|
@@ -590,67 +549,72 @@ function ParserHandle(_config) {
|
|
|
590
549
|
return value;
|
|
591
550
|
}
|
|
592
551
|
|
|
593
|
-
|
|
552
|
+
applyHeaderAndDynamicTypingAndTransformation() {
|
|
594
553
|
if (
|
|
595
|
-
!_results ||
|
|
596
|
-
!_results.data ||
|
|
597
|
-
(!_config.header && !_config.dynamicTyping && !_config.transform)
|
|
598
|
-
)
|
|
599
|
-
return _results;
|
|
554
|
+
!this._results ||
|
|
555
|
+
!this._results.data ||
|
|
556
|
+
(!this._config.header && !this._config.dynamicTyping && !this._config.transform)
|
|
557
|
+
) {
|
|
558
|
+
return this._results;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
var incrementBy = 1;
|
|
562
|
+
if (!this._results.data[0] || Array.isArray(this._results.data[0])) {
|
|
563
|
+
this._results.data = this._results.data.map(this.processRow.bind(this));
|
|
564
|
+
incrementBy = this._results.data.length;
|
|
565
|
+
} else {
|
|
566
|
+
// @ts-expect-error
|
|
567
|
+
this._results.data = this.processRow(this._results.data, 0);
|
|
568
|
+
}
|
|
600
569
|
|
|
601
|
-
|
|
602
|
-
var row = _config.header ? {} : [];
|
|
570
|
+
if (this._config.header && this._results.meta) this._results.meta.fields = this._fields;
|
|
603
571
|
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
var value = rowSource[j];
|
|
572
|
+
this._rowCounter += incrementBy;
|
|
573
|
+
return this._results;
|
|
574
|
+
}
|
|
608
575
|
|
|
609
|
-
|
|
576
|
+
processRow(rowSource, i): any[] | Record<string, any> {
|
|
577
|
+
var row = this._config.header ? {} : [];
|
|
610
578
|
|
|
611
|
-
|
|
579
|
+
var j;
|
|
580
|
+
for (j = 0; j < rowSource.length; j++) {
|
|
581
|
+
var field = j;
|
|
582
|
+
var value = rowSource[j];
|
|
612
583
|
|
|
613
|
-
|
|
584
|
+
if (this._config.header)
|
|
585
|
+
field = j >= this._fields.length ? '__parsed_extra' : this._fields[j];
|
|
614
586
|
|
|
615
|
-
|
|
616
|
-
row[field] = row[field] || [];
|
|
617
|
-
row[field].push(value);
|
|
618
|
-
} else row[field] = value;
|
|
619
|
-
}
|
|
587
|
+
if (this._config.transform) value = this._config.transform(value, field);
|
|
620
588
|
|
|
621
|
-
|
|
622
|
-
if (j > _fields.length)
|
|
623
|
-
addError(
|
|
624
|
-
'FieldMismatch',
|
|
625
|
-
'TooManyFields',
|
|
626
|
-
'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
627
|
-
_rowCounter + i
|
|
628
|
-
);
|
|
629
|
-
else if (j < _fields.length)
|
|
630
|
-
addError(
|
|
631
|
-
'FieldMismatch',
|
|
632
|
-
'TooFewFields',
|
|
633
|
-
'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
634
|
-
_rowCounter + i
|
|
635
|
-
);
|
|
636
|
-
}
|
|
589
|
+
value = this.parseDynamic(field, value);
|
|
637
590
|
|
|
638
|
-
|
|
591
|
+
if (field === '__parsed_extra') {
|
|
592
|
+
row[field] = row[field] || [];
|
|
593
|
+
row[field].push(value);
|
|
594
|
+
} else row[field] = value;
|
|
639
595
|
}
|
|
640
596
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
597
|
+
if (this._config.header) {
|
|
598
|
+
if (j > this._fields.length)
|
|
599
|
+
this.addError(
|
|
600
|
+
'FieldMismatch',
|
|
601
|
+
'TooManyFields',
|
|
602
|
+
'Too many fields: expected ' + this._fields.length + ' fields but parsed ' + j,
|
|
603
|
+
this._rowCounter + i
|
|
604
|
+
);
|
|
605
|
+
else if (j < this._fields.length)
|
|
606
|
+
this.addError(
|
|
607
|
+
'FieldMismatch',
|
|
608
|
+
'TooFewFields',
|
|
609
|
+
'Too few fields: expected ' + this._fields.length + ' fields but parsed ' + j,
|
|
610
|
+
this._rowCounter + i
|
|
611
|
+
);
|
|
612
|
+
}
|
|
648
613
|
|
|
649
|
-
|
|
650
|
-
return _results;
|
|
614
|
+
return row;
|
|
651
615
|
}
|
|
652
616
|
|
|
653
|
-
|
|
617
|
+
guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
|
|
654
618
|
var bestDelim, bestDelta, fieldCountPrevRow;
|
|
655
619
|
|
|
656
620
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
|
|
@@ -670,7 +634,7 @@ function ParserHandle(_config) {
|
|
|
670
634
|
}).parse(input);
|
|
671
635
|
|
|
672
636
|
for (var j = 0; j < preview.data.length; j++) {
|
|
673
|
-
if (skipEmptyLines && testEmptyLine(preview.data[j])) {
|
|
637
|
+
if (skipEmptyLines && this.testEmptyLine(preview.data[j])) {
|
|
674
638
|
emptyLinesCount++;
|
|
675
639
|
continue;
|
|
676
640
|
}
|
|
@@ -694,7 +658,7 @@ function ParserHandle(_config) {
|
|
|
694
658
|
}
|
|
695
659
|
}
|
|
696
660
|
|
|
697
|
-
_config.delimiter = bestDelim;
|
|
661
|
+
this._config.delimiter = bestDelim;
|
|
698
662
|
|
|
699
663
|
return {
|
|
700
664
|
successful: !!bestDelim,
|
|
@@ -702,36 +666,36 @@ function ParserHandle(_config) {
|
|
|
702
666
|
};
|
|
703
667
|
}
|
|
704
668
|
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
669
|
+
addError(type, code, msg, row?) {
|
|
670
|
+
this._results.errors.push({
|
|
671
|
+
type: type,
|
|
672
|
+
code: code,
|
|
673
|
+
message: msg,
|
|
674
|
+
row: row
|
|
675
|
+
});
|
|
676
|
+
}
|
|
677
|
+
}
|
|
710
678
|
|
|
711
|
-
|
|
679
|
+
function guessLineEndings(input, quoteChar) {
|
|
680
|
+
input = input.substr(0, 1024 * 1024); // max length 1 MB
|
|
681
|
+
// Replace all the text inside quotes
|
|
682
|
+
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
683
|
+
input = input.replace(re, '');
|
|
712
684
|
|
|
713
|
-
|
|
685
|
+
var r = input.split('\r');
|
|
714
686
|
|
|
715
|
-
|
|
687
|
+
var n = input.split('\n');
|
|
716
688
|
|
|
717
|
-
|
|
689
|
+
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
718
690
|
|
|
719
|
-
|
|
720
|
-
for (var i = 0; i < r.length; i++) {
|
|
721
|
-
if (r[i][0] === '\n') numWithN++;
|
|
722
|
-
}
|
|
691
|
+
if (r.length === 1 || nAppearsFirst) return '\n';
|
|
723
692
|
|
|
724
|
-
|
|
693
|
+
var numWithN = 0;
|
|
694
|
+
for (var i = 0; i < r.length; i++) {
|
|
695
|
+
if (r[i][0] === '\n') numWithN++;
|
|
725
696
|
}
|
|
726
697
|
|
|
727
|
-
|
|
728
|
-
_results.errors.push({
|
|
729
|
-
type: type,
|
|
730
|
-
code: code,
|
|
731
|
-
message: msg,
|
|
732
|
-
row: row
|
|
733
|
-
});
|
|
734
|
-
}
|
|
698
|
+
return numWithN >= r.length / 2 ? '\r\n' : '\r';
|
|
735
699
|
}
|
|
736
700
|
|
|
737
701
|
/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
|
|
@@ -777,6 +741,7 @@ function Parser(config) {
|
|
|
777
741
|
var cursor = 0;
|
|
778
742
|
var aborted = false;
|
|
779
743
|
|
|
744
|
+
// @ts-expect-error
|
|
780
745
|
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
781
746
|
// For some reason, in Chrome, this speeds things up (!?)
|
|
782
747
|
if (typeof input !== 'string') throw new Error('Input must be a string');
|
|
@@ -791,17 +756,17 @@ function Parser(config) {
|
|
|
791
756
|
|
|
792
757
|
// Establish starting state
|
|
793
758
|
cursor = 0;
|
|
794
|
-
var data = [],
|
|
795
|
-
errors = [],
|
|
796
|
-
row = [],
|
|
797
|
-
lastCursor = 0;
|
|
759
|
+
var data: any[][] | Record<string, any> = [],
|
|
760
|
+
errors: any[] = [],
|
|
761
|
+
row: any[] | Record<string, any> = [],
|
|
762
|
+
lastCursor: number = 0;
|
|
798
763
|
|
|
799
764
|
if (!input) return returnable();
|
|
800
765
|
|
|
801
766
|
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
|
|
802
767
|
var rows = input.split(newline);
|
|
803
768
|
for (var i = 0; i < rows.length; i++) {
|
|
804
|
-
row = rows[i];
|
|
769
|
+
const row = rows[i];
|
|
805
770
|
cursor += row.length;
|
|
806
771
|
if (i !== rows.length - 1) cursor += newline.length;
|
|
807
772
|
else if (ignoreLastRow) return returnable();
|
|
@@ -1002,7 +967,7 @@ function Parser(config) {
|
|
|
1002
967
|
* Appends the remaining input from cursor to the end into
|
|
1003
968
|
* row, saves the row, calls step, and returns the results.
|
|
1004
969
|
*/
|
|
1005
|
-
function finish(value) {
|
|
970
|
+
function finish(value?: any) {
|
|
1006
971
|
if (ignoreLastRow) return returnable();
|
|
1007
972
|
if (typeof value === 'undefined') value = input.substr(cursor);
|
|
1008
973
|
row.push(value);
|
|
@@ -1026,7 +991,7 @@ function Parser(config) {
|
|
|
1026
991
|
}
|
|
1027
992
|
|
|
1028
993
|
/** Returns an object with the results, errors, and meta. */
|
|
1029
|
-
function returnable(stopped, step) {
|
|
994
|
+
function returnable(stopped?: boolean, step?) {
|
|
1030
995
|
var isStep = step || false;
|
|
1031
996
|
return {
|
|
1032
997
|
data: isStep ? data[0] : data,
|
|
@@ -1050,20 +1015,18 @@ function Parser(config) {
|
|
|
1050
1015
|
};
|
|
1051
1016
|
|
|
1052
1017
|
/** Sets the abort flag */
|
|
1018
|
+
// @ts-expect-error
|
|
1053
1019
|
this.abort = function () {
|
|
1054
1020
|
aborted = true;
|
|
1055
1021
|
};
|
|
1056
1022
|
|
|
1057
1023
|
/** Gets the cursor position */
|
|
1024
|
+
// @ts-expect-error
|
|
1058
1025
|
this.getCharIndex = function () {
|
|
1059
1026
|
return cursor;
|
|
1060
1027
|
};
|
|
1061
1028
|
}
|
|
1062
1029
|
|
|
1063
|
-
function notImplemented() {
|
|
1064
|
-
throw new Error('Not implemented.');
|
|
1065
|
-
}
|
|
1066
|
-
|
|
1067
1030
|
/** Makes a deep copy of an array or object (mostly) */
|
|
1068
1031
|
function copy(obj) {
|
|
1069
1032
|
if (typeof obj !== 'object' || obj === null) return obj;
|
|
@@ -1072,6 +1035,31 @@ function copy(obj) {
|
|
|
1072
1035
|
return cpy;
|
|
1073
1036
|
}
|
|
1074
1037
|
|
|
1075
|
-
function isFunction(func) {
|
|
1038
|
+
function isFunction(func: unknown): func is Function {
|
|
1076
1039
|
return typeof func === 'function';
|
|
1077
1040
|
}
|
|
1041
|
+
|
|
1042
|
+
const Papa = {
|
|
1043
|
+
parse: CsvToJson,
|
|
1044
|
+
unparse: JsonToCsv,
|
|
1045
|
+
|
|
1046
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
1047
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
1048
|
+
BYTE_ORDER_MARK,
|
|
1049
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
1050
|
+
WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
|
|
1051
|
+
NODE_STREAM_INPUT: 1,
|
|
1052
|
+
|
|
1053
|
+
// Configurable chunk sizes for local and remote files, respectively
|
|
1054
|
+
LocalChunkSize: 1024 * 1024 * 10, // 10 M,
|
|
1055
|
+
RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
|
|
1056
|
+
DefaultDelimiter: ',', // Used if not specified and detection fail,
|
|
1057
|
+
|
|
1058
|
+
// Exposed for testing and development only
|
|
1059
|
+
Parser: Parser,
|
|
1060
|
+
ParserHandle: ParserHandle,
|
|
1061
|
+
|
|
1062
|
+
// BEGIN FORK
|
|
1063
|
+
ChunkStreamer: ChunkStreamer
|
|
1064
|
+
};
|
|
1065
|
+
export default Papa;
|