@loaders.gl/csv 4.3.0-alpha.8 → 4.3.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv-loader.d.ts.map +1 -1
- package/dist/csv-loader.js +1 -2
- package/dist/csv-writer.d.ts +0 -2
- package/dist/csv-writer.d.ts.map +1 -1
- package/dist/dist.dev.js +304 -311
- package/dist/dist.min.js +12 -12
- package/dist/index.cjs +285 -292
- package/dist/index.cjs.map +3 -3
- package/dist/papaparse/async-iterator-streamer.d.ts +28 -4
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
- package/dist/papaparse/async-iterator-streamer.js +11 -12
- package/dist/papaparse/papaparse.d.ts +108 -12
- package/dist/papaparse/papaparse.d.ts.map +1 -1
- package/dist/papaparse/papaparse.js +274 -314
- package/package.json +4 -4
- package/src/csv-loader.ts +0 -1
- package/src/csv-writer.ts +0 -2
- package/src/papaparse/async-iterator-streamer.ts +11 -14
- package/src/papaparse/papaparse.ts +330 -342
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// @ts-nocheck
|
|
2
1
|
// This is a fork of papaparse
|
|
3
2
|
// https://github.com/mholt/PapaParse
|
|
4
3
|
/* @license
|
|
@@ -13,43 +12,14 @@ License: MIT
|
|
|
13
12
|
// - Remove non Async Iterator streamers (can all be handled by new streamer)
|
|
14
13
|
// - Remove unused Worker support (loaders.gl worker system used instead)
|
|
15
14
|
// - Remove unused jQuery plugin support
|
|
15
|
+
// const defaultConfig: Required<CSVParserConfig> = {
|
|
16
|
+
// dynamicTyping: false,
|
|
17
|
+
// dynamicTypingFunction: undefined!,
|
|
18
|
+
// transform: false
|
|
19
|
+
// };
|
|
16
20
|
/* eslint-disable */
|
|
17
21
|
const BYTE_ORDER_MARK = '\ufeff';
|
|
18
|
-
|
|
19
|
-
parse: CsvToJson,
|
|
20
|
-
unparse: JsonToCsv,
|
|
21
|
-
RECORD_SEP: String.fromCharCode(30),
|
|
22
|
-
UNIT_SEP: String.fromCharCode(31),
|
|
23
|
-
BYTE_ORDER_MARK,
|
|
24
|
-
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
25
|
-
WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
|
|
26
|
-
NODE_STREAM_INPUT: 1,
|
|
27
|
-
// Configurable chunk sizes for local and remote files, respectively
|
|
28
|
-
LocalChunkSize: 1024 * 1024 * 10, // 10 M,
|
|
29
|
-
RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
|
|
30
|
-
DefaultDelimiter: ',', // Used if not specified and detection fail,
|
|
31
|
-
// Exposed for testing and development only
|
|
32
|
-
Parser: Parser,
|
|
33
|
-
ParserHandle: ParserHandle,
|
|
34
|
-
// BEGIN FORK
|
|
35
|
-
ChunkStreamer: ChunkStreamer,
|
|
36
|
-
StringStreamer: StringStreamer
|
|
37
|
-
};
|
|
38
|
-
export default Papa;
|
|
39
|
-
/*
|
|
40
|
-
Papa.NetworkStreamer = NetworkStreamer;
|
|
41
|
-
Papa.FileStreamer = FileStreamer;
|
|
42
|
-
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
|
|
43
|
-
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
44
|
-
Papa.DuplexStreamStreamer = DuplexStreamStreamer;
|
|
45
|
-
}
|
|
46
|
-
*/
|
|
47
|
-
// END FORK
|
|
48
|
-
// BEGIN FORK
|
|
49
|
-
// Adds an argument to papa.parse
|
|
50
|
-
// function CsvToJson(_input, _config)
|
|
51
|
-
function CsvToJson(_input, _config, UserDefinedStreamer // BEGIN FORK
|
|
52
|
-
) {
|
|
22
|
+
function CsvToJson(_input, _config = {}, Streamer = StringStreamer) {
|
|
53
23
|
_config = _config || {};
|
|
54
24
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
55
25
|
if (isFunction(dynamicTyping)) {
|
|
@@ -59,50 +29,7 @@ function CsvToJson(_input, _config, UserDefinedStreamer // BEGIN FORK
|
|
|
59
29
|
}
|
|
60
30
|
_config.dynamicTyping = dynamicTyping;
|
|
61
31
|
_config.transform = isFunction(_config.transform) ? _config.transform : false;
|
|
62
|
-
|
|
63
|
-
var w = newWorker();
|
|
64
|
-
w.userStep = _config.step;
|
|
65
|
-
w.userChunk = _config.chunk;
|
|
66
|
-
w.userComplete = _config.complete;
|
|
67
|
-
w.userError = _config.error;
|
|
68
|
-
_config.step = isFunction(_config.step);
|
|
69
|
-
_config.chunk = isFunction(_config.chunk);
|
|
70
|
-
_config.complete = isFunction(_config.complete);
|
|
71
|
-
_config.error = isFunction(_config.error);
|
|
72
|
-
delete _config.worker; // prevent infinite loop
|
|
73
|
-
w.postMessage({
|
|
74
|
-
input: _input,
|
|
75
|
-
config: _config,
|
|
76
|
-
workerId: w.id
|
|
77
|
-
});
|
|
78
|
-
return;
|
|
79
|
-
}
|
|
80
|
-
var streamer = null;
|
|
81
|
-
/*
|
|
82
|
-
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
83
|
-
// create a node Duplex stream for use
|
|
84
|
-
// with .pipe
|
|
85
|
-
streamer = new DuplexStreamStreamer(_config);
|
|
86
|
-
return streamer.getStream();
|
|
87
|
-
} else
|
|
88
|
-
*/
|
|
89
|
-
if (typeof _input === 'string') {
|
|
90
|
-
// if (_config.download) streamer = new NetworkStreamer(_config);
|
|
91
|
-
// else
|
|
92
|
-
streamer = new StringStreamer(_config);
|
|
93
|
-
}
|
|
94
|
-
/*
|
|
95
|
-
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
96
|
-
streamer = new ReadableStreamStreamer(_config);
|
|
97
|
-
} else if ((globalThis.File && _input instanceof File) || _input instanceof Object)
|
|
98
|
-
// ...Safari. (see issue #106)
|
|
99
|
-
streamer = new FileStreamer(_config);
|
|
100
|
-
*/
|
|
101
|
-
// BEGIN FORK
|
|
102
|
-
if (!streamer) {
|
|
103
|
-
streamer = new UserDefinedStreamer(_config);
|
|
104
|
-
}
|
|
105
|
-
// END FORK
|
|
32
|
+
var streamer = new Streamer(_config);
|
|
106
33
|
return streamer.stream(_input);
|
|
107
34
|
}
|
|
108
35
|
function JsonToCsv(_input, _config) {
|
|
@@ -131,7 +58,7 @@ function JsonToCsv(_input, _config) {
|
|
|
131
58
|
if (!_input.length || Array.isArray(_input[0]))
|
|
132
59
|
return serialize(null, _input, _skipEmptyLines);
|
|
133
60
|
else if (typeof _input[0] === 'object')
|
|
134
|
-
return serialize(_columns ||
|
|
61
|
+
return serialize(_columns || Object.keys(_input[0]), _input, _skipEmptyLines);
|
|
135
62
|
}
|
|
136
63
|
else if (typeof _input === 'object') {
|
|
137
64
|
if (typeof _input.data === 'string')
|
|
@@ -140,7 +67,7 @@ function JsonToCsv(_input, _config) {
|
|
|
140
67
|
if (!_input.fields)
|
|
141
68
|
_input.fields = _input.meta && _input.meta.fields;
|
|
142
69
|
if (!_input.fields)
|
|
143
|
-
_input.fields = Array.isArray(_input.data[0]) ? _input.fields :
|
|
70
|
+
_input.fields = Array.isArray(_input.data[0]) ? _input.fields : Object.keys(_input.data[0]);
|
|
144
71
|
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
|
|
145
72
|
_input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
|
|
146
73
|
}
|
|
@@ -176,15 +103,6 @@ function JsonToCsv(_input, _config) {
|
|
|
176
103
|
_escapedQuote = _config.escapeChar + _quoteChar;
|
|
177
104
|
}
|
|
178
105
|
}
|
|
179
|
-
/** Turns an object's keys into an array */
|
|
180
|
-
function objectKeys(obj) {
|
|
181
|
-
if (typeof obj !== 'object')
|
|
182
|
-
return [];
|
|
183
|
-
var keys = [];
|
|
184
|
-
for (var key in obj)
|
|
185
|
-
keys.push(key);
|
|
186
|
-
return keys;
|
|
187
|
-
}
|
|
188
106
|
/** The double for loop that iterates the data and writes out a CSV string including header row */
|
|
189
107
|
function serialize(fields, data, skipEmptyLines) {
|
|
190
108
|
var csv = '';
|
|
@@ -260,24 +178,35 @@ function JsonToCsv(_input, _config) {
|
|
|
260
178
|
}
|
|
261
179
|
}
|
|
262
180
|
/** ChunkStreamer is the base prototype for various streamer implementations. */
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
181
|
+
class ChunkStreamer {
|
|
182
|
+
_handle;
|
|
183
|
+
_config;
|
|
184
|
+
_finished = false;
|
|
185
|
+
_completed = false;
|
|
186
|
+
_input = null;
|
|
187
|
+
_baseIndex = 0;
|
|
188
|
+
_partialLine = '';
|
|
189
|
+
_rowCount = 0;
|
|
190
|
+
_start = 0;
|
|
191
|
+
isFirstChunk = true;
|
|
192
|
+
_completeResults = {
|
|
275
193
|
data: [],
|
|
276
194
|
errors: [],
|
|
277
195
|
meta: {}
|
|
278
196
|
};
|
|
279
|
-
|
|
280
|
-
|
|
197
|
+
constructor(config) {
|
|
198
|
+
// Deep-copy the config so we can edit it
|
|
199
|
+
var configCopy = { ...config };
|
|
200
|
+
// @ts-expect-error
|
|
201
|
+
configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
|
|
202
|
+
if (!config.step && !config.chunk) {
|
|
203
|
+
configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
|
|
204
|
+
}
|
|
205
|
+
this._handle = new ParserHandle(configCopy);
|
|
206
|
+
this._handle.streamer = this;
|
|
207
|
+
this._config = configCopy; // persist the copy to the caller
|
|
208
|
+
}
|
|
209
|
+
parseChunk(chunk, isFakeChunk) {
|
|
281
210
|
// First chunk pre-processing
|
|
282
211
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
283
212
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
@@ -304,6 +233,7 @@ function ChunkStreamer(config) {
|
|
|
304
233
|
if (this._handle.paused() || this._handle.aborted())
|
|
305
234
|
return;
|
|
306
235
|
results = undefined;
|
|
236
|
+
// @ts-expect-error
|
|
307
237
|
this._completeResults = undefined;
|
|
308
238
|
}
|
|
309
239
|
if (!this._config.step && !this._config.chunk) {
|
|
@@ -318,189 +248,196 @@ function ChunkStreamer(config) {
|
|
|
318
248
|
this._config.complete(this._completeResults, this._input);
|
|
319
249
|
this._completed = true;
|
|
320
250
|
}
|
|
321
|
-
if (!finishedIncludingPreview && (!results || !results.meta.paused))
|
|
322
|
-
this._nextChunk();
|
|
251
|
+
// if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
323
252
|
return results;
|
|
324
|
-
}
|
|
325
|
-
|
|
253
|
+
}
|
|
254
|
+
_sendError(error) {
|
|
326
255
|
if (isFunction(this._config.error))
|
|
327
256
|
this._config.error(error);
|
|
328
|
-
};
|
|
329
|
-
function replaceConfig(config) {
|
|
330
|
-
// Deep-copy the config so we can edit it
|
|
331
|
-
var configCopy = copy(config);
|
|
332
|
-
configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
|
|
333
|
-
if (!config.step && !config.chunk)
|
|
334
|
-
configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
|
|
335
|
-
this._handle = new ParserHandle(configCopy);
|
|
336
|
-
this._handle.streamer = this;
|
|
337
|
-
this._config = configCopy; // persist the copy to the caller
|
|
338
257
|
}
|
|
339
258
|
}
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
259
|
+
class StringStreamer extends ChunkStreamer {
|
|
260
|
+
remaining;
|
|
261
|
+
constructor(config = {}) {
|
|
262
|
+
super(config);
|
|
263
|
+
}
|
|
264
|
+
stream(s) {
|
|
265
|
+
this.remaining = s;
|
|
346
266
|
return this._nextChunk();
|
|
347
|
-
}
|
|
348
|
-
|
|
267
|
+
}
|
|
268
|
+
_nextChunk() {
|
|
349
269
|
if (this._finished)
|
|
350
270
|
return;
|
|
351
271
|
var size = this._config.chunkSize;
|
|
352
|
-
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
353
|
-
remaining = size ? remaining.substr(size) : '';
|
|
354
|
-
this._finished = !remaining;
|
|
272
|
+
var chunk = size ? this.remaining.substr(0, size) : this.remaining;
|
|
273
|
+
this.remaining = size ? this.remaining.substr(size) : '';
|
|
274
|
+
this._finished = !this.remaining;
|
|
355
275
|
return this.parseChunk(chunk);
|
|
356
|
-
}
|
|
276
|
+
}
|
|
357
277
|
}
|
|
358
|
-
|
|
359
|
-
|
|
278
|
+
const FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
279
|
+
const ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
360
280
|
// Use one ParserHandle per entire CSV file or string
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
281
|
+
class ParserHandle {
|
|
282
|
+
_config;
|
|
283
|
+
/** Number of times step was called (number of rows parsed) */
|
|
284
|
+
_stepCounter = 0;
|
|
285
|
+
/** Number of rows that have been parsed so far */
|
|
286
|
+
_rowCounter = 0;
|
|
287
|
+
/** The input being parsed */
|
|
288
|
+
_input;
|
|
289
|
+
/** The core parser being used */
|
|
290
|
+
_parser;
|
|
291
|
+
/** Whether we are paused or not */
|
|
292
|
+
_paused = false;
|
|
293
|
+
/** Whether the parser has aborted or not */
|
|
294
|
+
_aborted = false;
|
|
295
|
+
/** Temporary state between delimiter detection and processing results */
|
|
296
|
+
_delimiterError = false;
|
|
297
|
+
/** Fields are from the header row of the input, if there is one */
|
|
298
|
+
_fields = [];
|
|
299
|
+
/** The last results returned from the parser */
|
|
300
|
+
_results = {
|
|
376
301
|
data: [],
|
|
377
302
|
errors: [],
|
|
378
303
|
meta: {}
|
|
379
304
|
};
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
_config.step
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
//
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
305
|
+
constructor(_config) {
|
|
306
|
+
// One goal is to minimize the use of regular expressions...
|
|
307
|
+
if (isFunction(_config.step)) {
|
|
308
|
+
var userStep = _config.step;
|
|
309
|
+
_config.step = (results) => {
|
|
310
|
+
this._results = results;
|
|
311
|
+
if (this.needsHeaderRow()) {
|
|
312
|
+
this.processResults();
|
|
313
|
+
}
|
|
314
|
+
// only call user's step function after header row
|
|
315
|
+
else {
|
|
316
|
+
this.processResults();
|
|
317
|
+
// It's possbile that this line was empty and there's no row here after all
|
|
318
|
+
if (!this._results.data || this._results.data.length === 0)
|
|
319
|
+
return;
|
|
320
|
+
this._stepCounter += results.data.length;
|
|
321
|
+
if (_config.preview && this._stepCounter > _config.preview) {
|
|
322
|
+
this._parser.abort();
|
|
323
|
+
}
|
|
324
|
+
else {
|
|
325
|
+
userStep(this._results, this);
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
};
|
|
329
|
+
}
|
|
330
|
+
this._config = _config;
|
|
399
331
|
}
|
|
400
332
|
/**
|
|
401
333
|
* Parses input. Most users won't need, and shouldn't mess with, the baseIndex
|
|
402
334
|
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
403
335
|
* when an input comes in multiple chunks, like from a file.
|
|
404
336
|
*/
|
|
405
|
-
|
|
406
|
-
var quoteChar = _config.quoteChar || '"';
|
|
407
|
-
if (!_config.newline)
|
|
408
|
-
_config.newline = guessLineEndings(input, quoteChar);
|
|
409
|
-
_delimiterError = false;
|
|
410
|
-
if (!_config.delimiter) {
|
|
411
|
-
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
|
|
412
|
-
if (delimGuess.successful)
|
|
413
|
-
_config.delimiter = delimGuess.bestDelimiter;
|
|
337
|
+
parse(input, baseIndex, ignoreLastRow) {
|
|
338
|
+
var quoteChar = this._config.quoteChar || '"';
|
|
339
|
+
if (!this._config.newline)
|
|
340
|
+
this._config.newline = guessLineEndings(input, quoteChar);
|
|
341
|
+
this._delimiterError = false;
|
|
342
|
+
if (!this._config.delimiter) {
|
|
343
|
+
var delimGuess = this.guessDelimiter(input, this._config.newline, this._config.skipEmptyLines, this._config.comments, this._config.delimitersToGuess);
|
|
344
|
+
if (delimGuess.successful) {
|
|
345
|
+
this._config.delimiter = delimGuess.bestDelimiter;
|
|
346
|
+
}
|
|
414
347
|
else {
|
|
415
|
-
_delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
416
|
-
_config.delimiter = Papa.DefaultDelimiter;
|
|
348
|
+
this._delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
349
|
+
this._config.delimiter = Papa.DefaultDelimiter;
|
|
417
350
|
}
|
|
418
|
-
_results.meta.delimiter = _config.delimiter;
|
|
351
|
+
this._results.meta.delimiter = this._config.delimiter;
|
|
419
352
|
}
|
|
420
|
-
else if (isFunction(_config.delimiter)) {
|
|
421
|
-
_config.delimiter = _config.delimiter(input);
|
|
422
|
-
_results.meta.delimiter = _config.delimiter;
|
|
353
|
+
else if (isFunction(this._config.delimiter)) {
|
|
354
|
+
this._config.delimiter = this._config.delimiter(input);
|
|
355
|
+
this._results.meta.delimiter = this._config.delimiter;
|
|
423
356
|
}
|
|
424
|
-
var parserConfig = copy(_config);
|
|
425
|
-
if (_config.preview && _config.header)
|
|
357
|
+
var parserConfig = copy(this._config);
|
|
358
|
+
if (this._config.preview && this._config.header)
|
|
426
359
|
parserConfig.preview++; // to compensate for header row
|
|
427
|
-
_input = input;
|
|
428
|
-
_parser = new Parser(parserConfig);
|
|
429
|
-
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
|
|
430
|
-
processResults();
|
|
431
|
-
return _paused ? { meta: { paused: true } } : _results || { meta: { paused: false } };
|
|
432
|
-
}
|
|
433
|
-
|
|
434
|
-
return _paused;
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
_paused = true;
|
|
438
|
-
_parser.abort();
|
|
439
|
-
_input = _input.substr(_parser.getCharIndex());
|
|
440
|
-
}
|
|
441
|
-
|
|
442
|
-
_paused = false;
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
360
|
+
this._input = input;
|
|
361
|
+
this._parser = new Parser(parserConfig);
|
|
362
|
+
this._results = this._parser.parse(this._input, baseIndex, ignoreLastRow);
|
|
363
|
+
this.processResults();
|
|
364
|
+
return this._paused ? { meta: { paused: true } } : this._results || { meta: { paused: false } };
|
|
365
|
+
}
|
|
366
|
+
paused() {
|
|
367
|
+
return this._paused;
|
|
368
|
+
}
|
|
369
|
+
pause() {
|
|
370
|
+
this._paused = true;
|
|
371
|
+
this._parser.abort();
|
|
372
|
+
this._input = this._input.substr(this._parser.getCharIndex());
|
|
373
|
+
}
|
|
374
|
+
resume() {
|
|
375
|
+
this._paused = false;
|
|
376
|
+
// @ts-expect-error
|
|
377
|
+
this.streamer.parseChunk(this._input, true);
|
|
378
|
+
}
|
|
379
|
+
aborted() {
|
|
380
|
+
return this._aborted;
|
|
381
|
+
}
|
|
382
|
+
abort() {
|
|
383
|
+
this._aborted = true;
|
|
384
|
+
this._parser.abort();
|
|
385
|
+
this._results.meta.aborted = true;
|
|
386
|
+
if (isFunction(this._config.complete)) {
|
|
387
|
+
this._config.complete(this._results);
|
|
388
|
+
}
|
|
389
|
+
this._input = '';
|
|
390
|
+
}
|
|
391
|
+
testEmptyLine(s) {
|
|
392
|
+
return this._config.skipEmptyLines === 'greedy'
|
|
458
393
|
? s.join('').trim() === ''
|
|
459
394
|
: s.length === 1 && s[0].length === 0;
|
|
460
395
|
}
|
|
461
|
-
|
|
462
|
-
if (_results && _delimiterError) {
|
|
463
|
-
addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
|
|
464
|
-
_delimiterError = false;
|
|
396
|
+
processResults() {
|
|
397
|
+
if (this._results && this._delimiterError) {
|
|
398
|
+
this.addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
|
|
399
|
+
this._delimiterError = false;
|
|
465
400
|
}
|
|
466
|
-
if (_config.skipEmptyLines) {
|
|
467
|
-
for (var i = 0; i < _results.data.length; i++)
|
|
468
|
-
if (testEmptyLine(_results.data[i]))
|
|
469
|
-
_results.data.splice(i--, 1);
|
|
401
|
+
if (this._config.skipEmptyLines) {
|
|
402
|
+
for (var i = 0; i < this._results.data.length; i++)
|
|
403
|
+
if (this.testEmptyLine(this._results.data[i]))
|
|
404
|
+
this._results.data.splice(i--, 1);
|
|
470
405
|
}
|
|
471
|
-
if (needsHeaderRow())
|
|
472
|
-
fillHeaderFields();
|
|
473
|
-
|
|
406
|
+
if (this.needsHeaderRow()) {
|
|
407
|
+
this.fillHeaderFields();
|
|
408
|
+
}
|
|
409
|
+
return this.applyHeaderAndDynamicTypingAndTransformation();
|
|
474
410
|
}
|
|
475
|
-
|
|
476
|
-
return _config.header && _fields.length === 0;
|
|
411
|
+
needsHeaderRow() {
|
|
412
|
+
return this._config.header && this._fields.length === 0;
|
|
477
413
|
}
|
|
478
|
-
|
|
479
|
-
if (!_results)
|
|
414
|
+
fillHeaderFields() {
|
|
415
|
+
if (!this._results)
|
|
480
416
|
return;
|
|
481
|
-
|
|
482
|
-
if (isFunction(_config.transformHeader))
|
|
483
|
-
header = _config.transformHeader(header);
|
|
484
|
-
_fields.push(header);
|
|
485
|
-
}
|
|
486
|
-
if (Array.isArray(_results.data[0])) {
|
|
487
|
-
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
|
|
488
|
-
_results.data[i].forEach(addHeder);
|
|
489
|
-
_results.data.splice(0, 1);
|
|
417
|
+
const addHeder = (header) => {
|
|
418
|
+
if (isFunction(this._config.transformHeader))
|
|
419
|
+
header = this._config.transformHeader(header);
|
|
420
|
+
this._fields.push(header);
|
|
421
|
+
};
|
|
422
|
+
if (Array.isArray(this._results.data[0])) {
|
|
423
|
+
for (var i = 0; this.needsHeaderRow() && i < this._results.data.length; i++)
|
|
424
|
+
this._results.data[i].forEach(addHeder);
|
|
425
|
+
this._results.data.splice(0, 1);
|
|
490
426
|
}
|
|
491
427
|
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
492
|
-
else
|
|
493
|
-
_results.data.forEach(addHeder);
|
|
428
|
+
else {
|
|
429
|
+
this._results.data.forEach(addHeder);
|
|
430
|
+
}
|
|
494
431
|
}
|
|
495
|
-
|
|
432
|
+
shouldApplyDynamicTyping(field) {
|
|
496
433
|
// Cache function values to avoid calling it for each row
|
|
497
|
-
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
|
|
498
|
-
_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
|
|
434
|
+
if (this._config.dynamicTypingFunction && this._config.dynamicTyping[field] === undefined) {
|
|
435
|
+
this._config.dynamicTyping[field] = this._config.dynamicTypingFunction(field);
|
|
499
436
|
}
|
|
500
|
-
return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
|
|
437
|
+
return (this._config.dynamicTyping[field] || this._config.dynamicTyping) === true;
|
|
501
438
|
}
|
|
502
|
-
|
|
503
|
-
if (shouldApplyDynamicTyping(field)) {
|
|
439
|
+
parseDynamic(field, value) {
|
|
440
|
+
if (this.shouldApplyDynamicTyping(field)) {
|
|
504
441
|
if (value === 'true' || value === 'TRUE')
|
|
505
442
|
return true;
|
|
506
443
|
else if (value === 'false' || value === 'FALSE')
|
|
@@ -514,50 +451,53 @@ function ParserHandle(_config) {
|
|
|
514
451
|
}
|
|
515
452
|
return value;
|
|
516
453
|
}
|
|
517
|
-
|
|
518
|
-
if (!_results ||
|
|
519
|
-
!_results.data ||
|
|
520
|
-
(!_config.header && !_config.dynamicTyping && !_config.transform))
|
|
521
|
-
return _results;
|
|
522
|
-
function processRow(rowSource, i) {
|
|
523
|
-
var row = _config.header ? {} : [];
|
|
524
|
-
var j;
|
|
525
|
-
for (j = 0; j < rowSource.length; j++) {
|
|
526
|
-
var field = j;
|
|
527
|
-
var value = rowSource[j];
|
|
528
|
-
if (_config.header)
|
|
529
|
-
field = j >= _fields.length ? '__parsed_extra' : _fields[j];
|
|
530
|
-
if (_config.transform)
|
|
531
|
-
value = _config.transform(value, field);
|
|
532
|
-
value = parseDynamic(field, value);
|
|
533
|
-
if (field === '__parsed_extra') {
|
|
534
|
-
row[field] = row[field] || [];
|
|
535
|
-
row[field].push(value);
|
|
536
|
-
}
|
|
537
|
-
else
|
|
538
|
-
row[field] = value;
|
|
539
|
-
}
|
|
540
|
-
if (_config.header) {
|
|
541
|
-
if (j > _fields.length)
|
|
542
|
-
addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
|
|
543
|
-
else if (j < _fields.length)
|
|
544
|
-
addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
|
|
545
|
-
}
|
|
546
|
-
return row;
|
|
454
|
+
applyHeaderAndDynamicTypingAndTransformation() {
|
|
455
|
+
if (!this._results ||
|
|
456
|
+
!this._results.data ||
|
|
457
|
+
(!this._config.header && !this._config.dynamicTyping && !this._config.transform)) {
|
|
458
|
+
return this._results;
|
|
547
459
|
}
|
|
548
460
|
var incrementBy = 1;
|
|
549
|
-
if (!_results.data[0] || Array.isArray(_results.data[0])) {
|
|
550
|
-
_results.data = _results.data.map(processRow);
|
|
551
|
-
incrementBy = _results.data.length;
|
|
552
|
-
}
|
|
553
|
-
else
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
461
|
+
if (!this._results.data[0] || Array.isArray(this._results.data[0])) {
|
|
462
|
+
this._results.data = this._results.data.map(this.processRow.bind(this));
|
|
463
|
+
incrementBy = this._results.data.length;
|
|
464
|
+
}
|
|
465
|
+
else {
|
|
466
|
+
// @ts-expect-error
|
|
467
|
+
this._results.data = this.processRow(this._results.data, 0);
|
|
468
|
+
}
|
|
469
|
+
if (this._config.header && this._results.meta)
|
|
470
|
+
this._results.meta.fields = this._fields;
|
|
471
|
+
this._rowCounter += incrementBy;
|
|
472
|
+
return this._results;
|
|
473
|
+
}
|
|
474
|
+
processRow(rowSource, i) {
|
|
475
|
+
var row = this._config.header ? {} : [];
|
|
476
|
+
var j;
|
|
477
|
+
for (j = 0; j < rowSource.length; j++) {
|
|
478
|
+
var field = j;
|
|
479
|
+
var value = rowSource[j];
|
|
480
|
+
if (this._config.header)
|
|
481
|
+
field = j >= this._fields.length ? '__parsed_extra' : this._fields[j];
|
|
482
|
+
if (this._config.transform)
|
|
483
|
+
value = this._config.transform(value, field);
|
|
484
|
+
value = this.parseDynamic(field, value);
|
|
485
|
+
if (field === '__parsed_extra') {
|
|
486
|
+
row[field] = row[field] || [];
|
|
487
|
+
row[field].push(value);
|
|
488
|
+
}
|
|
489
|
+
else
|
|
490
|
+
row[field] = value;
|
|
491
|
+
}
|
|
492
|
+
if (this._config.header) {
|
|
493
|
+
if (j > this._fields.length)
|
|
494
|
+
this.addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + this._fields.length + ' fields but parsed ' + j, this._rowCounter + i);
|
|
495
|
+
else if (j < this._fields.length)
|
|
496
|
+
this.addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + this._fields.length + ' fields but parsed ' + j, this._rowCounter + i);
|
|
497
|
+
}
|
|
498
|
+
return row;
|
|
499
|
+
}
|
|
500
|
+
guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
|
|
561
501
|
var bestDelim, bestDelta, fieldCountPrevRow;
|
|
562
502
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
|
|
563
503
|
for (var i = 0; i < delimitersToGuess.length; i++) {
|
|
@@ -571,7 +511,7 @@ function ParserHandle(_config) {
|
|
|
571
511
|
preview: 10
|
|
572
512
|
}).parse(input);
|
|
573
513
|
for (var j = 0; j < preview.data.length; j++) {
|
|
574
|
-
if (skipEmptyLines && testEmptyLine(preview.data[j])) {
|
|
514
|
+
if (skipEmptyLines && this.testEmptyLine(preview.data[j])) {
|
|
575
515
|
emptyLinesCount++;
|
|
576
516
|
continue;
|
|
577
517
|
}
|
|
@@ -593,31 +533,14 @@ function ParserHandle(_config) {
|
|
|
593
533
|
bestDelim = delim;
|
|
594
534
|
}
|
|
595
535
|
}
|
|
596
|
-
_config.delimiter = bestDelim;
|
|
536
|
+
this._config.delimiter = bestDelim;
|
|
597
537
|
return {
|
|
598
538
|
successful: !!bestDelim,
|
|
599
539
|
bestDelimiter: bestDelim
|
|
600
540
|
};
|
|
601
541
|
}
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
// Replace all the text inside quotes
|
|
605
|
-
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
606
|
-
input = input.replace(re, '');
|
|
607
|
-
var r = input.split('\r');
|
|
608
|
-
var n = input.split('\n');
|
|
609
|
-
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
610
|
-
if (r.length === 1 || nAppearsFirst)
|
|
611
|
-
return '\n';
|
|
612
|
-
var numWithN = 0;
|
|
613
|
-
for (var i = 0; i < r.length; i++) {
|
|
614
|
-
if (r[i][0] === '\n')
|
|
615
|
-
numWithN++;
|
|
616
|
-
}
|
|
617
|
-
return numWithN >= r.length / 2 ? '\r\n' : '\r';
|
|
618
|
-
}
|
|
619
|
-
function addError(type, code, msg, row) {
|
|
620
|
-
_results.errors.push({
|
|
542
|
+
addError(type, code, msg, row) {
|
|
543
|
+
this._results.errors.push({
|
|
621
544
|
type: type,
|
|
622
545
|
code: code,
|
|
623
546
|
message: msg,
|
|
@@ -625,6 +548,23 @@ function ParserHandle(_config) {
|
|
|
625
548
|
});
|
|
626
549
|
}
|
|
627
550
|
}
|
|
551
|
+
function guessLineEndings(input, quoteChar) {
|
|
552
|
+
input = input.substr(0, 1024 * 1024); // max length 1 MB
|
|
553
|
+
// Replace all the text inside quotes
|
|
554
|
+
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
555
|
+
input = input.replace(re, '');
|
|
556
|
+
var r = input.split('\r');
|
|
557
|
+
var n = input.split('\n');
|
|
558
|
+
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
559
|
+
if (r.length === 1 || nAppearsFirst)
|
|
560
|
+
return '\n';
|
|
561
|
+
var numWithN = 0;
|
|
562
|
+
for (var i = 0; i < r.length; i++) {
|
|
563
|
+
if (r[i][0] === '\n')
|
|
564
|
+
numWithN++;
|
|
565
|
+
}
|
|
566
|
+
return numWithN >= r.length / 2 ? '\r\n' : '\r';
|
|
567
|
+
}
|
|
628
568
|
/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
|
|
629
569
|
function escapeRegExp(string) {
|
|
630
570
|
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
|
|
@@ -667,6 +607,7 @@ function Parser(config) {
|
|
|
667
607
|
// We're gonna need these at the Parser scope
|
|
668
608
|
var cursor = 0;
|
|
669
609
|
var aborted = false;
|
|
610
|
+
// @ts-expect-error
|
|
670
611
|
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
671
612
|
// For some reason, in Chrome, this speeds things up (!?)
|
|
672
613
|
if (typeof input !== 'string')
|
|
@@ -683,7 +624,7 @@ function Parser(config) {
|
|
|
683
624
|
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
|
|
684
625
|
var rows = input.split(newline);
|
|
685
626
|
for (var i = 0; i < rows.length; i++) {
|
|
686
|
-
row = rows[i];
|
|
627
|
+
const row = rows[i];
|
|
687
628
|
cursor += row.length;
|
|
688
629
|
if (i !== rows.length - 1)
|
|
689
630
|
cursor += newline.length;
|
|
@@ -908,17 +849,16 @@ function Parser(config) {
|
|
|
908
849
|
}
|
|
909
850
|
};
|
|
910
851
|
/** Sets the abort flag */
|
|
852
|
+
// @ts-expect-error
|
|
911
853
|
this.abort = function () {
|
|
912
854
|
aborted = true;
|
|
913
855
|
};
|
|
914
856
|
/** Gets the cursor position */
|
|
857
|
+
// @ts-expect-error
|
|
915
858
|
this.getCharIndex = function () {
|
|
916
859
|
return cursor;
|
|
917
860
|
};
|
|
918
861
|
}
|
|
919
|
-
function notImplemented() {
|
|
920
|
-
throw new Error('Not implemented.');
|
|
921
|
-
}
|
|
922
862
|
/** Makes a deep copy of an array or object (mostly) */
|
|
923
863
|
function copy(obj) {
|
|
924
864
|
if (typeof obj !== 'object' || obj === null)
|
|
@@ -931,3 +871,23 @@ function copy(obj) {
|
|
|
931
871
|
function isFunction(func) {
|
|
932
872
|
return typeof func === 'function';
|
|
933
873
|
}
|
|
874
|
+
const Papa = {
|
|
875
|
+
parse: CsvToJson,
|
|
876
|
+
unparse: JsonToCsv,
|
|
877
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
878
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
879
|
+
BYTE_ORDER_MARK,
|
|
880
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
881
|
+
WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
|
|
882
|
+
NODE_STREAM_INPUT: 1,
|
|
883
|
+
// Configurable chunk sizes for local and remote files, respectively
|
|
884
|
+
LocalChunkSize: 1024 * 1024 * 10, // 10 M,
|
|
885
|
+
RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
|
|
886
|
+
DefaultDelimiter: ',', // Used if not specified and detection fail,
|
|
887
|
+
// Exposed for testing and development only
|
|
888
|
+
Parser: Parser,
|
|
889
|
+
ParserHandle: ParserHandle,
|
|
890
|
+
// BEGIN FORK
|
|
891
|
+
ChunkStreamer: ChunkStreamer
|
|
892
|
+
};
|
|
893
|
+
export default Papa;
|