@loaders.gl/csv 3.1.0-alpha.4 → 4.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/{esm/bundle.js → bundle.js} +0 -0
  2. package/dist/bundle.js.map +1 -0
  3. package/dist/{esm/csv-loader.js → csv-loader.js} +3 -3
  4. package/dist/csv-loader.js.map +1 -0
  5. package/dist/{esm/csv-writer.js → csv-writer.js} +0 -0
  6. package/dist/{es5/csv-writer.js.map → csv-writer.js.map} +0 -0
  7. package/dist/{esm/index.js → index.js} +0 -0
  8. package/dist/index.js.map +1 -0
  9. package/dist/{esm/lib → lib}/async-iterator-streamer.js +0 -0
  10. package/dist/lib/async-iterator-streamer.js.map +1 -0
  11. package/dist/{es5/libs → libs}/papaparse.js +0 -0
  12. package/dist/{esm/papaparse → papaparse}/async-iterator-streamer.js +0 -0
  13. package/dist/papaparse/async-iterator-streamer.js.map +1 -0
  14. package/package.json +7 -7
  15. package/dist/dist.min.js +0 -9
  16. package/dist/dist.min.js.map +0 -1
  17. package/dist/es5/bundle.js +0 -7
  18. package/dist/es5/bundle.js.map +0 -1
  19. package/dist/es5/csv-loader.js +0 -309
  20. package/dist/es5/csv-loader.js.map +0 -1
  21. package/dist/es5/csv-writer.js +0 -2
  22. package/dist/es5/index.js +0 -14
  23. package/dist/es5/index.js.map +0 -1
  24. package/dist/es5/lib/async-iterator-streamer.js +0 -140
  25. package/dist/es5/lib/async-iterator-streamer.js.map +0 -1
  26. package/dist/es5/papaparse/async-iterator-streamer.js +0 -140
  27. package/dist/es5/papaparse/async-iterator-streamer.js.map +0 -1
  28. package/dist/esm/bundle.js.map +0 -1
  29. package/dist/esm/csv-loader.js.map +0 -1
  30. package/dist/esm/csv-writer.js.map +0 -1
  31. package/dist/esm/index.js.map +0 -1
  32. package/dist/esm/lib/async-iterator-streamer.js.map +0 -1
  33. package/dist/esm/libs/papaparse.js +0 -1102
  34. package/dist/esm/papaparse/async-iterator-streamer.js.map +0 -1
@@ -1,1102 +0,0 @@
1
- // This is a fork of papaparse
2
- // https://github.com/mholt/PapaParse
3
- /* @license
4
- Papa Parse
5
- v5.0.0-beta.0
6
- https://github.com/mholt/PapaParse
7
- License: MIT
8
- */
9
- // FORK SUMMARY:
10
- // - Adopt ES6 exports
11
- // - Implement new AsyncIteratorStreamer
12
- // - Remove non Async Iterator streamers (can all be handled by new streamer)
13
- // - Remove unused Worker support (loaders.gl worker system used instead)
14
- // - Remove unused jQuery plugin support
15
-
16
- /* eslint-disable */
17
- // @ts-nocheck
18
- var global = (function() {
19
- // alternative method, similar to `Function('return this')()`
20
- // but without using `eval` (which is disabled when
21
- // using Content Security Policy).
22
-
23
- if (typeof self !== 'undefined') {
24
- return self;
25
- }
26
- if (typeof window !== 'undefined') {
27
- return window;
28
- }
29
- if (typeof global !== 'undefined') {
30
- return global;
31
- }
32
-
33
- // When running tests none of the above have been defined
34
- return {};
35
- })();
36
-
37
- var IS_PAPA_WORKER = false;
38
-
39
- var Papa = {};
40
- module.exports = Papa;
41
- Papa.parse = CsvToJson;
42
- Papa.unparse = JsonToCsv;
43
-
44
- Papa.RECORD_SEP = String.fromCharCode(30);
45
- Papa.UNIT_SEP = String.fromCharCode(31);
46
- Papa.BYTE_ORDER_MARK = '\ufeff';
47
- Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
48
- Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
49
- Papa.NODE_STREAM_INPUT = 1;
50
-
51
- // Configurable chunk sizes for local and remote files, respectively
52
- Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
53
- Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
54
- Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
55
-
56
- // Exposed for testing and development only
57
- Papa.Parser = Parser;
58
- Papa.ParserHandle = ParserHandle;
59
-
60
- // BEGIN FORK
61
- Papa.ChunkStreamer = ChunkStreamer;
62
- Papa.StringStreamer = StringStreamer;
63
- /*
64
- Papa.NetworkStreamer = NetworkStreamer;
65
- Papa.FileStreamer = FileStreamer;
66
- Papa.ReadableStreamStreamer = ReadableStreamStreamer;
67
- if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
68
- Papa.DuplexStreamStreamer = DuplexStreamStreamer;
69
- }
70
- */
71
- // END FORK
72
-
73
- // BEGIN FORK
74
- // Adds an argument to papa.parse
75
- // function CsvToJson(_input, _config)
76
- function CsvToJson(
77
- _input,
78
- _config,
79
- UserDefinedStreamer // BEGIN FORK
80
- ) {
81
- _config = _config || {};
82
- var dynamicTyping = _config.dynamicTyping || false;
83
- if (isFunction(dynamicTyping)) {
84
- _config.dynamicTypingFunction = dynamicTyping;
85
- // Will be filled on first row call
86
- dynamicTyping = {};
87
- }
88
- _config.dynamicTyping = dynamicTyping;
89
-
90
- _config.transform = isFunction(_config.transform) ? _config.transform : false;
91
-
92
- if (_config.worker && Papa.WORKERS_SUPPORTED) {
93
- var w = newWorker();
94
-
95
- w.userStep = _config.step;
96
- w.userChunk = _config.chunk;
97
- w.userComplete = _config.complete;
98
- w.userError = _config.error;
99
-
100
- _config.step = isFunction(_config.step);
101
- _config.chunk = isFunction(_config.chunk);
102
- _config.complete = isFunction(_config.complete);
103
- _config.error = isFunction(_config.error);
104
- delete _config.worker; // prevent infinite loop
105
-
106
- w.postMessage({
107
- input: _input,
108
- config: _config,
109
- workerId: w.id
110
- });
111
-
112
- return;
113
- }
114
-
115
- var streamer = null;
116
- /*
117
- if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
118
- // create a node Duplex stream for use
119
- // with .pipe
120
- streamer = new DuplexStreamStreamer(_config);
121
- return streamer.getStream();
122
- } else
123
- */
124
- if (typeof _input === 'string') {
125
- // if (_config.download) streamer = new NetworkStreamer(_config);
126
- // else
127
- streamer = new StringStreamer(_config);
128
- }
129
- /*
130
- else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
131
- streamer = new ReadableStreamStreamer(_config);
132
- } else if ((global.File && _input instanceof File) || _input instanceof Object)
133
- // ...Safari. (see issue #106)
134
- streamer = new FileStreamer(_config);
135
- */
136
-
137
- // BEGIN FORK
138
- if (!streamer) {
139
- streamer = new UserDefinedStreamer(_config);
140
- }
141
- // END FORK
142
-
143
- return streamer.stream(_input);
144
- }
145
-
146
- function JsonToCsv(_input, _config) {
147
- // Default configuration
148
-
149
- /** whether to surround every datum with quotes */
150
- var _quotes = false;
151
-
152
- /** whether to write headers */
153
- var _writeHeader = true;
154
-
155
- /** delimiting character(s) */
156
- var _delimiter = ',';
157
-
158
- /** newline character(s) */
159
- var _newline = '\r\n';
160
-
161
- /** quote character */
162
- var _quoteChar = '"';
163
-
164
- /** escaped quote character, either "" or <config.escapeChar>" */
165
- var _escapedQuote = _quoteChar + _quoteChar;
166
-
167
- /** whether to skip empty lines */
168
- var _skipEmptyLines = false;
169
-
170
- /** the columns (keys) we expect when we unparse objects */
171
- var _columns = null;
172
-
173
- unpackConfig();
174
-
175
- var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
176
-
177
- if (typeof _input === 'string') _input = JSON.parse(_input);
178
-
179
- if (Array.isArray(_input)) {
180
- if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
181
- else if (typeof _input[0] === 'object')
182
- return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
183
- } else if (typeof _input === 'object') {
184
- if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
185
-
186
- if (Array.isArray(_input.data)) {
187
- if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
188
-
189
- if (!_input.fields)
190
- _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
191
-
192
- if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
193
- _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
194
- }
195
-
196
- return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
197
- }
198
-
199
- // Default (any valid paths should return before this)
200
- throw new Error('Unable to serialize unrecognized input');
201
-
202
- function unpackConfig() {
203
- if (typeof _config !== 'object') return;
204
-
205
- if (
206
- typeof _config.delimiter === 'string' &&
207
- !Papa.BAD_DELIMITERS.filter(function(value) {
208
- return _config.delimiter.indexOf(value) !== -1;
209
- }).length
210
- ) {
211
- _delimiter = _config.delimiter;
212
- }
213
-
214
- if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
215
- _quotes = _config.quotes;
216
-
217
- if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
218
- _skipEmptyLines = _config.skipEmptyLines;
219
-
220
- if (typeof _config.newline === 'string') _newline = _config.newline;
221
-
222
- if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
223
-
224
- if (typeof _config.header === 'boolean') _writeHeader = _config.header;
225
-
226
- if (Array.isArray(_config.columns)) {
227
- if (_config.columns.length === 0) throw new Error('Option columns is empty');
228
-
229
- _columns = _config.columns;
230
- }
231
-
232
- if (_config.escapeChar !== undefined) {
233
- _escapedQuote = _config.escapeChar + _quoteChar;
234
- }
235
- }
236
-
237
- /** Turns an object's keys into an array */
238
- function objectKeys(obj) {
239
- if (typeof obj !== 'object') return [];
240
- var keys = [];
241
- for (var key in obj) keys.push(key);
242
- return keys;
243
- }
244
-
245
- /** The double for loop that iterates the data and writes out a CSV string including header row */
246
- function serialize(fields, data, skipEmptyLines) {
247
- var csv = '';
248
-
249
- if (typeof fields === 'string') fields = JSON.parse(fields);
250
- if (typeof data === 'string') data = JSON.parse(data);
251
-
252
- var hasHeader = Array.isArray(fields) && fields.length > 0;
253
- var dataKeyedByField = !Array.isArray(data[0]);
254
-
255
- // If there a header row, write it first
256
- if (hasHeader && _writeHeader) {
257
- for (var i = 0; i < fields.length; i++) {
258
- if (i > 0) csv += _delimiter;
259
- csv += safe(fields[i], i);
260
- }
261
- if (data.length > 0) csv += _newline;
262
- }
263
-
264
- // Then write out the data
265
- for (var row = 0; row < data.length; row++) {
266
- var maxCol = hasHeader ? fields.length : data[row].length;
267
-
268
- var emptyLine = false;
269
- var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
270
- if (skipEmptyLines && !hasHeader) {
271
- emptyLine =
272
- skipEmptyLines === 'greedy'
273
- ? data[row].join('').trim() === ''
274
- : data[row].length === 1 && data[row][0].length === 0;
275
- }
276
- if (skipEmptyLines === 'greedy' && hasHeader) {
277
- var line = [];
278
- for (var c = 0; c < maxCol; c++) {
279
- var cx = dataKeyedByField ? fields[c] : c;
280
- line.push(data[row][cx]);
281
- }
282
- emptyLine = line.join('').trim() === '';
283
- }
284
- if (!emptyLine) {
285
- for (var col = 0; col < maxCol; col++) {
286
- if (col > 0 && !nullLine) csv += _delimiter;
287
- var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
288
- csv += safe(data[row][colIdx], col);
289
- }
290
- if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
291
- csv += _newline;
292
- }
293
- }
294
- }
295
- return csv;
296
- }
297
-
298
- /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
299
- function safe(str, col) {
300
- if (typeof str === 'undefined' || str === null) return '';
301
-
302
- if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
303
-
304
- str = str.toString().replace(quoteCharRegex, _escapedQuote);
305
-
306
- var needsQuotes =
307
- (typeof _quotes === 'boolean' && _quotes) ||
308
- (Array.isArray(_quotes) && _quotes[col]) ||
309
- hasAny(str, Papa.BAD_DELIMITERS) ||
310
- str.indexOf(_delimiter) > -1 ||
311
- str.charAt(0) === ' ' ||
312
- str.charAt(str.length - 1) === ' ';
313
-
314
- return needsQuotes ? _quoteChar + str + _quoteChar : str;
315
- }
316
-
317
- function hasAny(str, substrings) {
318
- for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
319
- return false;
320
- }
321
- }
322
-
323
- /** ChunkStreamer is the base prototype for various streamer implementations. */
324
- function ChunkStreamer(config) {
325
- this._handle = null;
326
- this._finished = false;
327
- this._completed = false;
328
- this._input = null;
329
- this._baseIndex = 0;
330
- this._partialLine = '';
331
- this._rowCount = 0;
332
- this._start = 0;
333
- this._nextChunk = null;
334
- this.isFirstChunk = true;
335
- this._completeResults = {
336
- data: [],
337
- errors: [],
338
- meta: {}
339
- };
340
- replaceConfig.call(this, config);
341
-
342
- this.parseChunk = function(chunk, isFakeChunk) {
343
- // First chunk pre-processing
344
- if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
345
- var modifiedChunk = this._config.beforeFirstChunk(chunk);
346
- if (modifiedChunk !== undefined) chunk = modifiedChunk;
347
- }
348
- this.isFirstChunk = false;
349
-
350
- // Rejoin the line we likely just split in two by chunking the file
351
- var aggregate = this._partialLine + chunk;
352
- this._partialLine = '';
353
-
354
- var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
355
-
356
- if (this._handle.paused() || this._handle.aborted()) return;
357
-
358
- var lastIndex = results.meta.cursor;
359
-
360
- if (!this._finished) {
361
- this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
362
- this._baseIndex = lastIndex;
363
- }
364
-
365
- if (results && results.data) this._rowCount += results.data.length;
366
-
367
- var finishedIncludingPreview =
368
- this._finished || (this._config.preview && this._rowCount >= this._config.preview);
369
-
370
- if (IS_PAPA_WORKER) {
371
- global.postMessage({
372
- results: results,
373
- workerId: Papa.WORKER_ID,
374
- finished: finishedIncludingPreview
375
- });
376
- } else if (isFunction(this._config.chunk) && !isFakeChunk) {
377
- this._config.chunk(results, this._handle);
378
- if (this._handle.paused() || this._handle.aborted()) return;
379
- results = undefined;
380
- this._completeResults = undefined;
381
- }
382
-
383
- if (!this._config.step && !this._config.chunk) {
384
- this._completeResults.data = this._completeResults.data.concat(results.data);
385
- this._completeResults.errors = this._completeResults.errors.concat(results.errors);
386
- this._completeResults.meta = results.meta;
387
- }
388
-
389
- if (
390
- !this._completed &&
391
- finishedIncludingPreview &&
392
- isFunction(this._config.complete) &&
393
- (!results || !results.meta.aborted)
394
- ) {
395
- this._config.complete(this._completeResults, this._input);
396
- this._completed = true;
397
- }
398
-
399
- if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
400
-
401
- return results;
402
- };
403
-
404
- this._sendError = function(error) {
405
- if (isFunction(this._config.error)) this._config.error(error);
406
- else if (IS_PAPA_WORKER && this._config.error) {
407
- global.postMessage({
408
- workerId: Papa.WORKER_ID,
409
- error: error,
410
- finished: false
411
- });
412
- }
413
- };
414
-
415
- function replaceConfig(config) {
416
- // Deep-copy the config so we can edit it
417
- var configCopy = copy(config);
418
- configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
419
- if (!config.step && !config.chunk) configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
420
- this._handle = new ParserHandle(configCopy);
421
- this._handle.streamer = this;
422
- this._config = configCopy; // persist the copy to the caller
423
- }
424
- }
425
- function StringStreamer(config) {
426
- config = config || {};
427
- ChunkStreamer.call(this, config);
428
-
429
- var remaining;
430
- this.stream = function(s) {
431
- remaining = s;
432
- return this._nextChunk();
433
- };
434
- this._nextChunk = function() {
435
- if (this._finished) return;
436
- var size = this._config.chunkSize;
437
- var chunk = size ? remaining.substr(0, size) : remaining;
438
- remaining = size ? remaining.substr(size) : '';
439
- this._finished = !remaining;
440
- return this.parseChunk(chunk);
441
- };
442
- }
443
- StringStreamer.prototype = Object.create(StringStreamer.prototype);
444
- StringStreamer.prototype.constructor = StringStreamer;
445
-
446
- // Use one ParserHandle per entire CSV file or string
447
- function ParserHandle(_config) {
448
- // One goal is to minimize the use of regular expressions...
449
- var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
450
- var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
451
-
452
- var self = this;
453
- var _stepCounter = 0; // Number of times step was called (number of rows parsed)
454
- var _rowCounter = 0; // Number of rows that have been parsed so far
455
- var _input; // The input being parsed
456
- var _parser; // The core parser being used
457
- var _paused = false; // Whether we are paused or not
458
- var _aborted = false; // Whether the parser has aborted or not
459
- var _delimiterError; // Temporary state between delimiter detection and processing results
460
- var _fields = []; // Fields are from the header row of the input, if there is one
461
- var _results = {
462
- // The last results returned from the parser
463
- data: [],
464
- errors: [],
465
- meta: {}
466
- };
467
-
468
- if (isFunction(_config.step)) {
469
- var userStep = _config.step;
470
- _config.step = function(results) {
471
- _results = results;
472
-
473
- if (needsHeaderRow()) processResults();
474
- // only call user's step function after header row
475
- else {
476
- processResults();
477
-
478
- // It's possbile that this line was empty and there's no row here after all
479
- if (!_results.data || _results.data.length === 0) return;
480
-
481
- _stepCounter += results.data.length;
482
- if (_config.preview && _stepCounter > _config.preview) _parser.abort();
483
- else userStep(_results, self);
484
- }
485
- };
486
- }
487
-
488
- /**
489
- * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
490
- * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
491
- * when an input comes in multiple chunks, like from a file.
492
- */
493
- this.parse = function(input, baseIndex, ignoreLastRow) {
494
- var quoteChar = _config.quoteChar || '"';
495
- if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
496
-
497
- _delimiterError = false;
498
- if (!_config.delimiter) {
499
- var delimGuess = guessDelimiter(
500
- input,
501
- _config.newline,
502
- _config.skipEmptyLines,
503
- _config.comments,
504
- _config.delimitersToGuess
505
- );
506
- if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;
507
- else {
508
- _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
509
- _config.delimiter = Papa.DefaultDelimiter;
510
- }
511
- _results.meta.delimiter = _config.delimiter;
512
- } else if (isFunction(_config.delimiter)) {
513
- _config.delimiter = _config.delimiter(input);
514
- _results.meta.delimiter = _config.delimiter;
515
- }
516
-
517
- var parserConfig = copy(_config);
518
- if (_config.preview && _config.header) parserConfig.preview++; // to compensate for header row
519
-
520
- _input = input;
521
- _parser = new Parser(parserConfig);
522
- _results = _parser.parse(_input, baseIndex, ignoreLastRow);
523
- processResults();
524
- return _paused ? {meta: {paused: true}} : _results || {meta: {paused: false}};
525
- };
526
-
527
- this.paused = function() {
528
- return _paused;
529
- };
530
-
531
- this.pause = function() {
532
- _paused = true;
533
- _parser.abort();
534
- _input = _input.substr(_parser.getCharIndex());
535
- };
536
-
537
- this.resume = function() {
538
- _paused = false;
539
- self.streamer.parseChunk(_input, true);
540
- };
541
-
542
- this.aborted = function() {
543
- return _aborted;
544
- };
545
-
546
- this.abort = function() {
547
- _aborted = true;
548
- _parser.abort();
549
- _results.meta.aborted = true;
550
- if (isFunction(_config.complete)) _config.complete(_results);
551
- _input = '';
552
- };
553
-
554
- function testEmptyLine(s) {
555
- return _config.skipEmptyLines === 'greedy'
556
- ? s.join('').trim() === ''
557
- : s.length === 1 && s[0].length === 0;
558
- }
559
-
560
- function processResults() {
561
- if (_results && _delimiterError) {
562
- addError(
563
- 'Delimiter',
564
- 'UndetectableDelimiter',
565
- "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
566
- );
567
- _delimiterError = false;
568
- }
569
-
570
- if (_config.skipEmptyLines) {
571
- for (var i = 0; i < _results.data.length; i++)
572
- if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
573
- }
574
-
575
- if (needsHeaderRow()) fillHeaderFields();
576
-
577
- return applyHeaderAndDynamicTypingAndTransformation();
578
- }
579
-
580
- function needsHeaderRow() {
581
- return _config.header && _fields.length === 0;
582
- }
583
-
584
- function fillHeaderFields() {
585
- if (!_results) return;
586
-
587
- function addHeder(header) {
588
- if (isFunction(_config.transformHeader)) header = _config.transformHeader(header);
589
-
590
- _fields.push(header);
591
- }
592
-
593
- if (Array.isArray(_results.data[0])) {
594
- for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
595
- _results.data[i].forEach(addHeder);
596
-
597
- _results.data.splice(0, 1);
598
- }
599
- // if _results.data[0] is not an array, we are in a step where _results.data is the row.
600
- else _results.data.forEach(addHeder);
601
- }
602
-
603
- function shouldApplyDynamicTyping(field) {
604
- // Cache function values to avoid calling it for each row
605
- if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
606
- _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
607
- }
608
- return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
609
- }
610
-
611
- function parseDynamic(field, value) {
612
- if (shouldApplyDynamicTyping(field)) {
613
- if (value === 'true' || value === 'TRUE') return true;
614
- else if (value === 'false' || value === 'FALSE') return false;
615
- else if (FLOAT.test(value)) return parseFloat(value);
616
- else if (ISO_DATE.test(value)) return new Date(value);
617
- else return value === '' ? null : value;
618
- }
619
- return value;
620
- }
621
-
622
- function applyHeaderAndDynamicTypingAndTransformation() {
623
- if (!_results || !_results.data || (!_config.header && !_config.dynamicTyping && !_config.transform))
624
- return _results;
625
-
626
- function processRow(rowSource, i) {
627
- var row = _config.header ? {} : [];
628
-
629
- var j;
630
- for (j = 0; j < rowSource.length; j++) {
631
- var field = j;
632
- var value = rowSource[j];
633
-
634
- if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
635
-
636
- if (_config.transform) value = _config.transform(value, field);
637
-
638
- value = parseDynamic(field, value);
639
-
640
- if (field === '__parsed_extra') {
641
- row[field] = row[field] || [];
642
- row[field].push(value);
643
- } else row[field] = value;
644
- }
645
-
646
- if (_config.header) {
647
- if (j > _fields.length)
648
- addError(
649
- 'FieldMismatch',
650
- 'TooManyFields',
651
- 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
652
- _rowCounter + i
653
- );
654
- else if (j < _fields.length)
655
- addError(
656
- 'FieldMismatch',
657
- 'TooFewFields',
658
- 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
659
- _rowCounter + i
660
- );
661
- }
662
-
663
- return row;
664
- }
665
-
666
- var incrementBy = 1;
667
- if (!_results.data[0] || Array.isArray(_results.data[0])) {
668
- _results.data = _results.data.map(processRow);
669
- incrementBy = _results.data.length;
670
- } else _results.data = processRow(_results.data, 0);
671
-
672
- if (_config.header && _results.meta) _results.meta.fields = _fields;
673
-
674
- _rowCounter += incrementBy;
675
- return _results;
676
- }
677
-
678
- function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
679
- var bestDelim, bestDelta, fieldCountPrevRow;
680
-
681
- delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
682
-
683
- for (var i = 0; i < delimitersToGuess.length; i++) {
684
- var delim = delimitersToGuess[i];
685
- var delta = 0,
686
- avgFieldCount = 0,
687
- emptyLinesCount = 0;
688
- fieldCountPrevRow = undefined;
689
-
690
- var preview = new Parser({
691
- comments: comments,
692
- delimiter: delim,
693
- newline: newline,
694
- preview: 10
695
- }).parse(input);
696
-
697
- for (var j = 0; j < preview.data.length; j++) {
698
- if (skipEmptyLines && testEmptyLine(preview.data[j])) {
699
- emptyLinesCount++;
700
- continue;
701
- }
702
- var fieldCount = preview.data[j].length;
703
- avgFieldCount += fieldCount;
704
-
705
- if (typeof fieldCountPrevRow === 'undefined') {
706
- fieldCountPrevRow = 0;
707
- continue;
708
- } else if (fieldCount > 1) {
709
- delta += Math.abs(fieldCount - fieldCountPrevRow);
710
- fieldCountPrevRow = fieldCount;
711
- }
712
- }
713
-
714
- if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;
715
-
716
- if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
717
- bestDelta = delta;
718
- bestDelim = delim;
719
- }
720
- }
721
-
722
- _config.delimiter = bestDelim;
723
-
724
- return {
725
- successful: !!bestDelim,
726
- bestDelimiter: bestDelim
727
- };
728
- }
729
-
730
- function guessLineEndings(input, quoteChar) {
731
- input = input.substr(0, 1024 * 1024); // max length 1 MB
732
- // Replace all the text inside quotes
733
- var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
734
- input = input.replace(re, '');
735
-
736
- var r = input.split('\r');
737
-
738
- var n = input.split('\n');
739
-
740
- var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
741
-
742
- if (r.length === 1 || nAppearsFirst) return '\n';
743
-
744
- var numWithN = 0;
745
- for (var i = 0; i < r.length; i++) {
746
- if (r[i][0] === '\n') numWithN++;
747
- }
748
-
749
- return numWithN >= r.length / 2 ? '\r\n' : '\r';
750
- }
751
-
752
- function addError(type, code, msg, row) {
753
- _results.errors.push({
754
- type: type,
755
- code: code,
756
- message: msg,
757
- row: row
758
- });
759
- }
760
- }
761
-
762
- /** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
763
- function escapeRegExp(string) {
764
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
765
- }
766
-
767
- /** The core parser implements speedy and correct CSV parsing */
768
- function Parser(config) {
769
- // Unpack the config object
770
- config = config || {};
771
- var delim = config.delimiter;
772
- var newline = config.newline;
773
- var comments = config.comments;
774
- var step = config.step;
775
- var preview = config.preview;
776
- var fastMode = config.fastMode;
777
- var quoteChar;
778
- /** Allows for no quoteChar by setting quoteChar to undefined in config */
779
- if (config.quoteChar === undefined) {
780
- quoteChar = '"';
781
- } else {
782
- quoteChar = config.quoteChar;
783
- }
784
- var escapeChar = quoteChar;
785
- if (config.escapeChar !== undefined) {
786
- escapeChar = config.escapeChar;
787
- }
788
-
789
- // Delimiter must be valid
790
- if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
791
-
792
- // Comment character must be valid
793
- if (comments === delim) throw new Error('Comment character same as delimiter');
794
- else if (comments === true) comments = '#';
795
- else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
796
- comments = false;
797
-
798
- // Newline must be valid: \r, \n, or \r\n
799
- if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
800
-
801
- // We're gonna need these at the Parser scope
802
- var cursor = 0;
803
- var aborted = false;
804
-
805
- this.parse = function(input, baseIndex, ignoreLastRow) {
806
- // For some reason, in Chrome, this speeds things up (!?)
807
- if (typeof input !== 'string') throw new Error('Input must be a string');
808
-
809
- // We don't need to compute some of these every time parse() is called,
810
- // but having them in a more local scope seems to perform better
811
- var inputLen = input.length,
812
- delimLen = delim.length,
813
- newlineLen = newline.length,
814
- commentsLen = comments.length;
815
- var stepIsFunction = isFunction(step);
816
-
817
- // Establish starting state
818
- cursor = 0;
819
- var data = [],
820
- errors = [],
821
- row = [],
822
- lastCursor = 0;
823
-
824
- if (!input) return returnable();
825
-
826
- if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
827
- var rows = input.split(newline);
828
- for (var i = 0; i < rows.length; i++) {
829
- row = rows[i];
830
- cursor += row.length;
831
- if (i !== rows.length - 1) cursor += newline.length;
832
- else if (ignoreLastRow) return returnable();
833
- if (comments && row.substr(0, commentsLen) === comments) continue;
834
- if (stepIsFunction) {
835
- data = [];
836
- pushRow(row.split(delim));
837
- doStep();
838
- if (aborted) return returnable();
839
- } else pushRow(row.split(delim));
840
- if (preview && i >= preview) {
841
- data = data.slice(0, preview);
842
- return returnable(true);
843
- }
844
- }
845
- return returnable();
846
- }
847
-
848
- var nextDelim = input.indexOf(delim, cursor);
849
- var nextNewline = input.indexOf(newline, cursor);
850
- var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
851
- var quoteSearch;
852
-
853
- // Parser loop
854
- for (;;) {
855
- // Field has opening quote
856
- if (input[cursor] === quoteChar) {
857
- // Start our search for the closing quote where the cursor is
858
- quoteSearch = cursor;
859
-
860
- // Skip the opening quote
861
- cursor++;
862
-
863
- for (;;) {
864
- // Find closing quote
865
- quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
866
-
867
- //No other quotes are found - no other delimiters
868
- if (quoteSearch === -1) {
869
- if (!ignoreLastRow) {
870
- // No closing quote... what a pity
871
- errors.push({
872
- type: 'Quotes',
873
- code: 'MissingQuotes',
874
- message: 'Quoted field unterminated',
875
- row: data.length, // row has yet to be inserted
876
- index: cursor
877
- });
878
- }
879
- return finish();
880
- }
881
-
882
- // Closing quote at EOF
883
- if (quoteSearch === inputLen - 1) {
884
- var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
885
- return finish(value);
886
- }
887
-
888
- // If this quote is escaped, it's part of the data; skip it
889
- // If the quote character is the escape character, then check if the next character is the escape character
890
- if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
891
- quoteSearch++;
892
- continue;
893
- }
894
-
895
- // If the quote character is not the escape character, then check if the previous character was the escape character
896
- if (
897
- quoteChar !== escapeChar &&
898
- quoteSearch !== 0 &&
899
- input[quoteSearch - 1] === escapeChar
900
- ) {
901
- continue;
902
- }
903
-
904
- // Check up to nextDelim or nextNewline, whichever is closest
905
- var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
906
- var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
907
-
908
- // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
909
- if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
910
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
911
- cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
912
- nextDelim = input.indexOf(delim, cursor);
913
- nextNewline = input.indexOf(newline, cursor);
914
-
915
- if (stepIsFunction) {
916
- doStep();
917
- if (aborted) return returnable();
918
- }
919
-
920
- if (preview && data.length >= preview) return returnable(true);
921
-
922
- break;
923
- }
924
-
925
- var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
926
-
927
- // Closing quote followed by newline or 'unnecessary spaces + newLine'
928
- if (
929
- input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
930
- ) {
931
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
932
- saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
933
- nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
934
-
935
- if (stepIsFunction) {
936
- doStep();
937
- if (aborted) return returnable();
938
- }
939
-
940
- if (preview && data.length >= preview) return returnable(true);
941
-
942
- break;
943
- }
944
-
945
- // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
946
- errors.push({
947
- type: 'Quotes',
948
- code: 'InvalidQuotes',
949
- message: 'Trailing quote on quoted field is malformed',
950
- row: data.length, // row has yet to be inserted
951
- index: cursor
952
- });
953
-
954
- quoteSearch++;
955
- continue;
956
- }
957
-
958
- if (stepIsFunction) {
959
- doStep();
960
- if (aborted) return returnable();
961
- }
962
-
963
- if (preview && data.length >= preview) return returnable(true);
964
- continue;
965
- }
966
-
967
- // Comment found at start of new line
968
- if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
969
- if (nextNewline === -1)
970
- // Comment ends at EOF
971
- return returnable();
972
- cursor = nextNewline + newlineLen;
973
- nextNewline = input.indexOf(newline, cursor);
974
- nextDelim = input.indexOf(delim, cursor);
975
- continue;
976
- }
977
-
978
- // Next delimiter comes before next newline, so we've reached end of field
979
- if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
980
- row.push(input.substring(cursor, nextDelim));
981
- cursor = nextDelim + delimLen;
982
- nextDelim = input.indexOf(delim, cursor);
983
- continue;
984
- }
985
-
986
- // End of row
987
- if (nextNewline !== -1) {
988
- row.push(input.substring(cursor, nextNewline));
989
- saveRow(nextNewline + newlineLen);
990
-
991
- if (stepIsFunction) {
992
- doStep();
993
- if (aborted) return returnable();
994
- }
995
-
996
- if (preview && data.length >= preview) return returnable(true);
997
-
998
- continue;
999
- }
1000
-
1001
- break;
1002
- }
1003
-
1004
- return finish();
1005
-
1006
/**
 * Stores a finished row in the result set and records the cursor position
 * at which that row was committed.
 */
function pushRow(row) {
  lastCursor = cursor;
  data.push(row);
}
1010
-
1011
/**
 * Measures the run of whitespace that sits between the closing quote
 * (at `quoteSearch`) and the given index.
 *
 * Returns the length of that run when the text in between is non-empty and
 * consists only of whitespace; returns 0 when `index` is -1, when the text
 * in between is empty, or when it contains any non-whitespace character.
 */
function extraSpaces(index) {
  if (index === -1) {
    return 0;
  }
  var gap = input.substring(quoteSearch + 1, index);
  return gap && gap.trim() === '' ? gap.length : 0;
}
1025
-
1026
/**
 * Completes parsing at end of input.
 *
 * Unless the last row is being ignored, the final field (the supplied
 * `value`, or everything from the cursor to the end of the input when no
 * value is given) is appended to the current row, the row is saved, and the
 * step callback is run if one is configured. The results object is returned
 * in every case.
 */
function finish(value) {
  if (!ignoreLastRow) {
    var lastField = typeof value === 'undefined' ? input.substr(cursor) : value;
    row.push(lastField);
    // Move the cursor to the end first; this matters if parsing is paused.
    cursor = inputLen;
    pushRow(row);
    if (stepIsFunction) {
      doStep();
    }
  }
  return returnable();
}
1039
-
1040
/**
 * Commits the row being built and starts a fresh one.
 *
 * The cursor is moved to `newCursor`, the completed row is added to the
 * results, and the position of the next newline is looked up from the new
 * cursor. Running the user's step function and handling the preview limit
 * are left to the caller.
 */
function saveRow(newCursor) {
  var completedRow = row;
  cursor = newCursor;
  row = [];
  pushRow(completedRow);
  nextNewline = input.indexOf(newline, newCursor);
}
1052
-
1053
/**
 * Builds the results object handed back to callers and to the step callback.
 *
 * `stopped` is reported (coerced to a boolean) as `meta.truncated`. When
 * `step` is truthy only the first collected row is exposed as `data`;
 * otherwise the whole collected row array is. `meta.cursor` is the position
 * of the last saved row plus `baseIndex` (treated as 0 when falsy).
 */
function returnable(stopped, step) {
  var meta = {
    delimiter: delim,
    linebreak: newline,
    aborted: aborted,
    truncated: Boolean(stopped),
    cursor: lastCursor + (baseIndex || 0)
  };
  var result = {
    data: step ? data[0] : data,
    errors: errors,
    meta: meta
  };
  return result;
}
1068
-
1069
/**
 * Hands the current results to the user's step callback in step form,
 * then clears the collected rows and errors ready for the next row.
 */
function doStep() {
  var stepResults = returnable(undefined, true);
  step(stepResults);
  errors = [];
  data = [];
}
1075
- };
1076
-
1077
/**
 * Raises the abort flag. The parse loop checks this flag after each step
 * callback and returns the results collected so far when it is set.
 */
this.abort = function() {
  aborted = true;
};
1081
-
1082
/**
 * Reports the parser's current cursor position.
 */
this.getCharIndex = function() {
  return cursor;
};
1086
- }
1087
-
1088
/**
 * Placeholder that always throws an Error with the message
 * 'Not implemented.'.
 */
function notImplemented() {
  var error = new Error('Not implemented.');
  throw error;
}
1091
-
1092
/**
 * Makes a (mostly) deep copy of an array or object.
 *
 * Anything that is not an object, and null, is returned unchanged, so
 * primitives and functions are shared rather than cloned. Arrays are copied
 * into a new array and every other object into a new plain object; each
 * property enumerated by `for...in` is copied recursively. Prototypes are
 * not preserved, which is why the copy is only "mostly" deep.
 */
function copy(obj) {
  var isContainer = typeof obj === 'object' && obj !== null;
  if (!isContainer) {
    return obj;
  }

  var clone;
  if (Array.isArray(obj)) {
    clone = [];
  } else {
    clone = {};
  }

  for (var prop in obj) {
    clone[prop] = copy(obj[prop]);
  }
  return clone;
}
1099
-
1100
/**
 * Tells whether the given value is callable, i.e. whether `typeof`
 * reports it as 'function'.
 */
function isFunction(func) {
  var kind = typeof func;
  return kind === 'function';
}