@loaders.gl/csv 4.0.0-alpha.4 → 4.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,231 +1,121 @@
1
- // This is a fork of papaparse
2
- // https://github.com/mholt/PapaParse
3
1
  /* @license
4
2
  Papa Parse
5
3
  v5.0.0-beta.0
6
4
  https://github.com/mholt/PapaParse
7
5
  License: MIT
8
6
  */
9
- // FORK SUMMARY:
10
- // - Adopt ES6 exports
11
- // - Implement new AsyncIteratorStreamer
12
- // - Remove non Async Iterator streamers (can all be handled by new streamer)
13
- // - Remove unused Worker support (loaders.gl worker system used instead)
14
- // - Remove unused jQuery plugin support
15
-
16
- /* eslint-disable */
17
- // @ts-nocheck
18
- var global = (function() {
19
- // alternative method, similar to `Function('return this')()`
20
- // but without using `eval` (which is disabled when
21
- // using Content Security Policy).
22
-
23
- if (typeof self !== 'undefined') {
24
- return self;
25
- }
26
- if (typeof window !== 'undefined') {
27
- return window;
28
- }
29
- if (typeof global !== 'undefined') {
30
- return global;
31
- }
32
-
33
- // When running tests none of the above have been defined
34
- return {};
35
- })();
36
-
37
- var IS_PAPA_WORKER = false;
38
-
39
- var Papa = {};
40
- module.exports = Papa;
41
- Papa.parse = CsvToJson;
42
- Papa.unparse = JsonToCsv;
43
-
44
- Papa.RECORD_SEP = String.fromCharCode(30);
45
- Papa.UNIT_SEP = String.fromCharCode(31);
46
- Papa.BYTE_ORDER_MARK = '\ufeff';
47
- Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
48
- Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
49
- Papa.NODE_STREAM_INPUT = 1;
50
-
51
- // Configurable chunk sizes for local and remote files, respectively
52
- Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
53
- Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
54
- Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
55
-
56
- // Exposed for testing and development only
57
- Papa.Parser = Parser;
58
- Papa.ParserHandle = ParserHandle;
59
-
60
- // BEGIN FORK
61
- Papa.ChunkStreamer = ChunkStreamer;
62
- Papa.StringStreamer = StringStreamer;
63
- /*
64
- Papa.NetworkStreamer = NetworkStreamer;
65
- Papa.FileStreamer = FileStreamer;
66
- Papa.ReadableStreamStreamer = ReadableStreamStreamer;
67
- if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
68
- Papa.DuplexStreamStreamer = DuplexStreamStreamer;
69
- }
70
- */
71
- // END FORK
72
-
73
- // BEGIN FORK
74
- // Adds an argument to papa.parse
75
- // function CsvToJson(_input, _config)
76
- function CsvToJson(
77
- _input,
78
- _config,
79
- UserDefinedStreamer // BEGIN FORK
80
- ) {
7
+ const BYTE_ORDER_MARK = '\ufeff';
8
+ const Papa = {
9
+ parse: CsvToJson,
10
+ unparse: JsonToCsv,
11
+ RECORD_SEP: String.fromCharCode(30),
12
+ UNIT_SEP: String.fromCharCode(31),
13
+ BYTE_ORDER_MARK,
14
+ BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
15
+ WORKERS_SUPPORTED: false,
16
+ NODE_STREAM_INPUT: 1,
17
+ LocalChunkSize: 1024 * 1024 * 10,
18
+ RemoteChunkSize: 1024 * 1024 * 5,
19
+ DefaultDelimiter: ',',
20
+ Parser: Parser,
21
+ ParserHandle: ParserHandle,
22
+ ChunkStreamer: ChunkStreamer,
23
+ StringStreamer: StringStreamer
24
+ };
25
+ export default Papa;
26
+
27
+ function CsvToJson(_input, _config, UserDefinedStreamer) {
81
28
  _config = _config || {};
82
29
  var dynamicTyping = _config.dynamicTyping || false;
30
+
83
31
  if (isFunction(dynamicTyping)) {
84
32
  _config.dynamicTypingFunction = dynamicTyping;
85
- // Will be filled on first row call
86
33
  dynamicTyping = {};
87
34
  }
88
- _config.dynamicTyping = dynamicTyping;
89
35
 
36
+ _config.dynamicTyping = dynamicTyping;
90
37
  _config.transform = isFunction(_config.transform) ? _config.transform : false;
91
38
 
92
39
  if (_config.worker && Papa.WORKERS_SUPPORTED) {
93
40
  var w = newWorker();
94
-
95
41
  w.userStep = _config.step;
96
42
  w.userChunk = _config.chunk;
97
43
  w.userComplete = _config.complete;
98
44
  w.userError = _config.error;
99
-
100
45
  _config.step = isFunction(_config.step);
101
46
  _config.chunk = isFunction(_config.chunk);
102
47
  _config.complete = isFunction(_config.complete);
103
48
  _config.error = isFunction(_config.error);
104
- delete _config.worker; // prevent infinite loop
105
-
49
+ delete _config.worker;
106
50
  w.postMessage({
107
51
  input: _input,
108
52
  config: _config,
109
53
  workerId: w.id
110
54
  });
111
-
112
55
  return;
113
56
  }
114
57
 
115
58
  var streamer = null;
116
- /*
117
- if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
118
- // create a node Duplex stream for use
119
- // with .pipe
120
- streamer = new DuplexStreamStreamer(_config);
121
- return streamer.getStream();
122
- } else
123
- */
59
+
124
60
  if (typeof _input === 'string') {
125
- // if (_config.download) streamer = new NetworkStreamer(_config);
126
- // else
127
61
  streamer = new StringStreamer(_config);
128
62
  }
129
- /*
130
- else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
131
- streamer = new ReadableStreamStreamer(_config);
132
- } else if ((global.File && _input instanceof File) || _input instanceof Object)
133
- // ...Safari. (see issue #106)
134
- streamer = new FileStreamer(_config);
135
- */
136
-
137
- // BEGIN FORK
63
+
138
64
  if (!streamer) {
139
65
  streamer = new UserDefinedStreamer(_config);
140
66
  }
141
- // END FORK
142
67
 
143
68
  return streamer.stream(_input);
144
69
  }
145
70
 
146
71
  function JsonToCsv(_input, _config) {
147
- // Default configuration
148
-
149
- /** whether to surround every datum with quotes */
150
72
  var _quotes = false;
151
-
152
- /** whether to write headers */
153
73
  var _writeHeader = true;
154
-
155
- /** delimiting character(s) */
156
74
  var _delimiter = ',';
157
-
158
- /** newline character(s) */
159
75
  var _newline = '\r\n';
160
-
161
- /** quote character */
162
76
  var _quoteChar = '"';
163
77
 
164
- /** escaped quote character, either "" or <config.escapeChar>" */
165
78
  var _escapedQuote = _quoteChar + _quoteChar;
166
79
 
167
- /** whether to skip empty lines */
168
80
  var _skipEmptyLines = false;
169
-
170
- /** the columns (keys) we expect when we unparse objects */
171
81
  var _columns = null;
172
-
173
82
  unpackConfig();
174
-
175
83
  var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
176
-
177
84
  if (typeof _input === 'string') _input = JSON.parse(_input);
178
85
 
179
86
  if (Array.isArray(_input)) {
180
- if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
181
- else if (typeof _input[0] === 'object')
182
- return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
87
+ if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
183
88
  } else if (typeof _input === 'object') {
184
89
  if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
185
90
 
186
91
  if (Array.isArray(_input.data)) {
187
92
  if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
188
-
189
- if (!_input.fields)
190
- _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
191
-
192
- if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
193
- _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
93
+ if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
94
+ if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
194
95
  }
195
96
 
196
97
  return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
197
98
  }
198
99
 
199
- // Default (any valid paths should return before this)
200
100
  throw new Error('Unable to serialize unrecognized input');
201
101
 
202
102
  function unpackConfig() {
203
103
  if (typeof _config !== 'object') return;
204
104
 
205
- if (
206
- typeof _config.delimiter === 'string' &&
207
- !Papa.BAD_DELIMITERS.filter(function(value) {
208
- return _config.delimiter.indexOf(value) !== -1;
209
- }).length
210
- ) {
105
+ if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
106
+ return _config.delimiter.indexOf(value) !== -1;
107
+ }).length) {
211
108
  _delimiter = _config.delimiter;
212
109
  }
213
110
 
214
- if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
215
- _quotes = _config.quotes;
216
-
217
- if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
218
- _skipEmptyLines = _config.skipEmptyLines;
219
-
111
+ if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
112
+ if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
220
113
  if (typeof _config.newline === 'string') _newline = _config.newline;
221
-
222
114
  if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
223
-
224
115
  if (typeof _config.header === 'boolean') _writeHeader = _config.header;
225
116
 
226
117
  if (Array.isArray(_config.columns)) {
227
118
  if (_config.columns.length === 0) throw new Error('Option columns is empty');
228
-
229
119
  _columns = _config.columns;
230
120
  }
231
121
 
@@ -234,93 +124,82 @@ function JsonToCsv(_input, _config) {
234
124
  }
235
125
  }
236
126
 
237
- /** Turns an object's keys into an array */
238
127
  function objectKeys(obj) {
239
128
  if (typeof obj !== 'object') return [];
240
129
  var keys = [];
130
+
241
131
  for (var key in obj) keys.push(key);
132
+
242
133
  return keys;
243
134
  }
244
135
 
245
- /** The double for loop that iterates the data and writes out a CSV string including header row */
246
136
  function serialize(fields, data, skipEmptyLines) {
247
137
  var csv = '';
248
-
249
138
  if (typeof fields === 'string') fields = JSON.parse(fields);
250
139
  if (typeof data === 'string') data = JSON.parse(data);
251
-
252
140
  var hasHeader = Array.isArray(fields) && fields.length > 0;
253
141
  var dataKeyedByField = !Array.isArray(data[0]);
254
142
 
255
- // If there a header row, write it first
256
143
  if (hasHeader && _writeHeader) {
257
144
  for (var i = 0; i < fields.length; i++) {
258
145
  if (i > 0) csv += _delimiter;
259
146
  csv += safe(fields[i], i);
260
147
  }
148
+
261
149
  if (data.length > 0) csv += _newline;
262
150
  }
263
151
 
264
- // Then write out the data
265
152
  for (var row = 0; row < data.length; row++) {
266
153
  var maxCol = hasHeader ? fields.length : data[row].length;
267
-
268
154
  var emptyLine = false;
269
155
  var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
156
+
270
157
  if (skipEmptyLines && !hasHeader) {
271
- emptyLine =
272
- skipEmptyLines === 'greedy'
273
- ? data[row].join('').trim() === ''
274
- : data[row].length === 1 && data[row][0].length === 0;
158
+ emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
275
159
  }
160
+
276
161
  if (skipEmptyLines === 'greedy' && hasHeader) {
277
162
  var line = [];
163
+
278
164
  for (var c = 0; c < maxCol; c++) {
279
165
  var cx = dataKeyedByField ? fields[c] : c;
280
166
  line.push(data[row][cx]);
281
167
  }
168
+
282
169
  emptyLine = line.join('').trim() === '';
283
170
  }
171
+
284
172
  if (!emptyLine) {
285
173
  for (var col = 0; col < maxCol; col++) {
286
174
  if (col > 0 && !nullLine) csv += _delimiter;
287
175
  var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
288
176
  csv += safe(data[row][colIdx], col);
289
177
  }
290
- if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
178
+
179
+ if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
291
180
  csv += _newline;
292
181
  }
293
182
  }
294
183
  }
184
+
295
185
  return csv;
296
186
  }
297
187
 
298
- /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
299
188
  function safe(str, col) {
300
189
  if (typeof str === 'undefined' || str === null) return '';
301
-
302
190
  if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
303
-
304
191
  str = str.toString().replace(quoteCharRegex, _escapedQuote);
305
-
306
- var needsQuotes =
307
- (typeof _quotes === 'boolean' && _quotes) ||
308
- (Array.isArray(_quotes) && _quotes[col]) ||
309
- hasAny(str, Papa.BAD_DELIMITERS) ||
310
- str.indexOf(_delimiter) > -1 ||
311
- str.charAt(0) === ' ' ||
312
- str.charAt(str.length - 1) === ' ';
313
-
192
+ var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
314
193
  return needsQuotes ? _quoteChar + str + _quoteChar : str;
315
194
  }
316
195
 
317
196
  function hasAny(str, substrings) {
318
197
  for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
198
+
319
199
  return false;
320
200
  }
321
201
  }
322
202
 
323
- /** ChunkStreamer is the base prototype for various streamer implementations. */
324
203
  function ChunkStreamer(config) {
325
204
  this._handle = null;
326
205
  this._finished = false;
@@ -339,22 +218,20 @@ function ChunkStreamer(config) {
339
218
  };
340
219
  replaceConfig.call(this, config);
341
220
 
342
- this.parseChunk = function(chunk, isFakeChunk) {
343
- // First chunk pre-processing
221
+ this.parseChunk = function (chunk, isFakeChunk) {
344
222
  if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
345
223
  var modifiedChunk = this._config.beforeFirstChunk(chunk);
224
+
346
225
  if (modifiedChunk !== undefined) chunk = modifiedChunk;
347
226
  }
348
- this.isFirstChunk = false;
349
227
 
350
- // Rejoin the line we likely just split in two by chunking the file
228
+ this.isFirstChunk = false;
351
229
  var aggregate = this._partialLine + chunk;
352
230
  this._partialLine = '';
353
231
 
354
232
  var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
355
233
 
356
234
  if (this._handle.paused() || this._handle.aborted()) return;
357
-
358
235
  var lastIndex = results.meta.cursor;
359
236
 
360
237
  if (!this._finished) {
@@ -363,18 +240,11 @@ function ChunkStreamer(config) {
363
240
  }
364
241
 
365
242
  if (results && results.data) this._rowCount += results.data.length;
243
+ var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
366
244
 
367
- var finishedIncludingPreview =
368
- this._finished || (this._config.preview && this._rowCount >= this._config.preview);
369
-
370
- if (IS_PAPA_WORKER) {
371
- global.postMessage({
372
- results: results,
373
- workerId: Papa.WORKER_ID,
374
- finished: finishedIncludingPreview
375
- });
376
- } else if (isFunction(this._config.chunk) && !isFakeChunk) {
245
+ if (isFunction(this._config.chunk) && !isFakeChunk) {
377
246
  this._config.chunk(results, this._handle);
247
+
378
248
  if (this._handle.paused() || this._handle.aborted()) return;
379
249
  results = undefined;
380
250
  this._completeResults = undefined;
@@ -386,52 +256,41 @@ function ChunkStreamer(config) {
386
256
  this._completeResults.meta = results.meta;
387
257
  }
388
258
 
389
- if (
390
- !this._completed &&
391
- finishedIncludingPreview &&
392
- isFunction(this._config.complete) &&
393
- (!results || !results.meta.aborted)
394
- ) {
259
+ if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
395
260
  this._config.complete(this._completeResults, this._input);
261
+
396
262
  this._completed = true;
397
263
  }
398
264
 
399
265
  if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
400
-
401
266
  return results;
402
267
  };
403
268
 
404
- this._sendError = function(error) {
269
+ this._sendError = function (error) {
405
270
  if (isFunction(this._config.error)) this._config.error(error);
406
- else if (IS_PAPA_WORKER && this._config.error) {
407
- global.postMessage({
408
- workerId: Papa.WORKER_ID,
409
- error: error,
410
- finished: false
411
- });
412
- }
413
271
  };
414
272
 
415
273
  function replaceConfig(config) {
416
- // Deep-copy the config so we can edit it
417
274
  var configCopy = copy(config);
418
- configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
419
- if (!config.step && !config.chunk) configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
275
+ configCopy.chunkSize = parseInt(configCopy.chunkSize);
276
+ if (!config.step && !config.chunk) configCopy.chunkSize = null;
420
277
  this._handle = new ParserHandle(configCopy);
421
278
  this._handle.streamer = this;
422
- this._config = configCopy; // persist the copy to the caller
279
+ this._config = configCopy;
423
280
  }
424
281
  }
282
+
425
283
  function StringStreamer(config) {
426
284
  config = config || {};
427
285
  ChunkStreamer.call(this, config);
428
-
429
286
  var remaining;
430
- this.stream = function(s) {
287
+
288
+ this.stream = function (s) {
431
289
  remaining = s;
432
290
  return this._nextChunk();
433
291
  };
434
- this._nextChunk = function() {
292
+
293
+ this._nextChunk = function () {
435
294
  if (this._finished) return;
436
295
  var size = this._config.chunkSize;
437
296
  var chunk = size ? remaining.substr(0, size) : remaining;
@@ -440,26 +299,28 @@ function StringStreamer(config) {
440
299
  return this.parseChunk(chunk);
441
300
  };
442
301
  }
302
+
443
303
  StringStreamer.prototype = Object.create(StringStreamer.prototype);
444
304
  StringStreamer.prototype.constructor = StringStreamer;
445
305
 
446
- // Use one ParserHandle per entire CSV file or string
447
306
  function ParserHandle(_config) {
448
- // One goal is to minimize the use of regular expressions...
449
307
  var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
450
308
  var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
451
-
452
309
  var self = this;
453
- var _stepCounter = 0; // Number of times step was called (number of rows parsed)
454
- var _rowCounter = 0; // Number of rows that have been parsed so far
455
- var _input; // The input being parsed
456
- var _parser; // The core parser being used
457
- var _paused = false; // Whether we are paused or not
458
- var _aborted = false; // Whether the parser has aborted or not
459
- var _delimiterError; // Temporary state between delimiter detection and processing results
460
- var _fields = []; // Fields are from the header row of the input, if there is one
310
+ var _stepCounter = 0;
311
+ var _rowCounter = 0;
312
+
313
+ var _input;
314
+
315
+ var _parser;
316
+
317
+ var _paused = false;
318
+ var _aborted = false;
319
+
320
+ var _delimiterError;
321
+
322
+ var _fields = [];
461
323
  var _results = {
462
- // The last results returned from the parser
463
324
  data: [],
464
325
  errors: [],
465
326
  meta: {}
@@ -467,45 +328,27 @@ function ParserHandle(_config) {
467
328
 
468
329
  if (isFunction(_config.step)) {
469
330
  var userStep = _config.step;
470
- _config.step = function(results) {
471
- _results = results;
472
-
473
- if (needsHeaderRow()) processResults();
474
- // only call user's step function after header row
475
- else {
476
- processResults();
477
331
 
478
- // It's possbile that this line was empty and there's no row here after all
479
- if (!_results.data || _results.data.length === 0) return;
480
-
481
- _stepCounter += results.data.length;
482
- if (_config.preview && _stepCounter > _config.preview) _parser.abort();
483
- else userStep(_results, self);
484
- }
332
+ _config.step = function (results) {
333
+ _results = results;
334
+ if (needsHeaderRow()) processResults();else {
335
+ processResults();
336
+ if (!_results.data || _results.data.length === 0) return;
337
+ _stepCounter += results.data.length;
338
+ if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
339
+ }
485
340
  };
486
341
  }
487
342
 
488
- /**
489
- * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
490
- * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
491
- * when an input comes in multiple chunks, like from a file.
492
- */
493
- this.parse = function(input, baseIndex, ignoreLastRow) {
343
+ this.parse = function (input, baseIndex, ignoreLastRow) {
494
344
  var quoteChar = _config.quoteChar || '"';
495
345
  if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
496
-
497
346
  _delimiterError = false;
347
+
498
348
  if (!_config.delimiter) {
499
- var delimGuess = guessDelimiter(
500
- input,
501
- _config.newline,
502
- _config.skipEmptyLines,
503
- _config.comments,
504
- _config.delimitersToGuess
505
- );
506
- if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;
507
- else {
508
- _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
349
+ var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
350
+ if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
351
+ _delimiterError = true;
509
352
  _config.delimiter = Papa.DefaultDelimiter;
510
353
  }
511
354
  _results.meta.delimiter = _config.delimiter;
@@ -515,65 +358,68 @@ function ParserHandle(_config) {
515
358
  }
516
359
 
517
360
  var parserConfig = copy(_config);
518
- if (_config.preview && _config.header) parserConfig.preview++; // to compensate for header row
519
-
361
+ if (_config.preview && _config.header) parserConfig.preview++;
520
362
  _input = input;
521
363
  _parser = new Parser(parserConfig);
522
364
  _results = _parser.parse(_input, baseIndex, ignoreLastRow);
523
365
  processResults();
524
- return _paused ? {meta: {paused: true}} : _results || {meta: {paused: false}};
366
+ return _paused ? {
367
+ meta: {
368
+ paused: true
369
+ }
370
+ } : _results || {
371
+ meta: {
372
+ paused: false
373
+ }
374
+ };
525
375
  };
526
376
 
527
- this.paused = function() {
377
+ this.paused = function () {
528
378
  return _paused;
529
379
  };
530
380
 
531
- this.pause = function() {
381
+ this.pause = function () {
532
382
  _paused = true;
383
+
533
384
  _parser.abort();
385
+
534
386
  _input = _input.substr(_parser.getCharIndex());
535
387
  };
536
388
 
537
- this.resume = function() {
389
+ this.resume = function () {
538
390
  _paused = false;
539
391
  self.streamer.parseChunk(_input, true);
540
392
  };
541
393
 
542
- this.aborted = function() {
394
+ this.aborted = function () {
543
395
  return _aborted;
544
396
  };
545
397
 
546
- this.abort = function() {
398
+ this.abort = function () {
547
399
  _aborted = true;
400
+
548
401
  _parser.abort();
402
+
549
403
  _results.meta.aborted = true;
550
404
  if (isFunction(_config.complete)) _config.complete(_results);
551
405
  _input = '';
552
406
  };
553
407
 
554
408
  function testEmptyLine(s) {
555
- return _config.skipEmptyLines === 'greedy'
556
- ? s.join('').trim() === ''
557
- : s.length === 1 && s[0].length === 0;
409
+ return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
558
410
  }
559
411
 
560
412
  function processResults() {
561
413
  if (_results && _delimiterError) {
562
- addError(
563
- 'Delimiter',
564
- 'UndetectableDelimiter',
565
- "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
566
- );
414
+ addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
567
415
  _delimiterError = false;
568
416
  }
569
417
 
570
418
  if (_config.skipEmptyLines) {
571
- for (var i = 0; i < _results.data.length; i++)
572
- if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
419
+ for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
573
420
  }
574
421
 
575
422
  if (needsHeaderRow()) fillHeaderFields();
576
-
577
423
  return applyHeaderAndDynamicTypingAndTransformation();
578
424
  }
579
425
 
@@ -591,50 +437,40 @@ function ParserHandle(_config) {
591
437
  }
592
438
 
593
439
  if (Array.isArray(_results.data[0])) {
594
- for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
595
- _results.data[i].forEach(addHeder);
440
+ for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
596
441
 
597
442
  _results.data.splice(0, 1);
598
- }
599
- // if _results.data[0] is not an array, we are in a step where _results.data is the row.
600
- else _results.data.forEach(addHeder);
443
+ } else _results.data.forEach(addHeder);
601
444
  }
602
445
 
603
446
  function shouldApplyDynamicTyping(field) {
604
- // Cache function values to avoid calling it for each row
605
447
  if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
606
448
  _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
607
449
  }
450
+
608
451
  return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
609
452
  }
610
453
 
611
454
  function parseDynamic(field, value) {
612
455
  if (shouldApplyDynamicTyping(field)) {
613
- if (value === 'true' || value === 'TRUE') return true;
614
- else if (value === 'false' || value === 'FALSE') return false;
615
- else if (FLOAT.test(value)) return parseFloat(value);
616
- else if (ISO_DATE.test(value)) return new Date(value);
617
- else return value === '' ? null : value;
456
+ if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
618
457
  }
458
+
619
459
  return value;
620
460
  }
621
461
 
622
462
  function applyHeaderAndDynamicTypingAndTransformation() {
623
- if (!_results || !_results.data || (!_config.header && !_config.dynamicTyping && !_config.transform))
624
- return _results;
463
+ if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
625
464
 
626
465
  function processRow(rowSource, i) {
627
466
  var row = _config.header ? {} : [];
628
-
629
467
  var j;
468
+
630
469
  for (j = 0; j < rowSource.length; j++) {
631
470
  var field = j;
632
471
  var value = rowSource[j];
633
-
634
472
  if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
635
-
636
473
  if (_config.transform) value = _config.transform(value, field);
637
-
638
474
  value = parseDynamic(field, value);
639
475
 
640
476
  if (field === '__parsed_extra') {
@@ -644,49 +480,34 @@ function ParserHandle(_config) {
644
480
  }
645
481
 
646
482
  if (_config.header) {
647
- if (j > _fields.length)
648
- addError(
649
- 'FieldMismatch',
650
- 'TooManyFields',
651
- 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
652
- _rowCounter + i
653
- );
654
- else if (j < _fields.length)
655
- addError(
656
- 'FieldMismatch',
657
- 'TooFewFields',
658
- 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
659
- _rowCounter + i
660
- );
483
+ if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
661
484
  }
662
485
 
663
486
  return row;
664
487
  }
665
488
 
666
489
  var incrementBy = 1;
490
+
667
491
  if (!_results.data[0] || Array.isArray(_results.data[0])) {
668
492
  _results.data = _results.data.map(processRow);
669
493
  incrementBy = _results.data.length;
670
494
  } else _results.data = processRow(_results.data, 0);
671
495
 
672
496
  if (_config.header && _results.meta) _results.meta.fields = _fields;
673
-
674
497
  _rowCounter += incrementBy;
675
498
  return _results;
676
499
  }
677
500
 
678
501
  function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
679
502
  var bestDelim, bestDelta, fieldCountPrevRow;
680
-
681
503
  delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
682
504
 
683
505
  for (var i = 0; i < delimitersToGuess.length; i++) {
684
506
  var delim = delimitersToGuess[i];
685
507
  var delta = 0,
686
- avgFieldCount = 0,
687
- emptyLinesCount = 0;
508
+ avgFieldCount = 0,
509
+ emptyLinesCount = 0;
688
510
  fieldCountPrevRow = undefined;
689
-
690
511
  var preview = new Parser({
691
512
  comments: comments,
692
513
  delimiter: delim,
@@ -699,6 +520,7 @@ function ParserHandle(_config) {
699
520
  emptyLinesCount++;
700
521
  continue;
701
522
  }
523
+
702
524
  var fieldCount = preview.data[j].length;
703
525
  avgFieldCount += fieldCount;
704
526
 
@@ -720,7 +542,6 @@ function ParserHandle(_config) {
720
542
  }
721
543
 
722
544
  _config.delimiter = bestDelim;
723
-
724
545
  return {
725
546
  successful: !!bestDelim,
726
547
  bestDelimiter: bestDelim
@@ -728,20 +549,15 @@ function ParserHandle(_config) {
728
549
  }
729
550
 
730
551
  function guessLineEndings(input, quoteChar) {
731
- input = input.substr(0, 1024 * 1024); // max length 1 MB
732
- // Replace all the text inside quotes
552
+ input = input.substr(0, 1024 * 1024);
733
553
  var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
734
554
  input = input.replace(re, '');
735
-
736
555
  var r = input.split('\r');
737
-
738
556
  var n = input.split('\n');
739
-
740
557
  var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
741
-
742
558
  if (r.length === 1 || nAppearsFirst) return '\n';
743
-
744
559
  var numWithN = 0;
560
+
745
561
  for (var i = 0; i < r.length; i++) {
746
562
  if (r[i][0] === '\n') numWithN++;
747
563
  }
@@ -759,14 +575,11 @@ function ParserHandle(_config) {
759
575
  }
760
576
  }
761
577
 
762
- /** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
763
578
  function escapeRegExp(string) {
764
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
579
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
765
580
  }
766
581
 
767
- /** The core parser implements speedy and correct CSV parsing */
768
582
  function Parser(config) {
769
- // Unpack the config object
770
583
  config = config || {};
771
584
  var delim = config.delimiter;
772
585
  var newline = config.newline;
@@ -775,73 +588,61 @@ function Parser(config) {
775
588
  var preview = config.preview;
776
589
  var fastMode = config.fastMode;
777
590
  var quoteChar;
778
- /** Allows for no quoteChar by setting quoteChar to undefined in config */
591
+
779
592
  if (config.quoteChar === undefined) {
780
593
  quoteChar = '"';
781
594
  } else {
782
595
  quoteChar = config.quoteChar;
783
596
  }
597
+
784
598
  var escapeChar = quoteChar;
599
+
785
600
  if (config.escapeChar !== undefined) {
786
601
  escapeChar = config.escapeChar;
787
602
  }
788
603
 
789
- // Delimiter must be valid
790
604
  if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
791
-
792
- // Comment character must be valid
793
- if (comments === delim) throw new Error('Comment character same as delimiter');
794
- else if (comments === true) comments = '#';
795
- else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
796
- comments = false;
797
-
798
- // Newline must be valid: \r, \n, or \r\n
605
+ if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
799
606
  if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
800
-
801
- // We're gonna need these at the Parser scope
802
607
  var cursor = 0;
803
608
  var aborted = false;
804
609
 
805
- this.parse = function(input, baseIndex, ignoreLastRow) {
806
- // For some reason, in Chrome, this speeds things up (!?)
610
+ this.parse = function (input, baseIndex, ignoreLastRow) {
807
611
  if (typeof input !== 'string') throw new Error('Input must be a string');
808
-
809
- // We don't need to compute some of these every time parse() is called,
810
- // but having them in a more local scope seems to perform better
811
612
  var inputLen = input.length,
812
- delimLen = delim.length,
813
- newlineLen = newline.length,
814
- commentsLen = comments.length;
613
+ delimLen = delim.length,
614
+ newlineLen = newline.length,
615
+ commentsLen = comments.length;
815
616
  var stepIsFunction = isFunction(step);
816
-
817
- // Establish starting state
818
617
  cursor = 0;
819
618
  var data = [],
820
- errors = [],
821
- row = [],
822
- lastCursor = 0;
823
-
619
+ errors = [],
620
+ row = [],
621
+ lastCursor = 0;
824
622
  if (!input) return returnable();
825
623
 
826
- if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
624
+ if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
827
625
  var rows = input.split(newline);
626
+
828
627
  for (var i = 0; i < rows.length; i++) {
829
628
  row = rows[i];
830
629
  cursor += row.length;
831
- if (i !== rows.length - 1) cursor += newline.length;
832
- else if (ignoreLastRow) return returnable();
630
+ if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
833
631
  if (comments && row.substr(0, commentsLen) === comments) continue;
632
+
834
633
  if (stepIsFunction) {
835
634
  data = [];
836
635
  pushRow(row.split(delim));
837
636
  doStep();
838
637
  if (aborted) return returnable();
839
638
  } else pushRow(row.split(delim));
639
+
840
640
  if (preview && i >= preview) {
841
641
  data = data.slice(0, preview);
842
642
  return returnable(true);
843
643
  }
844
644
  }
645
+
845
646
  return returnable();
846
647
  }
847
648
 
@@ -850,62 +651,45 @@ function Parser(config) {
850
651
  var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
851
652
  var quoteSearch;
852
653
 
853
- // Parser loop
854
654
  for (;;) {
855
- // Field has opening quote
856
655
  if (input[cursor] === quoteChar) {
857
- // Start our search for the closing quote where the cursor is
858
656
  quoteSearch = cursor;
859
-
860
- // Skip the opening quote
861
657
  cursor++;
862
658
 
863
659
  for (;;) {
864
- // Find closing quote
865
660
  quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
866
661
 
867
- //No other quotes are found - no other delimiters
868
662
  if (quoteSearch === -1) {
869
663
  if (!ignoreLastRow) {
870
- // No closing quote... what a pity
871
664
  errors.push({
872
665
  type: 'Quotes',
873
666
  code: 'MissingQuotes',
874
667
  message: 'Quoted field unterminated',
875
- row: data.length, // row has yet to be inserted
668
+ row: data.length,
876
669
  index: cursor
877
670
  });
878
671
  }
672
+
879
673
  return finish();
880
674
  }
881
675
 
882
- // Closing quote at EOF
883
676
  if (quoteSearch === inputLen - 1) {
884
677
  var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
885
678
  return finish(value);
886
679
  }
887
680
 
888
- // If this quote is escaped, it's part of the data; skip it
889
- // If the quote character is the escape character, then check if the next character is the escape character
890
681
  if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
891
682
  quoteSearch++;
892
683
  continue;
893
684
  }
894
685
 
895
- // If the quote character is not the escape character, then check if the previous character was the escape character
896
- if (
897
- quoteChar !== escapeChar &&
898
- quoteSearch !== 0 &&
899
- input[quoteSearch - 1] === escapeChar
900
- ) {
686
+ if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
901
687
  continue;
902
688
  }
903
689
 
904
- // Check up to nextDelim or nextNewline, whichever is closest
905
690
  var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
906
691
  var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
907
692
 
908
- // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
909
693
  if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
910
694
  row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
911
695
  cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
@@ -918,19 +702,15 @@ function Parser(config) {
918
702
  }
919
703
 
920
704
  if (preview && data.length >= preview) return returnable(true);
921
-
922
705
  break;
923
706
  }
924
707
 
925
708
  var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
926
709
 
927
- // Closing quote followed by newline or 'unnecessary spaces + newLine'
928
- if (
929
- input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
930
- ) {
710
+ if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
931
711
  row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
932
712
  saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
933
- nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
713
+ nextDelim = input.indexOf(delim, cursor);
934
714
 
935
715
  if (stepIsFunction) {
936
716
  doStep();
@@ -938,19 +718,16 @@ function Parser(config) {
938
718
  }
939
719
 
940
720
  if (preview && data.length >= preview) return returnable(true);
941
-
942
721
  break;
943
722
  }
944
723
 
945
- // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
946
724
  errors.push({
947
725
  type: 'Quotes',
948
726
  code: 'InvalidQuotes',
949
727
  message: 'Trailing quote on quoted field is malformed',
950
- row: data.length, // row has yet to be inserted
728
+ row: data.length,
951
729
  index: cursor
952
730
  });
953
-
954
731
  quoteSearch++;
955
732
  continue;
956
733
  }
@@ -964,18 +741,14 @@ function Parser(config) {
964
741
  continue;
965
742
  }
966
743
 
967
- // Comment found at start of new line
968
744
  if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
969
- if (nextNewline === -1)
970
- // Comment ends at EOF
971
- return returnable();
745
+ if (nextNewline === -1) return returnable();
972
746
  cursor = nextNewline + newlineLen;
973
747
  nextNewline = input.indexOf(newline, cursor);
974
748
  nextDelim = input.indexOf(delim, cursor);
975
749
  continue;
976
750
  }
977
751
 
978
- // Next delimiter comes before next newline, so we've reached end of field
979
752
  if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
980
753
  row.push(input.substring(cursor, nextDelim));
981
754
  cursor = nextDelim + delimLen;
@@ -983,7 +756,6 @@ function Parser(config) {
983
756
  continue;
984
757
  }
985
758
 
986
- // End of row
987
759
  if (nextNewline !== -1) {
988
760
  row.push(input.substring(cursor, nextNewline));
989
761
  saveRow(nextNewline + newlineLen);
@@ -994,7 +766,6 @@ function Parser(config) {
994
766
  }
995
767
 
996
768
  if (preview && data.length >= preview) return returnable(true);
997
-
998
769
  continue;
999
770
  }
1000
771
 
@@ -1008,41 +779,30 @@ function Parser(config) {
1008
779
  lastCursor = cursor;
1009
780
  }
1010
781
 
1011
- /**
1012
- * checks if there are extra spaces after closing quote and given index without any text
1013
- * if Yes, returns the number of spaces
1014
- */
1015
782
  function extraSpaces(index) {
1016
783
  var spaceLength = 0;
784
+
1017
785
  if (index !== -1) {
1018
786
  var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
787
+
1019
788
  if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
1020
789
  spaceLength = textBetweenClosingQuoteAndIndex.length;
1021
790
  }
1022
791
  }
792
+
1023
793
  return spaceLength;
1024
794
  }
1025
795
 
1026
- /**
1027
- * Appends the remaining input from cursor to the end into
1028
- * row, saves the row, calls step, and returns the results.
1029
- */
1030
796
  function finish(value) {
1031
797
  if (ignoreLastRow) return returnable();
1032
798
  if (typeof value === 'undefined') value = input.substr(cursor);
1033
799
  row.push(value);
1034
- cursor = inputLen; // important in case parsing is paused
800
+ cursor = inputLen;
1035
801
  pushRow(row);
1036
802
  if (stepIsFunction) doStep();
1037
803
  return returnable();
1038
804
  }
1039
805
 
1040
- /**
1041
- * Appends the current row to the results. It sets the cursor
1042
- * to newCursor and finds the nextNewline. The caller should
1043
- * take care to execute user's step function and check for
1044
- * preview and end parsing if necessary.
1045
- */
1046
806
  function saveRow(newCursor) {
1047
807
  cursor = newCursor;
1048
808
  pushRow(row);
@@ -1050,7 +810,6 @@ function Parser(config) {
1050
810
  nextNewline = input.indexOf(newline, cursor);
1051
811
  }
1052
812
 
1053
- /** Returns an object with the results, errors, and meta. */
1054
813
  function returnable(stopped, step) {
1055
814
  var isStep = step || false;
1056
815
  return {
@@ -1066,7 +825,6 @@ function Parser(config) {
1066
825
  };
1067
826
  }
1068
827
 
1069
- /** Executes the user's step function and resets data & errors. */
1070
828
  function doStep() {
1071
829
  step(returnable(undefined, true));
1072
830
  data = [];
@@ -1074,13 +832,11 @@ function Parser(config) {
1074
832
  }
1075
833
  };
1076
834
 
1077
- /** Sets the abort flag */
1078
- this.abort = function() {
835
+ this.abort = function () {
1079
836
  aborted = true;
1080
837
  };
1081
838
 
1082
- /** Gets the cursor position */
1083
- this.getCharIndex = function() {
839
+ this.getCharIndex = function () {
1084
840
  return cursor;
1085
841
  };
1086
842
  }
@@ -1089,14 +845,16 @@ function notImplemented() {
1089
845
  throw new Error('Not implemented.');
1090
846
  }
1091
847
 
1092
- /** Makes a deep copy of an array or object (mostly) */
1093
848
  function copy(obj) {
1094
849
  if (typeof obj !== 'object' || obj === null) return obj;
1095
850
  var cpy = Array.isArray(obj) ? [] : {};
851
+
1096
852
  for (var key in obj) cpy[key] = copy(obj[key]);
853
+
1097
854
  return cpy;
1098
855
  }
1099
856
 
1100
857
  function isFunction(func) {
1101
858
  return typeof func === 'function';
1102
859
  }
860
+ //# sourceMappingURL=papaparse.js.map