@loaders.gl/csv 4.2.0-alpha.4 → 4.2.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,694 +1,933 @@
1
+ // @ts-nocheck
2
+ // This is a fork of papaparse
3
+ // https://github.com/mholt/PapaParse
1
4
  /* @license
2
5
  Papa Parse
3
6
  v5.0.0-beta.0
4
7
  https://github.com/mholt/PapaParse
5
8
  License: MIT
6
9
  */
10
+ // FORK SUMMARY:
11
+ // - Adopt ES6 exports
12
+ // - Implement new AsyncIteratorStreamer
13
+ // - Remove non Async Iterator streamers (can all be handled by new streamer)
14
+ // - Remove unused Worker support (loaders.gl worker system used instead)
15
+ // - Remove unused jQuery plugin support
16
+ /* eslint-disable */
7
17
  const BYTE_ORDER_MARK = '\ufeff';
8
18
  const Papa = {
9
- parse: CsvToJson,
10
- unparse: JsonToCsv,
11
- RECORD_SEP: String.fromCharCode(30),
12
- UNIT_SEP: String.fromCharCode(31),
13
- BYTE_ORDER_MARK,
14
- BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
15
- WORKERS_SUPPORTED: false,
16
- NODE_STREAM_INPUT: 1,
17
- LocalChunkSize: 1024 * 1024 * 10,
18
- RemoteChunkSize: 1024 * 1024 * 5,
19
- DefaultDelimiter: ',',
20
- Parser: Parser,
21
- ParserHandle: ParserHandle,
22
- ChunkStreamer: ChunkStreamer,
23
- StringStreamer: StringStreamer
19
+ parse: CsvToJson,
20
+ unparse: JsonToCsv,
21
+ RECORD_SEP: String.fromCharCode(30),
22
+ UNIT_SEP: String.fromCharCode(31),
23
+ BYTE_ORDER_MARK,
24
+ BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
25
+ WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
26
+ NODE_STREAM_INPUT: 1,
27
+ // Configurable chunk sizes for local and remote files, respectively
28
+ LocalChunkSize: 1024 * 1024 * 10, // 10 M,
29
+ RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
30
+ DefaultDelimiter: ',', // Used if not specified and detection fail,
31
+ // Exposed for testing and development only
32
+ Parser: Parser,
33
+ ParserHandle: ParserHandle,
34
+ // BEGIN FORK
35
+ ChunkStreamer: ChunkStreamer,
36
+ StringStreamer: StringStreamer
24
37
  };
25
38
  export default Papa;
26
- function CsvToJson(_input, _config, UserDefinedStreamer) {
27
- _config = _config || {};
28
- var dynamicTyping = _config.dynamicTyping || false;
29
- if (isFunction(dynamicTyping)) {
30
- _config.dynamicTypingFunction = dynamicTyping;
31
- dynamicTyping = {};
32
- }
33
- _config.dynamicTyping = dynamicTyping;
34
- _config.transform = isFunction(_config.transform) ? _config.transform : false;
35
- if (_config.worker && Papa.WORKERS_SUPPORTED) {
36
- var w = newWorker();
37
- w.userStep = _config.step;
38
- w.userChunk = _config.chunk;
39
- w.userComplete = _config.complete;
40
- w.userError = _config.error;
41
- _config.step = isFunction(_config.step);
42
- _config.chunk = isFunction(_config.chunk);
43
- _config.complete = isFunction(_config.complete);
44
- _config.error = isFunction(_config.error);
45
- delete _config.worker;
46
- w.postMessage({
47
- input: _input,
48
- config: _config,
49
- workerId: w.id
50
- });
51
- return;
52
- }
53
- var streamer = null;
54
- if (typeof _input === 'string') {
55
- streamer = new StringStreamer(_config);
56
- }
57
- if (!streamer) {
58
- streamer = new UserDefinedStreamer(_config);
59
- }
60
- return streamer.stream(_input);
39
+ /*
40
+ Papa.NetworkStreamer = NetworkStreamer;
41
+ Papa.FileStreamer = FileStreamer;
42
+ Papa.ReadableStreamStreamer = ReadableStreamStreamer;
43
+ if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
44
+ Papa.DuplexStreamStreamer = DuplexStreamStreamer;
61
45
  }
62
- function JsonToCsv(_input, _config) {
63
- var _quotes = false;
64
- var _writeHeader = true;
65
- var _delimiter = ',';
66
- var _newline = '\r\n';
67
- var _quoteChar = '"';
68
- var _escapedQuote = _quoteChar + _quoteChar;
69
- var _skipEmptyLines = false;
70
- var _columns = null;
71
- unpackConfig();
72
- var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
73
- if (typeof _input === 'string') _input = JSON.parse(_input);
74
- if (Array.isArray(_input)) {
75
- if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
76
- } else if (typeof _input === 'object') {
77
- if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
78
- if (Array.isArray(_input.data)) {
79
- if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
80
- if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
81
- if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
46
+ */
47
+ // END FORK
48
+ // BEGIN FORK
49
+ // Adds an argument to papa.parse
50
+ // function CsvToJson(_input, _config)
51
+ function CsvToJson(_input, _config, UserDefinedStreamer // BEGIN FORK
52
+ ) {
53
+ _config = _config || {};
54
+ var dynamicTyping = _config.dynamicTyping || false;
55
+ if (isFunction(dynamicTyping)) {
56
+ _config.dynamicTypingFunction = dynamicTyping;
57
+ // Will be filled on first row call
58
+ dynamicTyping = {};
59
+ }
60
+ _config.dynamicTyping = dynamicTyping;
61
+ _config.transform = isFunction(_config.transform) ? _config.transform : false;
62
+ if (_config.worker && Papa.WORKERS_SUPPORTED) {
63
+ var w = newWorker();
64
+ w.userStep = _config.step;
65
+ w.userChunk = _config.chunk;
66
+ w.userComplete = _config.complete;
67
+ w.userError = _config.error;
68
+ _config.step = isFunction(_config.step);
69
+ _config.chunk = isFunction(_config.chunk);
70
+ _config.complete = isFunction(_config.complete);
71
+ _config.error = isFunction(_config.error);
72
+ delete _config.worker; // prevent infinite loop
73
+ w.postMessage({
74
+ input: _input,
75
+ config: _config,
76
+ workerId: w.id
77
+ });
78
+ return;
82
79
  }
83
- return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
84
- }
85
- throw new Error('Unable to serialize unrecognized input');
86
- function unpackConfig() {
87
- if (typeof _config !== 'object') return;
88
- if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
89
- return _config.delimiter.indexOf(value) !== -1;
90
- }).length) {
91
- _delimiter = _config.delimiter;
80
+ var streamer = null;
81
+ /*
82
+ if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
83
+ // create a node Duplex stream for use
84
+ // with .pipe
85
+ streamer = new DuplexStreamStreamer(_config);
86
+ return streamer.getStream();
87
+ } else
88
+ */
89
+ if (typeof _input === 'string') {
90
+ // if (_config.download) streamer = new NetworkStreamer(_config);
91
+ // else
92
+ streamer = new StringStreamer(_config);
92
93
  }
93
- if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
94
- if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
95
- if (typeof _config.newline === 'string') _newline = _config.newline;
96
- if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
97
- if (typeof _config.header === 'boolean') _writeHeader = _config.header;
98
- if (Array.isArray(_config.columns)) {
99
- if (_config.columns.length === 0) throw new Error('Option columns is empty');
100
- _columns = _config.columns;
94
+ /*
95
+ else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
96
+ streamer = new ReadableStreamStreamer(_config);
97
+ } else if ((globalThis.File && _input instanceof File) || _input instanceof Object)
98
+ // ...Safari. (see issue #106)
99
+ streamer = new FileStreamer(_config);
100
+ */
101
+ // BEGIN FORK
102
+ if (!streamer) {
103
+ streamer = new UserDefinedStreamer(_config);
101
104
  }
102
- if (_config.escapeChar !== undefined) {
103
- _escapedQuote = _config.escapeChar + _quoteChar;
105
+ // END FORK
106
+ return streamer.stream(_input);
107
+ }
108
+ function JsonToCsv(_input, _config) {
109
+ // Default configuration
110
+ /** whether to surround every datum with quotes */
111
+ var _quotes = false;
112
+ /** whether to write headers */
113
+ var _writeHeader = true;
114
+ /** delimiting character(s) */
115
+ var _delimiter = ',';
116
+ /** newline character(s) */
117
+ var _newline = '\r\n';
118
+ /** quote character */
119
+ var _quoteChar = '"';
120
+ /** escaped quote character, either "" or <config.escapeChar>" */
121
+ var _escapedQuote = _quoteChar + _quoteChar;
122
+ /** whether to skip empty lines */
123
+ var _skipEmptyLines = false;
124
+ /** the columns (keys) we expect when we unparse objects */
125
+ var _columns = null;
126
+ unpackConfig();
127
+ var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
128
+ if (typeof _input === 'string')
129
+ _input = JSON.parse(_input);
130
+ if (Array.isArray(_input)) {
131
+ if (!_input.length || Array.isArray(_input[0]))
132
+ return serialize(null, _input, _skipEmptyLines);
133
+ else if (typeof _input[0] === 'object')
134
+ return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
104
135
  }
105
- }
106
- function objectKeys(obj) {
107
- if (typeof obj !== 'object') return [];
108
- var keys = [];
109
- for (var key in obj) keys.push(key);
110
- return keys;
111
- }
112
- function serialize(fields, data, skipEmptyLines) {
113
- var csv = '';
114
- if (typeof fields === 'string') fields = JSON.parse(fields);
115
- if (typeof data === 'string') data = JSON.parse(data);
116
- var hasHeader = Array.isArray(fields) && fields.length > 0;
117
- var dataKeyedByField = !Array.isArray(data[0]);
118
- if (hasHeader && _writeHeader) {
119
- for (var i = 0; i < fields.length; i++) {
120
- if (i > 0) csv += _delimiter;
121
- csv += safe(fields[i], i);
122
- }
123
- if (data.length > 0) csv += _newline;
136
+ else if (typeof _input === 'object') {
137
+ if (typeof _input.data === 'string')
138
+ _input.data = JSON.parse(_input.data);
139
+ if (Array.isArray(_input.data)) {
140
+ if (!_input.fields)
141
+ _input.fields = _input.meta && _input.meta.fields;
142
+ if (!_input.fields)
143
+ _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
144
+ if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
145
+ _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
146
+ }
147
+ return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
124
148
  }
125
- for (var row = 0; row < data.length; row++) {
126
- var maxCol = hasHeader ? fields.length : data[row].length;
127
- var emptyLine = false;
128
- var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
129
- if (skipEmptyLines && !hasHeader) {
130
- emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
131
- }
132
- if (skipEmptyLines === 'greedy' && hasHeader) {
133
- var line = [];
134
- for (var c = 0; c < maxCol; c++) {
135
- var cx = dataKeyedByField ? fields[c] : c;
136
- line.push(data[row][cx]);
149
+ // Default (any valid paths should return before this)
150
+ throw new Error('Unable to serialize unrecognized input');
151
+ function unpackConfig() {
152
+ if (typeof _config !== 'object')
153
+ return;
154
+ if (typeof _config.delimiter === 'string' &&
155
+ !Papa.BAD_DELIMITERS.filter(function (value) {
156
+ return _config.delimiter.indexOf(value) !== -1;
157
+ }).length) {
158
+ _delimiter = _config.delimiter;
137
159
  }
138
- emptyLine = line.join('').trim() === '';
139
- }
140
- if (!emptyLine) {
141
- for (var col = 0; col < maxCol; col++) {
142
- if (col > 0 && !nullLine) csv += _delimiter;
143
- var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
144
- csv += safe(data[row][colIdx], col);
160
+ if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
161
+ _quotes = _config.quotes;
162
+ if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
163
+ _skipEmptyLines = _config.skipEmptyLines;
164
+ if (typeof _config.newline === 'string')
165
+ _newline = _config.newline;
166
+ if (typeof _config.quoteChar === 'string')
167
+ _quoteChar = _config.quoteChar;
168
+ if (typeof _config.header === 'boolean')
169
+ _writeHeader = _config.header;
170
+ if (Array.isArray(_config.columns)) {
171
+ if (_config.columns.length === 0)
172
+ throw new Error('Option columns is empty');
173
+ _columns = _config.columns;
145
174
  }
146
- if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
147
- csv += _newline;
175
+ if (_config.escapeChar !== undefined) {
176
+ _escapedQuote = _config.escapeChar + _quoteChar;
148
177
  }
149
- }
150
178
  }
151
- return csv;
152
- }
153
- function safe(str, col) {
154
- if (typeof str === 'undefined' || str === null) return '';
155
- if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
156
- str = str.toString().replace(quoteCharRegex, _escapedQuote);
157
- var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
158
- return needsQuotes ? _quoteChar + str + _quoteChar : str;
159
- }
160
- function hasAny(str, substrings) {
161
- for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
162
- return false;
163
- }
164
- }
165
- function ChunkStreamer(config) {
166
- this._handle = null;
167
- this._finished = false;
168
- this._completed = false;
169
- this._input = null;
170
- this._baseIndex = 0;
171
- this._partialLine = '';
172
- this._rowCount = 0;
173
- this._start = 0;
174
- this._nextChunk = null;
175
- this.isFirstChunk = true;
176
- this._completeResults = {
177
- data: [],
178
- errors: [],
179
- meta: {}
180
- };
181
- replaceConfig.call(this, config);
182
- this.parseChunk = function (chunk, isFakeChunk) {
183
- if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
184
- var modifiedChunk = this._config.beforeFirstChunk(chunk);
185
- if (modifiedChunk !== undefined) chunk = modifiedChunk;
179
+ /** Turns an object's keys into an array */
180
+ function objectKeys(obj) {
181
+ if (typeof obj !== 'object')
182
+ return [];
183
+ var keys = [];
184
+ for (var key in obj)
185
+ keys.push(key);
186
+ return keys;
186
187
  }
187
- this.isFirstChunk = false;
188
- var aggregate = this._partialLine + chunk;
189
- this._partialLine = '';
190
- var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
191
- if (this._handle.paused() || this._handle.aborted()) return;
192
- var lastIndex = results.meta.cursor;
193
- if (!this._finished) {
194
- this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
195
- this._baseIndex = lastIndex;
188
+ /** The double for loop that iterates the data and writes out a CSV string including header row */
189
+ function serialize(fields, data, skipEmptyLines) {
190
+ var csv = '';
191
+ if (typeof fields === 'string')
192
+ fields = JSON.parse(fields);
193
+ if (typeof data === 'string')
194
+ data = JSON.parse(data);
195
+ var hasHeader = Array.isArray(fields) && fields.length > 0;
196
+ var dataKeyedByField = !Array.isArray(data[0]);
197
+ // If there is a header row, write it first
198
+ if (hasHeader && _writeHeader) {
199
+ for (var i = 0; i < fields.length; i++) {
200
+ if (i > 0)
201
+ csv += _delimiter;
202
+ csv += safe(fields[i], i);
203
+ }
204
+ if (data.length > 0)
205
+ csv += _newline;
206
+ }
207
+ // Then write out the data
208
+ for (var row = 0; row < data.length; row++) {
209
+ var maxCol = hasHeader ? fields.length : data[row].length;
210
+ var emptyLine = false;
211
+ var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
212
+ if (skipEmptyLines && !hasHeader) {
213
+ emptyLine =
214
+ skipEmptyLines === 'greedy'
215
+ ? data[row].join('').trim() === ''
216
+ : data[row].length === 1 && data[row][0].length === 0;
217
+ }
218
+ if (skipEmptyLines === 'greedy' && hasHeader) {
219
+ var line = [];
220
+ for (var c = 0; c < maxCol; c++) {
221
+ var cx = dataKeyedByField ? fields[c] : c;
222
+ line.push(data[row][cx]);
223
+ }
224
+ emptyLine = line.join('').trim() === '';
225
+ }
226
+ if (!emptyLine) {
227
+ for (var col = 0; col < maxCol; col++) {
228
+ if (col > 0 && !nullLine)
229
+ csv += _delimiter;
230
+ var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
231
+ csv += safe(data[row][colIdx], col);
232
+ }
233
+ if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
234
+ csv += _newline;
235
+ }
236
+ }
237
+ }
238
+ return csv;
196
239
  }
197
- if (results && results.data) this._rowCount += results.data.length;
198
- var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
199
- if (isFunction(this._config.chunk) && !isFakeChunk) {
200
- this._config.chunk(results, this._handle);
201
- if (this._handle.paused() || this._handle.aborted()) return;
202
- results = undefined;
203
- this._completeResults = undefined;
240
+ /** Encloses a value in quotes if needed (makes a value safe for CSV insertion) */
241
+ function safe(str, col) {
242
+ if (typeof str === 'undefined' || str === null)
243
+ return '';
244
+ if (str.constructor === Date)
245
+ return JSON.stringify(str).slice(1, 25);
246
+ str = str.toString().replace(quoteCharRegex, _escapedQuote);
247
+ var needsQuotes = (typeof _quotes === 'boolean' && _quotes) ||
248
+ (Array.isArray(_quotes) && _quotes[col]) ||
249
+ hasAny(str, Papa.BAD_DELIMITERS) ||
250
+ str.indexOf(_delimiter) > -1 ||
251
+ str.charAt(0) === ' ' ||
252
+ str.charAt(str.length - 1) === ' ';
253
+ return needsQuotes ? _quoteChar + str + _quoteChar : str;
204
254
  }
205
- if (!this._config.step && !this._config.chunk) {
206
- this._completeResults.data = this._completeResults.data.concat(results.data);
207
- this._completeResults.errors = this._completeResults.errors.concat(results.errors);
208
- this._completeResults.meta = results.meta;
255
+ function hasAny(str, substrings) {
256
+ for (var i = 0; i < substrings.length; i++)
257
+ if (str.indexOf(substrings[i]) > -1)
258
+ return true;
259
+ return false;
209
260
  }
210
- if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
211
- this._config.complete(this._completeResults, this._input);
212
- this._completed = true;
261
+ }
262
+ /** ChunkStreamer is the base prototype for various streamer implementations. */
263
+ function ChunkStreamer(config) {
264
+ this._handle = null;
265
+ this._finished = false;
266
+ this._completed = false;
267
+ this._input = null;
268
+ this._baseIndex = 0;
269
+ this._partialLine = '';
270
+ this._rowCount = 0;
271
+ this._start = 0;
272
+ this._nextChunk = null;
273
+ this.isFirstChunk = true;
274
+ this._completeResults = {
275
+ data: [],
276
+ errors: [],
277
+ meta: {}
278
+ };
279
+ replaceConfig.call(this, config);
280
+ this.parseChunk = function (chunk, isFakeChunk) {
281
+ // First chunk pre-processing
282
+ if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
283
+ var modifiedChunk = this._config.beforeFirstChunk(chunk);
284
+ if (modifiedChunk !== undefined)
285
+ chunk = modifiedChunk;
286
+ }
287
+ this.isFirstChunk = false;
288
+ // Rejoin the line we likely just split in two by chunking the file
289
+ var aggregate = this._partialLine + chunk;
290
+ this._partialLine = '';
291
+ var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
292
+ if (this._handle.paused() || this._handle.aborted())
293
+ return;
294
+ var lastIndex = results.meta.cursor;
295
+ if (!this._finished) {
296
+ this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
297
+ this._baseIndex = lastIndex;
298
+ }
299
+ if (results && results.data)
300
+ this._rowCount += results.data.length;
301
+ var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);
302
+ if (isFunction(this._config.chunk) && !isFakeChunk) {
303
+ this._config.chunk(results, this._handle);
304
+ if (this._handle.paused() || this._handle.aborted())
305
+ return;
306
+ results = undefined;
307
+ this._completeResults = undefined;
308
+ }
309
+ if (!this._config.step && !this._config.chunk) {
310
+ this._completeResults.data = this._completeResults.data.concat(results.data);
311
+ this._completeResults.errors = this._completeResults.errors.concat(results.errors);
312
+ this._completeResults.meta = results.meta;
313
+ }
314
+ if (!this._completed &&
315
+ finishedIncludingPreview &&
316
+ isFunction(this._config.complete) &&
317
+ (!results || !results.meta.aborted)) {
318
+ this._config.complete(this._completeResults, this._input);
319
+ this._completed = true;
320
+ }
321
+ if (!finishedIncludingPreview && (!results || !results.meta.paused))
322
+ this._nextChunk();
323
+ return results;
324
+ };
325
+ this._sendError = function (error) {
326
+ if (isFunction(this._config.error))
327
+ this._config.error(error);
328
+ };
329
+ function replaceConfig(config) {
330
+ // Deep-copy the config so we can edit it
331
+ var configCopy = copy(config);
332
+ configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
333
+ if (!config.step && !config.chunk)
334
+ configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
335
+ this._handle = new ParserHandle(configCopy);
336
+ this._handle.streamer = this;
337
+ this._config = configCopy; // persist the copy to the caller
213
338
  }
214
- if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
215
- return results;
216
- };
217
- this._sendError = function (error) {
218
- if (isFunction(this._config.error)) this._config.error(error);
219
- };
220
- function replaceConfig(config) {
221
- var configCopy = copy(config);
222
- configCopy.chunkSize = parseInt(configCopy.chunkSize);
223
- if (!config.step && !config.chunk) configCopy.chunkSize = null;
224
- this._handle = new ParserHandle(configCopy);
225
- this._handle.streamer = this;
226
- this._config = configCopy;
227
- }
228
339
  }
229
340
  function StringStreamer(config) {
230
- config = config || {};
231
- ChunkStreamer.call(this, config);
232
- var remaining;
233
- this.stream = function (s) {
234
- remaining = s;
235
- return this._nextChunk();
236
- };
237
- this._nextChunk = function () {
238
- if (this._finished) return;
239
- var size = this._config.chunkSize;
240
- var chunk = size ? remaining.substr(0, size) : remaining;
241
- remaining = size ? remaining.substr(size) : '';
242
- this._finished = !remaining;
243
- return this.parseChunk(chunk);
244
- };
341
+ config = config || {};
342
+ ChunkStreamer.call(this, config);
343
+ var remaining;
344
+ this.stream = function (s) {
345
+ remaining = s;
346
+ return this._nextChunk();
347
+ };
348
+ this._nextChunk = function () {
349
+ if (this._finished)
350
+ return;
351
+ var size = this._config.chunkSize;
352
+ var chunk = size ? remaining.substr(0, size) : remaining;
353
+ remaining = size ? remaining.substr(size) : '';
354
+ this._finished = !remaining;
355
+ return this.parseChunk(chunk);
356
+ };
245
357
  }
246
358
  StringStreamer.prototype = Object.create(StringStreamer.prototype);
247
359
  StringStreamer.prototype.constructor = StringStreamer;
360
+ // Use one ParserHandle per entire CSV file or string
248
361
  function ParserHandle(_config) {
249
- var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
250
- var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
251
- var self = this;
252
- var _stepCounter = 0;
253
- var _rowCounter = 0;
254
- var _input;
255
- var _parser;
256
- var _paused = false;
257
- var _aborted = false;
258
- var _delimiterError;
259
- var _fields = [];
260
- var _results = {
261
- data: [],
262
- errors: [],
263
- meta: {}
264
- };
265
- if (isFunction(_config.step)) {
266
- var userStep = _config.step;
267
- _config.step = function (results) {
268
- _results = results;
269
- if (needsHeaderRow()) processResults();else {
270
- processResults();
271
- if (!_results.data || _results.data.length === 0) return;
272
- _stepCounter += results.data.length;
273
- if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
274
- }
362
+ // One goal is to minimize the use of regular expressions...
363
+ var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
364
+ var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
365
+ var self = this;
366
+ var _stepCounter = 0; // Number of times step was called (number of rows parsed)
367
+ var _rowCounter = 0; // Number of rows that have been parsed so far
368
+ var _input; // The input being parsed
369
+ var _parser; // The core parser being used
370
+ var _paused = false; // Whether we are paused or not
371
+ var _aborted = false; // Whether the parser has aborted or not
372
+ var _delimiterError; // Temporary state between delimiter detection and processing results
373
+ var _fields = []; // Fields are from the header row of the input, if there is one
374
+ var _results = {
375
+ // The last results returned from the parser
376
+ data: [],
377
+ errors: [],
378
+ meta: {}
275
379
  };
276
- }
277
- this.parse = function (input, baseIndex, ignoreLastRow) {
278
- var quoteChar = _config.quoteChar || '"';
279
- if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
280
- _delimiterError = false;
281
- if (!_config.delimiter) {
282
- var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
283
- if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
284
- _delimiterError = true;
285
- _config.delimiter = Papa.DefaultDelimiter;
286
- }
287
- _results.meta.delimiter = _config.delimiter;
288
- } else if (isFunction(_config.delimiter)) {
289
- _config.delimiter = _config.delimiter(input);
290
- _results.meta.delimiter = _config.delimiter;
380
+ if (isFunction(_config.step)) {
381
+ var userStep = _config.step;
382
+ _config.step = function (results) {
383
+ _results = results;
384
+ if (needsHeaderRow())
385
+ processResults();
386
+ // only call user's step function after header row
387
+ else {
388
+ processResults();
389
+ // It's possible that this line was empty and there's no row here after all
390
+ if (!_results.data || _results.data.length === 0)
391
+ return;
392
+ _stepCounter += results.data.length;
393
+ if (_config.preview && _stepCounter > _config.preview)
394
+ _parser.abort();
395
+ else
396
+ userStep(_results, self);
397
+ }
398
+ };
291
399
  }
292
- var parserConfig = copy(_config);
293
- if (_config.preview && _config.header) parserConfig.preview++;
294
- _input = input;
295
- _parser = new Parser(parserConfig);
296
- _results = _parser.parse(_input, baseIndex, ignoreLastRow);
297
- processResults();
298
- return _paused ? {
299
- meta: {
300
- paused: true
301
- }
302
- } : _results || {
303
- meta: {
304
- paused: false
305
- }
400
+ /**
401
+ * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
402
+ * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
403
+ * when an input comes in multiple chunks, like from a file.
404
+ */
405
+ this.parse = function (input, baseIndex, ignoreLastRow) {
406
+ var quoteChar = _config.quoteChar || '"';
407
+ if (!_config.newline)
408
+ _config.newline = guessLineEndings(input, quoteChar);
409
+ _delimiterError = false;
410
+ if (!_config.delimiter) {
411
+ var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
412
+ if (delimGuess.successful)
413
+ _config.delimiter = delimGuess.bestDelimiter;
414
+ else {
415
+ _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
416
+ _config.delimiter = Papa.DefaultDelimiter;
417
+ }
418
+ _results.meta.delimiter = _config.delimiter;
419
+ }
420
+ else if (isFunction(_config.delimiter)) {
421
+ _config.delimiter = _config.delimiter(input);
422
+ _results.meta.delimiter = _config.delimiter;
423
+ }
424
+ var parserConfig = copy(_config);
425
+ if (_config.preview && _config.header)
426
+ parserConfig.preview++; // to compensate for header row
427
+ _input = input;
428
+ _parser = new Parser(parserConfig);
429
+ _results = _parser.parse(_input, baseIndex, ignoreLastRow);
430
+ processResults();
431
+ return _paused ? { meta: { paused: true } } : _results || { meta: { paused: false } };
432
+ };
433
+ this.paused = function () {
434
+ return _paused;
435
+ };
436
+ this.pause = function () {
437
+ _paused = true;
438
+ _parser.abort();
439
+ _input = _input.substr(_parser.getCharIndex());
440
+ };
441
+ this.resume = function () {
442
+ _paused = false;
443
+ self.streamer.parseChunk(_input, true);
444
+ };
445
+ this.aborted = function () {
446
+ return _aborted;
306
447
  };
307
- };
308
- this.paused = function () {
309
- return _paused;
310
- };
311
- this.pause = function () {
312
- _paused = true;
313
- _parser.abort();
314
- _input = _input.substr(_parser.getCharIndex());
315
- };
316
- this.resume = function () {
317
- _paused = false;
318
- self.streamer.parseChunk(_input, true);
319
- };
320
- this.aborted = function () {
321
- return _aborted;
322
- };
323
- this.abort = function () {
324
- _aborted = true;
325
- _parser.abort();
326
- _results.meta.aborted = true;
327
- if (isFunction(_config.complete)) _config.complete(_results);
328
- _input = '';
329
- };
330
- function testEmptyLine(s) {
331
- return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
332
- }
333
- function processResults() {
334
- if (_results && _delimiterError) {
335
- addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
336
- _delimiterError = false;
448
+ this.abort = function () {
449
+ _aborted = true;
450
+ _parser.abort();
451
+ _results.meta.aborted = true;
452
+ if (isFunction(_config.complete))
453
+ _config.complete(_results);
454
+ _input = '';
455
+ };
456
+ function testEmptyLine(s) {
457
+ return _config.skipEmptyLines === 'greedy'
458
+ ? s.join('').trim() === ''
459
+ : s.length === 1 && s[0].length === 0;
337
460
  }
338
- if (_config.skipEmptyLines) {
339
- for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
461
+ function processResults() {
462
+ if (_results && _delimiterError) {
463
+ addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
464
+ _delimiterError = false;
465
+ }
466
+ if (_config.skipEmptyLines) {
467
+ for (var i = 0; i < _results.data.length; i++)
468
+ if (testEmptyLine(_results.data[i]))
469
+ _results.data.splice(i--, 1);
470
+ }
471
+ if (needsHeaderRow())
472
+ fillHeaderFields();
473
+ return applyHeaderAndDynamicTypingAndTransformation();
340
474
  }
341
- if (needsHeaderRow()) fillHeaderFields();
342
- return applyHeaderAndDynamicTypingAndTransformation();
343
- }
344
- function needsHeaderRow() {
345
- return _config.header && _fields.length === 0;
346
- }
347
- function fillHeaderFields() {
348
- if (!_results) return;
349
- function addHeder(header) {
350
- if (isFunction(_config.transformHeader)) header = _config.transformHeader(header);
351
- _fields.push(header);
475
+ function needsHeaderRow() {
476
+ return _config.header && _fields.length === 0;
352
477
  }
353
- if (Array.isArray(_results.data[0])) {
354
- for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
355
- _results.data.splice(0, 1);
356
- } else _results.data.forEach(addHeder);
357
- }
358
- function shouldApplyDynamicTyping(field) {
359
- if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
360
- _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
478
+ function fillHeaderFields() {
479
+ if (!_results)
480
+ return;
481
+ function addHeder(header) {
482
+ if (isFunction(_config.transformHeader))
483
+ header = _config.transformHeader(header);
484
+ _fields.push(header);
485
+ }
486
+ if (Array.isArray(_results.data[0])) {
487
+ for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
488
+ _results.data[i].forEach(addHeder);
489
+ _results.data.splice(0, 1);
490
+ }
491
+ // if _results.data[0] is not an array, we are in a step where _results.data is the row.
492
+ else
493
+ _results.data.forEach(addHeder);
361
494
  }
362
- return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
363
- }
364
- function parseDynamic(field, value) {
365
- if (shouldApplyDynamicTyping(field)) {
366
- if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
495
+ function shouldApplyDynamicTyping(field) {
496
+ // Cache the dynamicTypingFunction result per field so it is not called again for every row
497
+ if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
498
+ _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
499
+ }
500
+ return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
367
501
  }
368
- return value;
369
- }
370
- function applyHeaderAndDynamicTypingAndTransformation() {
371
- if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
372
- function processRow(rowSource, i) {
373
- var row = _config.header ? {} : [];
374
- var j;
375
- for (j = 0; j < rowSource.length; j++) {
376
- var field = j;
377
- var value = rowSource[j];
378
- if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
379
- if (_config.transform) value = _config.transform(value, field);
380
- value = parseDynamic(field, value);
381
- if (field === '__parsed_extra') {
382
- row[field] = row[field] || [];
383
- row[field].push(value);
384
- } else row[field] = value;
385
- }
386
- if (_config.header) {
387
- if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
388
- }
389
- return row;
502
+ function parseDynamic(field, value) {
503
+ if (shouldApplyDynamicTyping(field)) {
504
+ if (value === 'true' || value === 'TRUE')
505
+ return true;
506
+ else if (value === 'false' || value === 'FALSE')
507
+ return false;
508
+ else if (FLOAT.test(value))
509
+ return parseFloat(value);
510
+ else if (ISO_DATE.test(value))
511
+ return new Date(value);
512
+ else
513
+ return value === '' ? null : value;
514
+ }
515
+ return value;
390
516
  }
391
- var incrementBy = 1;
392
- if (!_results.data[0] || Array.isArray(_results.data[0])) {
393
- _results.data = _results.data.map(processRow);
394
- incrementBy = _results.data.length;
395
- } else _results.data = processRow(_results.data, 0);
396
- if (_config.header && _results.meta) _results.meta.fields = _fields;
397
- _rowCounter += incrementBy;
398
- return _results;
399
- }
400
- function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
401
- var bestDelim, bestDelta, fieldCountPrevRow;
402
- delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
403
- for (var i = 0; i < delimitersToGuess.length; i++) {
404
- var delim = delimitersToGuess[i];
405
- var delta = 0,
406
- avgFieldCount = 0,
407
- emptyLinesCount = 0;
408
- fieldCountPrevRow = undefined;
409
- var preview = new Parser({
410
- comments: comments,
411
- delimiter: delim,
412
- newline: newline,
413
- preview: 10
414
- }).parse(input);
415
- for (var j = 0; j < preview.data.length; j++) {
416
- if (skipEmptyLines && testEmptyLine(preview.data[j])) {
417
- emptyLinesCount++;
418
- continue;
517
+ function applyHeaderAndDynamicTypingAndTransformation() {
518
+ if (!_results ||
519
+ !_results.data ||
520
+ (!_config.header && !_config.dynamicTyping && !_config.transform))
521
+ return _results;
522
+ function processRow(rowSource, i) {
523
+ var row = _config.header ? {} : [];
524
+ var j;
525
+ for (j = 0; j < rowSource.length; j++) {
526
+ var field = j;
527
+ var value = rowSource[j];
528
+ if (_config.header)
529
+ field = j >= _fields.length ? '__parsed_extra' : _fields[j];
530
+ if (_config.transform)
531
+ value = _config.transform(value, field);
532
+ value = parseDynamic(field, value);
533
+ if (field === '__parsed_extra') {
534
+ row[field] = row[field] || [];
535
+ row[field].push(value);
536
+ }
537
+ else
538
+ row[field] = value;
539
+ }
540
+ if (_config.header) {
541
+ if (j > _fields.length)
542
+ addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
543
+ else if (j < _fields.length)
544
+ addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
545
+ }
546
+ return row;
419
547
  }
420
- var fieldCount = preview.data[j].length;
421
- avgFieldCount += fieldCount;
422
- if (typeof fieldCountPrevRow === 'undefined') {
423
- fieldCountPrevRow = 0;
424
- continue;
425
- } else if (fieldCount > 1) {
426
- delta += Math.abs(fieldCount - fieldCountPrevRow);
427
- fieldCountPrevRow = fieldCount;
548
+ var incrementBy = 1;
549
+ if (!_results.data[0] || Array.isArray(_results.data[0])) {
550
+ _results.data = _results.data.map(processRow);
551
+ incrementBy = _results.data.length;
428
552
  }
429
- }
430
- if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;
431
- if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
432
- bestDelta = delta;
433
- bestDelim = delim;
434
- }
553
+ else
554
+ _results.data = processRow(_results.data, 0);
555
+ if (_config.header && _results.meta)
556
+ _results.meta.fields = _fields;
557
+ _rowCounter += incrementBy;
558
+ return _results;
435
559
  }
436
- _config.delimiter = bestDelim;
437
- return {
438
- successful: !!bestDelim,
439
- bestDelimiter: bestDelim
440
- };
441
- }
442
- function guessLineEndings(input, quoteChar) {
443
- input = input.substr(0, 1024 * 1024);
444
- var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
445
- input = input.replace(re, '');
446
- var r = input.split('\r');
447
- var n = input.split('\n');
448
- var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
449
- if (r.length === 1 || nAppearsFirst) return '\n';
450
- var numWithN = 0;
451
- for (var i = 0; i < r.length; i++) {
452
- if (r[i][0] === '\n') numWithN++;
560
+ function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
561
+ var bestDelim, bestDelta, fieldCountPrevRow;
562
+ delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
563
+ for (var i = 0; i < delimitersToGuess.length; i++) {
564
+ var delim = delimitersToGuess[i];
565
+ var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
566
+ fieldCountPrevRow = undefined;
567
+ var preview = new Parser({
568
+ comments: comments,
569
+ delimiter: delim,
570
+ newline: newline,
571
+ preview: 10
572
+ }).parse(input);
573
+ for (var j = 0; j < preview.data.length; j++) {
574
+ if (skipEmptyLines && testEmptyLine(preview.data[j])) {
575
+ emptyLinesCount++;
576
+ continue;
577
+ }
578
+ var fieldCount = preview.data[j].length;
579
+ avgFieldCount += fieldCount;
580
+ if (typeof fieldCountPrevRow === 'undefined') {
581
+ fieldCountPrevRow = 0;
582
+ continue;
583
+ }
584
+ else if (fieldCount > 1) {
585
+ delta += Math.abs(fieldCount - fieldCountPrevRow);
586
+ fieldCountPrevRow = fieldCount;
587
+ }
588
+ }
589
+ if (preview.data.length > 0)
590
+ avgFieldCount /= preview.data.length - emptyLinesCount;
591
+ if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
592
+ bestDelta = delta;
593
+ bestDelim = delim;
594
+ }
595
+ }
596
+ _config.delimiter = bestDelim;
597
+ return {
598
+ successful: !!bestDelim,
599
+ bestDelimiter: bestDelim
600
+ };
601
+ }
602
+ function guessLineEndings(input, quoteChar) {
603
+ input = input.substr(0, 1024 * 1024); // max length 1 MB
604
+ // Replace all the text inside quotes
605
+ var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
606
+ input = input.replace(re, '');
607
+ var r = input.split('\r');
608
+ var n = input.split('\n');
609
+ var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
610
+ if (r.length === 1 || nAppearsFirst)
611
+ return '\n';
612
+ var numWithN = 0;
613
+ for (var i = 0; i < r.length; i++) {
614
+ if (r[i][0] === '\n')
615
+ numWithN++;
616
+ }
617
+ return numWithN >= r.length / 2 ? '\r\n' : '\r';
618
+ }
619
+ function addError(type, code, msg, row) {
620
+ _results.errors.push({
621
+ type: type,
622
+ code: code,
623
+ message: msg,
624
+ row: row
625
+ });
453
626
  }
454
- return numWithN >= r.length / 2 ? '\r\n' : '\r';
455
- }
456
- function addError(type, code, msg, row) {
457
- _results.errors.push({
458
- type: type,
459
- code: code,
460
- message: msg,
461
- row: row
462
- });
463
- }
464
627
  }
628
+ /** Escapes regex metacharacters so the string matches literally inside a RegExp; see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
465
629
  function escapeRegExp(string) {
466
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
630
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
467
631
  }
632
+ /** The core parser implements speedy and correct CSV parsing */
468
633
  function Parser(config) {
469
- config = config || {};
470
- var delim = config.delimiter;
471
- var newline = config.newline;
472
- var comments = config.comments;
473
- var step = config.step;
474
- var preview = config.preview;
475
- var fastMode = config.fastMode;
476
- var quoteChar;
477
- if (config.quoteChar === undefined) {
478
- quoteChar = '"';
479
- } else {
480
- quoteChar = config.quoteChar;
481
- }
482
- var escapeChar = quoteChar;
483
- if (config.escapeChar !== undefined) {
484
- escapeChar = config.escapeChar;
485
- }
486
- if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
487
- if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
488
- if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
489
- var cursor = 0;
490
- var aborted = false;
491
- this.parse = function (input, baseIndex, ignoreLastRow) {
492
- if (typeof input !== 'string') throw new Error('Input must be a string');
493
- var inputLen = input.length,
494
- delimLen = delim.length,
495
- newlineLen = newline.length,
496
- commentsLen = comments.length;
497
- var stepIsFunction = isFunction(step);
498
- cursor = 0;
499
- var data = [],
500
- errors = [],
501
- row = [],
502
- lastCursor = 0;
503
- if (!input) return returnable();
504
- if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
505
- var rows = input.split(newline);
506
- for (var i = 0; i < rows.length; i++) {
507
- row = rows[i];
508
- cursor += row.length;
509
- if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
510
- if (comments && row.substr(0, commentsLen) === comments) continue;
511
- if (stepIsFunction) {
512
- data = [];
513
- pushRow(row.split(delim));
514
- doStep();
515
- if (aborted) return returnable();
516
- } else pushRow(row.split(delim));
517
- if (preview && i >= preview) {
518
- data = data.slice(0, preview);
519
- return returnable(true);
520
- }
521
- }
522
- return returnable();
634
+ // Unpack the config object
635
+ config = config || {};
636
+ var delim = config.delimiter;
637
+ var newline = config.newline;
638
+ var comments = config.comments;
639
+ var step = config.step;
640
+ var preview = config.preview;
641
+ var fastMode = config.fastMode;
642
+ var quoteChar;
643
+ /** Defaults quoteChar to '"' when config.quoteChar is undefined; any other value (including null) is used as given */
644
+ if (config.quoteChar === undefined) {
645
+ quoteChar = '"';
646
+ }
647
+ else {
648
+ quoteChar = config.quoteChar;
649
+ }
650
+ var escapeChar = quoteChar;
651
+ if (config.escapeChar !== undefined) {
652
+ escapeChar = config.escapeChar;
523
653
  }
524
- var nextDelim = input.indexOf(delim, cursor);
525
- var nextNewline = input.indexOf(newline, cursor);
526
- var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
527
- var quoteSearch;
528
- for (;;) {
529
- if (input[cursor] === quoteChar) {
530
- quoteSearch = cursor;
531
- cursor++;
654
+ // Delimiter must be valid
655
+ if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
656
+ delim = ',';
657
+ // Comment character must be valid
658
+ if (comments === delim)
659
+ throw new Error('Comment character same as delimiter');
660
+ else if (comments === true)
661
+ comments = '#';
662
+ else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
663
+ comments = false;
664
+ // Newline must be valid: \r, \n, or \r\n
665
+ if (newline !== '\n' && newline !== '\r' && newline !== '\r\n')
666
+ newline = '\n';
667
+ // We're gonna need these at the Parser scope
668
+ var cursor = 0;
669
+ var aborted = false;
670
+ this.parse = function (input, baseIndex, ignoreLastRow) {
671
+ // Reject non-string input up front (upstream note: for some reason, in Chrome, this check also speeds things up)
672
+ if (typeof input !== 'string')
673
+ throw new Error('Input must be a string');
674
+ // We don't need to compute some of these every time parse() is called,
675
+ // but having them in a more local scope seems to perform better
676
+ var inputLen = input.length, delimLen = delim.length, newlineLen = newline.length, commentsLen = comments.length;
677
+ var stepIsFunction = isFunction(step);
678
+ // Establish starting state
679
+ cursor = 0;
680
+ var data = [], errors = [], row = [], lastCursor = 0;
681
+ if (!input)
682
+ return returnable();
683
+ if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
684
+ var rows = input.split(newline);
685
+ for (var i = 0; i < rows.length; i++) {
686
+ row = rows[i];
687
+ cursor += row.length;
688
+ if (i !== rows.length - 1)
689
+ cursor += newline.length;
690
+ else if (ignoreLastRow)
691
+ return returnable();
692
+ if (comments && row.substr(0, commentsLen) === comments)
693
+ continue;
694
+ if (stepIsFunction) {
695
+ data = [];
696
+ pushRow(row.split(delim));
697
+ doStep();
698
+ if (aborted)
699
+ return returnable();
700
+ }
701
+ else
702
+ pushRow(row.split(delim));
703
+ if (preview && i >= preview) {
704
+ data = data.slice(0, preview);
705
+ return returnable(true);
706
+ }
707
+ }
708
+ return returnable();
709
+ }
710
+ var nextDelim = input.indexOf(delim, cursor);
711
+ var nextNewline = input.indexOf(newline, cursor);
712
+ var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
713
+ var quoteSearch;
714
+ // Parser loop
532
715
  for (;;) {
533
- quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
534
- if (quoteSearch === -1) {
535
- if (!ignoreLastRow) {
536
- errors.push({
537
- type: 'Quotes',
538
- code: 'MissingQuotes',
539
- message: 'Quoted field unterminated',
540
- row: data.length,
541
- index: cursor
542
- });
716
+ // Field has opening quote
717
+ if (input[cursor] === quoteChar) {
718
+ // Start our search for the closing quote where the cursor is
719
+ quoteSearch = cursor;
720
+ // Skip the opening quote
721
+ cursor++;
722
+ for (;;) {
723
+ // Find closing quote
724
+ quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
725
+ // No further quote character in the input, so this quoted field is never closed
726
+ if (quoteSearch === -1) {
727
+ if (!ignoreLastRow) {
728
+ // No closing quote... what a pity
729
+ errors.push({
730
+ type: 'Quotes',
731
+ code: 'MissingQuotes',
732
+ message: 'Quoted field unterminated',
733
+ row: data.length, // row has yet to be inserted
734
+ index: cursor
735
+ });
736
+ }
737
+ return finish();
738
+ }
739
+ // Closing quote at EOF
740
+ if (quoteSearch === inputLen - 1) {
741
+ var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
742
+ return finish(value);
743
+ }
744
+ // If this quote is escaped, it's part of the data; skip it
745
+ // If the quote character is the escape character, then check if the next character is the escape character
746
+ if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
747
+ quoteSearch++;
748
+ continue;
749
+ }
750
+ // If the quote character is not the escape character, then check if the previous character was the escape character
751
+ if (quoteChar !== escapeChar &&
752
+ quoteSearch !== 0 &&
753
+ input[quoteSearch - 1] === escapeChar) {
754
+ continue;
755
+ }
756
+ // Check up to nextDelim or nextNewline, whichever is closest
757
+ var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
758
+ var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
759
+ // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
760
+ if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
761
+ row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
762
+ cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
763
+ nextDelim = input.indexOf(delim, cursor);
764
+ nextNewline = input.indexOf(newline, cursor);
765
+ if (stepIsFunction) {
766
+ doStep();
767
+ if (aborted)
768
+ return returnable();
769
+ }
770
+ if (preview && data.length >= preview)
771
+ return returnable(true);
772
+ break;
773
+ }
774
+ var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
775
+ // Closing quote followed by newline or 'unnecessary spaces + newLine'
776
+ if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
777
+ row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
778
+ saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
779
+ nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
780
+ if (stepIsFunction) {
781
+ doStep();
782
+ if (aborted)
783
+ return returnable();
784
+ }
785
+ if (preview && data.length >= preview)
786
+ return returnable(true);
787
+ break;
788
+ }
789
+ // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
790
+ errors.push({
791
+ type: 'Quotes',
792
+ code: 'InvalidQuotes',
793
+ message: 'Trailing quote on quoted field is malformed',
794
+ row: data.length, // row has yet to be inserted
795
+ index: cursor
796
+ });
797
+ quoteSearch++;
798
+ continue;
799
+ }
800
+ if (stepIsFunction) {
801
+ doStep();
802
+ if (aborted)
803
+ return returnable();
804
+ }
805
+ if (preview && data.length >= preview)
806
+ return returnable(true);
807
+ continue;
543
808
  }
544
- return finish();
545
- }
546
- if (quoteSearch === inputLen - 1) {
547
- var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
548
- return finish(value);
549
- }
550
- if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
551
- quoteSearch++;
552
- continue;
553
- }
554
- if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
555
- continue;
556
- }
557
- var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
558
- var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
559
- if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
560
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
561
- cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
562
- nextDelim = input.indexOf(delim, cursor);
563
- nextNewline = input.indexOf(newline, cursor);
564
- if (stepIsFunction) {
565
- doStep();
566
- if (aborted) return returnable();
809
+ // Comment found at start of new line
810
+ if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
811
+ if (nextNewline === -1)
812
+ // Comment ends at EOF
813
+ return returnable();
814
+ cursor = nextNewline + newlineLen;
815
+ nextNewline = input.indexOf(newline, cursor);
816
+ nextDelim = input.indexOf(delim, cursor);
817
+ continue;
567
818
  }
568
- if (preview && data.length >= preview) return returnable(true);
569
- break;
570
- }
571
- var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
572
- if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
573
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
574
- saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
575
- nextDelim = input.indexOf(delim, cursor);
576
- if (stepIsFunction) {
577
- doStep();
578
- if (aborted) return returnable();
819
+ // Next delimiter comes before next newline, so we've reached end of field
820
+ if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
821
+ row.push(input.substring(cursor, nextDelim));
822
+ cursor = nextDelim + delimLen;
823
+ nextDelim = input.indexOf(delim, cursor);
824
+ continue;
825
+ }
826
+ // End of row
827
+ if (nextNewline !== -1) {
828
+ row.push(input.substring(cursor, nextNewline));
829
+ saveRow(nextNewline + newlineLen);
830
+ if (stepIsFunction) {
831
+ doStep();
832
+ if (aborted)
833
+ return returnable();
834
+ }
835
+ if (preview && data.length >= preview)
836
+ return returnable(true);
837
+ continue;
579
838
  }
580
- if (preview && data.length >= preview) return returnable(true);
581
839
  break;
582
- }
583
- errors.push({
584
- type: 'Quotes',
585
- code: 'InvalidQuotes',
586
- message: 'Trailing quote on quoted field is malformed',
587
- row: data.length,
588
- index: cursor
589
- });
590
- quoteSearch++;
591
- continue;
592
840
  }
593
- if (stepIsFunction) {
594
- doStep();
595
- if (aborted) return returnable();
841
+ return finish();
842
+ function pushRow(row) {
843
+ data.push(row);
844
+ lastCursor = cursor;
596
845
  }
597
- if (preview && data.length >= preview) return returnable(true);
598
- continue;
599
- }
600
- if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
601
- if (nextNewline === -1) return returnable();
602
- cursor = nextNewline + newlineLen;
603
- nextNewline = input.indexOf(newline, cursor);
604
- nextDelim = input.indexOf(delim, cursor);
605
- continue;
606
- }
607
- if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
608
- row.push(input.substring(cursor, nextDelim));
609
- cursor = nextDelim + delimLen;
610
- nextDelim = input.indexOf(delim, cursor);
611
- continue;
612
- }
613
- if (nextNewline !== -1) {
614
- row.push(input.substring(cursor, nextNewline));
615
- saveRow(nextNewline + newlineLen);
616
- if (stepIsFunction) {
617
- doStep();
618
- if (aborted) return returnable();
846
+ /**
847
+ * Checks whether only whitespace lies between the closing quote and the given index.
848
+ * If so, returns the length of that whitespace run; otherwise (or when index is -1) returns 0.
849
+ */
850
+ function extraSpaces(index) {
851
+ var spaceLength = 0;
852
+ if (index !== -1) {
853
+ var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
854
+ if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
855
+ spaceLength = textBetweenClosingQuoteAndIndex.length;
856
+ }
857
+ }
858
+ return spaceLength;
619
859
  }
620
- if (preview && data.length >= preview) return returnable(true);
621
- continue;
622
- }
623
- break;
624
- }
625
- return finish();
626
- function pushRow(row) {
627
- data.push(row);
628
- lastCursor = cursor;
629
- }
630
- function extraSpaces(index) {
631
- var spaceLength = 0;
632
- if (index !== -1) {
633
- var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
634
- if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
635
- spaceLength = textBetweenClosingQuoteAndIndex.length;
860
+ /**
861
+ * Appends the remaining input from cursor to the end into
862
+ * row, saves the row, calls step, and returns the results.
863
+ */
864
+ function finish(value) {
865
+ if (ignoreLastRow)
866
+ return returnable();
867
+ if (typeof value === 'undefined')
868
+ value = input.substr(cursor);
869
+ row.push(value);
870
+ cursor = inputLen; // important in case parsing is paused
871
+ pushRow(row);
872
+ if (stepIsFunction)
873
+ doStep();
874
+ return returnable();
636
875
  }
637
- }
638
- return spaceLength;
639
- }
640
- function finish(value) {
641
- if (ignoreLastRow) return returnable();
642
- if (typeof value === 'undefined') value = input.substr(cursor);
643
- row.push(value);
644
- cursor = inputLen;
645
- pushRow(row);
646
- if (stepIsFunction) doStep();
647
- return returnable();
648
- }
649
- function saveRow(newCursor) {
650
- cursor = newCursor;
651
- pushRow(row);
652
- row = [];
653
- nextNewline = input.indexOf(newline, cursor);
654
- }
655
- function returnable(stopped, step) {
656
- var isStep = step || false;
657
- return {
658
- data: isStep ? data[0] : data,
659
- errors: errors,
660
- meta: {
661
- delimiter: delim,
662
- linebreak: newline,
663
- aborted: aborted,
664
- truncated: !!stopped,
665
- cursor: lastCursor + (baseIndex || 0)
876
+ /**
877
+ * Appends the current row to the results. It sets the cursor
878
+ * to newCursor and finds the nextNewline. The caller should
879
+ * take care to execute user's step function and check for
880
+ * preview and end parsing if necessary.
881
+ */
882
+ function saveRow(newCursor) {
883
+ cursor = newCursor;
884
+ pushRow(row);
885
+ row = [];
886
+ nextNewline = input.indexOf(newline, cursor);
666
887
  }
667
- };
668
- }
669
- function doStep() {
670
- step(returnable(undefined, true));
671
- data = [];
672
- errors = [];
673
- }
674
- };
675
- this.abort = function () {
676
- aborted = true;
677
- };
678
- this.getCharIndex = function () {
679
- return cursor;
680
- };
888
+ /** Returns an object with the results, errors, and meta. */
889
+ function returnable(stopped, step) {
890
+ var isStep = step || false;
891
+ return {
892
+ data: isStep ? data[0] : data,
893
+ errors: errors,
894
+ meta: {
895
+ delimiter: delim,
896
+ linebreak: newline,
897
+ aborted: aborted,
898
+ truncated: !!stopped,
899
+ cursor: lastCursor + (baseIndex || 0)
900
+ }
901
+ };
902
+ }
903
+ /** Executes the user's step function and resets data & errors. */
904
+ function doStep() {
905
+ step(returnable(undefined, true));
906
+ data = [];
907
+ errors = [];
908
+ }
909
+ };
910
+ /** Sets the abort flag */
911
+ this.abort = function () {
912
+ aborted = true;
913
+ };
914
+ /** Gets the cursor position */
915
+ this.getCharIndex = function () {
916
+ return cursor;
917
+ };
681
918
  }
682
919
  function notImplemented() {
683
- throw new Error('Not implemented.');
920
+ throw new Error('Not implemented.');
684
921
  }
922
+ /** Recursively copies arrays and objects key by key (for...in); non-object values and null are returned unchanged */
685
923
  function copy(obj) {
686
- if (typeof obj !== 'object' || obj === null) return obj;
687
- var cpy = Array.isArray(obj) ? [] : {};
688
- for (var key in obj) cpy[key] = copy(obj[key]);
689
- return cpy;
924
+ if (typeof obj !== 'object' || obj === null)
925
+ return obj;
926
+ var cpy = Array.isArray(obj) ? [] : {};
927
+ for (var key in obj)
928
+ cpy[key] = copy(obj[key]);
929
+ return cpy;
690
930
  }
691
931
  function isFunction(func) {
692
- return typeof func === 'function';
932
+ return typeof func === 'function';
693
933
  }
694
- //# sourceMappingURL=papaparse.js.map