@loaders.gl/csv 3.1.3 → 4.0.0-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/dist/bundle.js +2 -2
  2. package/dist/bundle.js.map +1 -0
  3. package/dist/csv-loader.js +220 -247
  4. package/dist/csv-loader.js.map +1 -0
  5. package/dist/csv-writer.js +2 -6
  6. package/dist/{es5/csv-writer.js.map → csv-writer.js.map} +0 -0
  7. package/dist/index.js +2 -5
  8. package/dist/index.js.map +1 -0
  9. package/dist/papaparse/async-iterator-streamer.js +32 -60
  10. package/dist/papaparse/async-iterator-streamer.js.map +1 -0
  11. package/dist/papaparse/papaparse.js +795 -870
  12. package/dist/papaparse/papaparse.js.map +1 -0
  13. package/package.json +6 -6
  14. package/dist/es5/bundle.js +0 -7
  15. package/dist/es5/bundle.js.map +0 -1
  16. package/dist/es5/csv-loader.js +0 -309
  17. package/dist/es5/csv-loader.js.map +0 -1
  18. package/dist/es5/csv-writer.js +0 -2
  19. package/dist/es5/index.js +0 -14
  20. package/dist/es5/index.js.map +0 -1
  21. package/dist/es5/papaparse/async-iterator-streamer.js +0 -140
  22. package/dist/es5/papaparse/async-iterator-streamer.js.map +0 -1
  23. package/dist/es5/papaparse/papaparse.js +0 -882
  24. package/dist/es5/papaparse/papaparse.js.map +0 -1
  25. package/dist/esm/bundle.js +0 -5
  26. package/dist/esm/bundle.js.map +0 -1
  27. package/dist/esm/csv-loader.js +0 -240
  28. package/dist/esm/csv-loader.js.map +0 -1
  29. package/dist/esm/csv-writer.js +0 -2
  30. package/dist/esm/csv-writer.js.map +0 -1
  31. package/dist/esm/index.js +0 -2
  32. package/dist/esm/index.js.map +0 -1
  33. package/dist/esm/papaparse/async-iterator-streamer.js +0 -35
  34. package/dist/esm/papaparse/async-iterator-streamer.js.map +0 -1
  35. package/dist/esm/papaparse/papaparse.js +0 -860
  36. package/dist/esm/papaparse/papaparse.js.map +0 -1
@@ -1,935 +1,860 @@
1
- "use strict";
2
- // @ts-nocheck
3
- // This is a fork of papaparse
4
- // https://github.com/mholt/PapaParse
5
1
  /* @license
6
2
  Papa Parse
7
3
  v5.0.0-beta.0
8
4
  https://github.com/mholt/PapaParse
9
5
  License: MIT
10
6
  */
11
- // FORK SUMMARY:
12
- // - Adopt ES6 exports
13
- // - Implement new AsyncIteratorStreamer
14
- // - Remove non Async Iterator streamers (can all be handled by new streamer)
15
- // - Remove unused Worker support (loaders.gl worker system used instead)
16
- // - Remove unused jQuery plugin support
17
- Object.defineProperty(exports, "__esModule", { value: true });
18
- /* eslint-disable */
19
7
  const BYTE_ORDER_MARK = '\ufeff';
20
8
  const Papa = {
21
- parse: CsvToJson,
22
- unparse: JsonToCsv,
23
- RECORD_SEP: String.fromCharCode(30),
24
- UNIT_SEP: String.fromCharCode(31),
25
- BYTE_ORDER_MARK,
26
- BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
27
- WORKERS_SUPPORTED: false,
28
- NODE_STREAM_INPUT: 1,
29
- // Configurable chunk sizes for local and remote files, respectively
30
- LocalChunkSize: 1024 * 1024 * 10,
31
- RemoteChunkSize: 1024 * 1024 * 5,
32
- DefaultDelimiter: ',',
33
- // Exposed for testing and development only
34
- Parser: Parser,
35
- ParserHandle: ParserHandle,
36
- // BEGIN FORK
37
- ChunkStreamer: ChunkStreamer,
38
- StringStreamer: StringStreamer
9
+ parse: CsvToJson,
10
+ unparse: JsonToCsv,
11
+ RECORD_SEP: String.fromCharCode(30),
12
+ UNIT_SEP: String.fromCharCode(31),
13
+ BYTE_ORDER_MARK,
14
+ BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
15
+ WORKERS_SUPPORTED: false,
16
+ NODE_STREAM_INPUT: 1,
17
+ LocalChunkSize: 1024 * 1024 * 10,
18
+ RemoteChunkSize: 1024 * 1024 * 5,
19
+ DefaultDelimiter: ',',
20
+ Parser: Parser,
21
+ ParserHandle: ParserHandle,
22
+ ChunkStreamer: ChunkStreamer,
23
+ StringStreamer: StringStreamer
39
24
  };
40
- exports.default = Papa;
41
- /*
42
- Papa.NetworkStreamer = NetworkStreamer;
43
- Papa.FileStreamer = FileStreamer;
44
- Papa.ReadableStreamStreamer = ReadableStreamStreamer;
45
- if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
46
- Papa.DuplexStreamStreamer = DuplexStreamStreamer;
25
+ export default Papa;
26
+
27
+ function CsvToJson(_input, _config, UserDefinedStreamer) {
28
+ _config = _config || {};
29
+ var dynamicTyping = _config.dynamicTyping || false;
30
+
31
+ if (isFunction(dynamicTyping)) {
32
+ _config.dynamicTypingFunction = dynamicTyping;
33
+ dynamicTyping = {};
34
+ }
35
+
36
+ _config.dynamicTyping = dynamicTyping;
37
+ _config.transform = isFunction(_config.transform) ? _config.transform : false;
38
+
39
+ if (_config.worker && Papa.WORKERS_SUPPORTED) {
40
+ var w = newWorker();
41
+ w.userStep = _config.step;
42
+ w.userChunk = _config.chunk;
43
+ w.userComplete = _config.complete;
44
+ w.userError = _config.error;
45
+ _config.step = isFunction(_config.step);
46
+ _config.chunk = isFunction(_config.chunk);
47
+ _config.complete = isFunction(_config.complete);
48
+ _config.error = isFunction(_config.error);
49
+ delete _config.worker;
50
+ w.postMessage({
51
+ input: _input,
52
+ config: _config,
53
+ workerId: w.id
54
+ });
55
+ return;
56
+ }
57
+
58
+ var streamer = null;
59
+
60
+ if (typeof _input === 'string') {
61
+ streamer = new StringStreamer(_config);
62
+ }
63
+
64
+ if (!streamer) {
65
+ streamer = new UserDefinedStreamer(_config);
66
+ }
67
+
68
+ return streamer.stream(_input);
47
69
  }
48
- */
49
- // END FORK
50
- // BEGIN FORK
51
- // Adds an argument to papa.parse
52
- // function CsvToJson(_input, _config)
53
- function CsvToJson(_input, _config, UserDefinedStreamer // BEGIN FORK
54
- ) {
55
- _config = _config || {};
56
- var dynamicTyping = _config.dynamicTyping || false;
57
- if (isFunction(dynamicTyping)) {
58
- _config.dynamicTypingFunction = dynamicTyping;
59
- // Will be filled on first row call
60
- dynamicTyping = {};
61
- }
62
- _config.dynamicTyping = dynamicTyping;
63
- _config.transform = isFunction(_config.transform) ? _config.transform : false;
64
- if (_config.worker && Papa.WORKERS_SUPPORTED) {
65
- var w = newWorker();
66
- w.userStep = _config.step;
67
- w.userChunk = _config.chunk;
68
- w.userComplete = _config.complete;
69
- w.userError = _config.error;
70
- _config.step = isFunction(_config.step);
71
- _config.chunk = isFunction(_config.chunk);
72
- _config.complete = isFunction(_config.complete);
73
- _config.error = isFunction(_config.error);
74
- delete _config.worker; // prevent infinite loop
75
- w.postMessage({
76
- input: _input,
77
- config: _config,
78
- workerId: w.id
79
- });
80
- return;
70
+
71
+ function JsonToCsv(_input, _config) {
72
+ var _quotes = false;
73
+ var _writeHeader = true;
74
+ var _delimiter = ',';
75
+ var _newline = '\r\n';
76
+ var _quoteChar = '"';
77
+
78
+ var _escapedQuote = _quoteChar + _quoteChar;
79
+
80
+ var _skipEmptyLines = false;
81
+ var _columns = null;
82
+ unpackConfig();
83
+ var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
84
+ if (typeof _input === 'string') _input = JSON.parse(_input);
85
+
86
+ if (Array.isArray(_input)) {
87
+ if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
88
+ } else if (typeof _input === 'object') {
89
+ if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
90
+
91
+ if (Array.isArray(_input.data)) {
92
+ if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
93
+ if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
94
+ if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
81
95
  }
82
- var streamer = null;
83
- /*
84
- if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
85
- // create a node Duplex stream for use
86
- // with .pipe
87
- streamer = new DuplexStreamStreamer(_config);
88
- return streamer.getStream();
89
- } else
90
- */
91
- if (typeof _input === 'string') {
92
- // if (_config.download) streamer = new NetworkStreamer(_config);
93
- // else
94
- streamer = new StringStreamer(_config);
96
+
97
+ return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
98
+ }
99
+
100
+ throw new Error('Unable to serialize unrecognized input');
101
+
102
+ function unpackConfig() {
103
+ if (typeof _config !== 'object') return;
104
+
105
+ if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
106
+ return _config.delimiter.indexOf(value) !== -1;
107
+ }).length) {
108
+ _delimiter = _config.delimiter;
95
109
  }
96
- /*
97
- else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
98
- streamer = new ReadableStreamStreamer(_config);
99
- } else if ((globalThis.File && _input instanceof File) || _input instanceof Object)
100
- // ...Safari. (see issue #106)
101
- streamer = new FileStreamer(_config);
102
- */
103
- // BEGIN FORK
104
- if (!streamer) {
105
- streamer = new UserDefinedStreamer(_config);
110
+
111
+ if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
112
+ if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
113
+ if (typeof _config.newline === 'string') _newline = _config.newline;
114
+ if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
115
+ if (typeof _config.header === 'boolean') _writeHeader = _config.header;
116
+
117
+ if (Array.isArray(_config.columns)) {
118
+ if (_config.columns.length === 0) throw new Error('Option columns is empty');
119
+ _columns = _config.columns;
106
120
  }
107
- // END FORK
108
- return streamer.stream(_input);
109
- }
110
- function JsonToCsv(_input, _config) {
111
- // Default configuration
112
- /** whether to surround every datum with quotes */
113
- var _quotes = false;
114
- /** whether to write headers */
115
- var _writeHeader = true;
116
- /** delimiting character(s) */
117
- var _delimiter = ',';
118
- /** newline character(s) */
119
- var _newline = '\r\n';
120
- /** quote character */
121
- var _quoteChar = '"';
122
- /** escaped quote character, either "" or <config.escapeChar>" */
123
- var _escapedQuote = _quoteChar + _quoteChar;
124
- /** whether to skip empty lines */
125
- var _skipEmptyLines = false;
126
- /** the columns (keys) we expect when we unparse objects */
127
- var _columns = null;
128
- unpackConfig();
129
- var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
130
- if (typeof _input === 'string')
131
- _input = JSON.parse(_input);
132
- if (Array.isArray(_input)) {
133
- if (!_input.length || Array.isArray(_input[0]))
134
- return serialize(null, _input, _skipEmptyLines);
135
- else if (typeof _input[0] === 'object')
136
- return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
121
+
122
+ if (_config.escapeChar !== undefined) {
123
+ _escapedQuote = _config.escapeChar + _quoteChar;
137
124
  }
138
- else if (typeof _input === 'object') {
139
- if (typeof _input.data === 'string')
140
- _input.data = JSON.parse(_input.data);
141
- if (Array.isArray(_input.data)) {
142
- if (!_input.fields)
143
- _input.fields = _input.meta && _input.meta.fields;
144
- if (!_input.fields)
145
- _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
146
- if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
147
- _input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
148
- }
149
- return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
125
+ }
126
+
127
+ function objectKeys(obj) {
128
+ if (typeof obj !== 'object') return [];
129
+ var keys = [];
130
+
131
+ for (var key in obj) keys.push(key);
132
+
133
+ return keys;
134
+ }
135
+
136
+ function serialize(fields, data, skipEmptyLines) {
137
+ var csv = '';
138
+ if (typeof fields === 'string') fields = JSON.parse(fields);
139
+ if (typeof data === 'string') data = JSON.parse(data);
140
+ var hasHeader = Array.isArray(fields) && fields.length > 0;
141
+ var dataKeyedByField = !Array.isArray(data[0]);
142
+
143
+ if (hasHeader && _writeHeader) {
144
+ for (var i = 0; i < fields.length; i++) {
145
+ if (i > 0) csv += _delimiter;
146
+ csv += safe(fields[i], i);
147
+ }
148
+
149
+ if (data.length > 0) csv += _newline;
150
150
  }
151
- // Default (any valid paths should return before this)
152
- throw new Error('Unable to serialize unrecognized input');
153
- function unpackConfig() {
154
- if (typeof _config !== 'object')
155
- return;
156
- if (typeof _config.delimiter === 'string' &&
157
- !Papa.BAD_DELIMITERS.filter(function (value) {
158
- return _config.delimiter.indexOf(value) !== -1;
159
- }).length) {
160
- _delimiter = _config.delimiter;
151
+
152
+ for (var row = 0; row < data.length; row++) {
153
+ var maxCol = hasHeader ? fields.length : data[row].length;
154
+ var emptyLine = false;
155
+ var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
156
+
157
+ if (skipEmptyLines && !hasHeader) {
158
+ emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
159
+ }
160
+
161
+ if (skipEmptyLines === 'greedy' && hasHeader) {
162
+ var line = [];
163
+
164
+ for (var c = 0; c < maxCol; c++) {
165
+ var cx = dataKeyedByField ? fields[c] : c;
166
+ line.push(data[row][cx]);
161
167
  }
162
- if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
163
- _quotes = _config.quotes;
164
- if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
165
- _skipEmptyLines = _config.skipEmptyLines;
166
- if (typeof _config.newline === 'string')
167
- _newline = _config.newline;
168
- if (typeof _config.quoteChar === 'string')
169
- _quoteChar = _config.quoteChar;
170
- if (typeof _config.header === 'boolean')
171
- _writeHeader = _config.header;
172
- if (Array.isArray(_config.columns)) {
173
- if (_config.columns.length === 0)
174
- throw new Error('Option columns is empty');
175
- _columns = _config.columns;
168
+
169
+ emptyLine = line.join('').trim() === '';
170
+ }
171
+
172
+ if (!emptyLine) {
173
+ for (var col = 0; col < maxCol; col++) {
174
+ if (col > 0 && !nullLine) csv += _delimiter;
175
+ var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
176
+ csv += safe(data[row][colIdx], col);
176
177
  }
177
- if (_config.escapeChar !== undefined) {
178
- _escapedQuote = _config.escapeChar + _quoteChar;
178
+
179
+ if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
180
+ csv += _newline;
179
181
  }
182
+ }
180
183
  }
181
- /** Turns an object's keys into an array */
182
- function objectKeys(obj) {
183
- if (typeof obj !== 'object')
184
- return [];
185
- var keys = [];
186
- for (var key in obj)
187
- keys.push(key);
188
- return keys;
184
+
185
+ return csv;
186
+ }
187
+
188
+ function safe(str, col) {
189
+ if (typeof str === 'undefined' || str === null) return '';
190
+ if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
191
+ str = str.toString().replace(quoteCharRegex, _escapedQuote);
192
+ var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
193
+ return needsQuotes ? _quoteChar + str + _quoteChar : str;
194
+ }
195
+
196
+ function hasAny(str, substrings) {
197
+ for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
198
+
199
+ return false;
200
+ }
201
+ }
202
+
203
+ function ChunkStreamer(config) {
204
+ this._handle = null;
205
+ this._finished = false;
206
+ this._completed = false;
207
+ this._input = null;
208
+ this._baseIndex = 0;
209
+ this._partialLine = '';
210
+ this._rowCount = 0;
211
+ this._start = 0;
212
+ this._nextChunk = null;
213
+ this.isFirstChunk = true;
214
+ this._completeResults = {
215
+ data: [],
216
+ errors: [],
217
+ meta: {}
218
+ };
219
+ replaceConfig.call(this, config);
220
+
221
+ this.parseChunk = function (chunk, isFakeChunk) {
222
+ if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
223
+ var modifiedChunk = this._config.beforeFirstChunk(chunk);
224
+
225
+ if (modifiedChunk !== undefined) chunk = modifiedChunk;
189
226
  }
190
- /** The double for loop that iterates the data and writes out a CSV string including header row */
191
- function serialize(fields, data, skipEmptyLines) {
192
- var csv = '';
193
- if (typeof fields === 'string')
194
- fields = JSON.parse(fields);
195
- if (typeof data === 'string')
196
- data = JSON.parse(data);
197
- var hasHeader = Array.isArray(fields) && fields.length > 0;
198
- var dataKeyedByField = !Array.isArray(data[0]);
199
- // If there a header row, write it first
200
- if (hasHeader && _writeHeader) {
201
- for (var i = 0; i < fields.length; i++) {
202
- if (i > 0)
203
- csv += _delimiter;
204
- csv += safe(fields[i], i);
205
- }
206
- if (data.length > 0)
207
- csv += _newline;
208
- }
209
- // Then write out the data
210
- for (var row = 0; row < data.length; row++) {
211
- var maxCol = hasHeader ? fields.length : data[row].length;
212
- var emptyLine = false;
213
- var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
214
- if (skipEmptyLines && !hasHeader) {
215
- emptyLine =
216
- skipEmptyLines === 'greedy'
217
- ? data[row].join('').trim() === ''
218
- : data[row].length === 1 && data[row][0].length === 0;
219
- }
220
- if (skipEmptyLines === 'greedy' && hasHeader) {
221
- var line = [];
222
- for (var c = 0; c < maxCol; c++) {
223
- var cx = dataKeyedByField ? fields[c] : c;
224
- line.push(data[row][cx]);
225
- }
226
- emptyLine = line.join('').trim() === '';
227
- }
228
- if (!emptyLine) {
229
- for (var col = 0; col < maxCol; col++) {
230
- if (col > 0 && !nullLine)
231
- csv += _delimiter;
232
- var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
233
- csv += safe(data[row][colIdx], col);
234
- }
235
- if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
236
- csv += _newline;
237
- }
238
- }
239
- }
240
- return csv;
227
+
228
+ this.isFirstChunk = false;
229
+ var aggregate = this._partialLine + chunk;
230
+ this._partialLine = '';
231
+
232
+ var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
233
+
234
+ if (this._handle.paused() || this._handle.aborted()) return;
235
+ var lastIndex = results.meta.cursor;
236
+
237
+ if (!this._finished) {
238
+ this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
239
+ this._baseIndex = lastIndex;
241
240
  }
242
- /** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
243
- function safe(str, col) {
244
- if (typeof str === 'undefined' || str === null)
245
- return '';
246
- if (str.constructor === Date)
247
- return JSON.stringify(str).slice(1, 25);
248
- str = str.toString().replace(quoteCharRegex, _escapedQuote);
249
- var needsQuotes = (typeof _quotes === 'boolean' && _quotes) ||
250
- (Array.isArray(_quotes) && _quotes[col]) ||
251
- hasAny(str, Papa.BAD_DELIMITERS) ||
252
- str.indexOf(_delimiter) > -1 ||
253
- str.charAt(0) === ' ' ||
254
- str.charAt(str.length - 1) === ' ';
255
- return needsQuotes ? _quoteChar + str + _quoteChar : str;
241
+
242
+ if (results && results.data) this._rowCount += results.data.length;
243
+ var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
244
+
245
+ if (isFunction(this._config.chunk) && !isFakeChunk) {
246
+ this._config.chunk(results, this._handle);
247
+
248
+ if (this._handle.paused() || this._handle.aborted()) return;
249
+ results = undefined;
250
+ this._completeResults = undefined;
256
251
  }
257
- function hasAny(str, substrings) {
258
- for (var i = 0; i < substrings.length; i++)
259
- if (str.indexOf(substrings[i]) > -1)
260
- return true;
261
- return false;
252
+
253
+ if (!this._config.step && !this._config.chunk) {
254
+ this._completeResults.data = this._completeResults.data.concat(results.data);
255
+ this._completeResults.errors = this._completeResults.errors.concat(results.errors);
256
+ this._completeResults.meta = results.meta;
262
257
  }
263
- }
264
- /** ChunkStreamer is the base prototype for various streamer implementations. */
265
- function ChunkStreamer(config) {
266
- this._handle = null;
267
- this._finished = false;
268
- this._completed = false;
269
- this._input = null;
270
- this._baseIndex = 0;
271
- this._partialLine = '';
272
- this._rowCount = 0;
273
- this._start = 0;
274
- this._nextChunk = null;
275
- this.isFirstChunk = true;
276
- this._completeResults = {
277
- data: [],
278
- errors: [],
279
- meta: {}
280
- };
281
- replaceConfig.call(this, config);
282
- this.parseChunk = function (chunk, isFakeChunk) {
283
- // First chunk pre-processing
284
- if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
285
- var modifiedChunk = this._config.beforeFirstChunk(chunk);
286
- if (modifiedChunk !== undefined)
287
- chunk = modifiedChunk;
288
- }
289
- this.isFirstChunk = false;
290
- // Rejoin the line we likely just split in two by chunking the file
291
- var aggregate = this._partialLine + chunk;
292
- this._partialLine = '';
293
- var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
294
- if (this._handle.paused() || this._handle.aborted())
295
- return;
296
- var lastIndex = results.meta.cursor;
297
- if (!this._finished) {
298
- this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
299
- this._baseIndex = lastIndex;
300
- }
301
- if (results && results.data)
302
- this._rowCount += results.data.length;
303
- var finishedIncludingPreview = this._finished || (this._config.preview && this._rowCount >= this._config.preview);
304
- if (isFunction(this._config.chunk) && !isFakeChunk) {
305
- this._config.chunk(results, this._handle);
306
- if (this._handle.paused() || this._handle.aborted())
307
- return;
308
- results = undefined;
309
- this._completeResults = undefined;
310
- }
311
- if (!this._config.step && !this._config.chunk) {
312
- this._completeResults.data = this._completeResults.data.concat(results.data);
313
- this._completeResults.errors = this._completeResults.errors.concat(results.errors);
314
- this._completeResults.meta = results.meta;
315
- }
316
- if (!this._completed &&
317
- finishedIncludingPreview &&
318
- isFunction(this._config.complete) &&
319
- (!results || !results.meta.aborted)) {
320
- this._config.complete(this._completeResults, this._input);
321
- this._completed = true;
322
- }
323
- if (!finishedIncludingPreview && (!results || !results.meta.paused))
324
- this._nextChunk();
325
- return results;
326
- };
327
- this._sendError = function (error) {
328
- if (isFunction(this._config.error))
329
- this._config.error(error);
330
- };
331
- function replaceConfig(config) {
332
- // Deep-copy the config so we can edit it
333
- var configCopy = copy(config);
334
- configCopy.chunkSize = parseInt(configCopy.chunkSize); // parseInt VERY important so we don't concatenate strings!
335
- if (!config.step && !config.chunk)
336
- configCopy.chunkSize = null; // disable Range header if not streaming; bad values break IIS - see issue #196
337
- this._handle = new ParserHandle(configCopy);
338
- this._handle.streamer = this;
339
- this._config = configCopy; // persist the copy to the caller
258
+
259
+ if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
260
+ this._config.complete(this._completeResults, this._input);
261
+
262
+ this._completed = true;
340
263
  }
264
+
265
+ if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
266
+ return results;
267
+ };
268
+
269
+ this._sendError = function (error) {
270
+ if (isFunction(this._config.error)) this._config.error(error);
271
+ };
272
+
273
+ function replaceConfig(config) {
274
+ var configCopy = copy(config);
275
+ configCopy.chunkSize = parseInt(configCopy.chunkSize);
276
+ if (!config.step && !config.chunk) configCopy.chunkSize = null;
277
+ this._handle = new ParserHandle(configCopy);
278
+ this._handle.streamer = this;
279
+ this._config = configCopy;
280
+ }
341
281
  }
282
+
342
283
  function StringStreamer(config) {
343
- config = config || {};
344
- ChunkStreamer.call(this, config);
345
- var remaining;
346
- this.stream = function (s) {
347
- remaining = s;
348
- return this._nextChunk();
349
- };
350
- this._nextChunk = function () {
351
- if (this._finished)
352
- return;
353
- var size = this._config.chunkSize;
354
- var chunk = size ? remaining.substr(0, size) : remaining;
355
- remaining = size ? remaining.substr(size) : '';
356
- this._finished = !remaining;
357
- return this.parseChunk(chunk);
358
- };
284
+ config = config || {};
285
+ ChunkStreamer.call(this, config);
286
+ var remaining;
287
+
288
+ this.stream = function (s) {
289
+ remaining = s;
290
+ return this._nextChunk();
291
+ };
292
+
293
+ this._nextChunk = function () {
294
+ if (this._finished) return;
295
+ var size = this._config.chunkSize;
296
+ var chunk = size ? remaining.substr(0, size) : remaining;
297
+ remaining = size ? remaining.substr(size) : '';
298
+ this._finished = !remaining;
299
+ return this.parseChunk(chunk);
300
+ };
359
301
  }
302
+
360
303
  StringStreamer.prototype = Object.create(StringStreamer.prototype);
361
304
  StringStreamer.prototype.constructor = StringStreamer;
362
- // Use one ParserHandle per entire CSV file or string
305
+
363
306
  function ParserHandle(_config) {
364
- // One goal is to minimize the use of regular expressions...
365
- var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
366
- var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
367
- var self = this;
368
- var _stepCounter = 0; // Number of times step was called (number of rows parsed)
369
- var _rowCounter = 0; // Number of rows that have been parsed so far
370
- var _input; // The input being parsed
371
- var _parser; // The core parser being used
372
- var _paused = false; // Whether we are paused or not
373
- var _aborted = false; // Whether the parser has aborted or not
374
- var _delimiterError; // Temporary state between delimiter detection and processing results
375
- var _fields = []; // Fields are from the header row of the input, if there is one
376
- var _results = {
377
- // The last results returned from the parser
378
- data: [],
379
- errors: [],
380
- meta: {}
381
- };
382
- if (isFunction(_config.step)) {
383
- var userStep = _config.step;
384
- _config.step = function (results) {
385
- _results = results;
386
- if (needsHeaderRow())
387
- processResults();
388
- // only call user's step function after header row
389
- else {
390
- processResults();
391
- // It's possbile that this line was empty and there's no row here after all
392
- if (!_results.data || _results.data.length === 0)
393
- return;
394
- _stepCounter += results.data.length;
395
- if (_config.preview && _stepCounter > _config.preview)
396
- _parser.abort();
397
- else
398
- userStep(_results, self);
399
- }
400
- };
401
- }
402
- /**
403
- * Parses input. Most users won't need, and shouldn't mess with, the baseIndex
404
- * and ignoreLastRow parameters. They are used by streamers (wrapper functions)
405
- * when an input comes in multiple chunks, like from a file.
406
- */
407
- this.parse = function (input, baseIndex, ignoreLastRow) {
408
- var quoteChar = _config.quoteChar || '"';
409
- if (!_config.newline)
410
- _config.newline = guessLineEndings(input, quoteChar);
411
- _delimiterError = false;
412
- if (!_config.delimiter) {
413
- var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
414
- if (delimGuess.successful)
415
- _config.delimiter = delimGuess.bestDelimiter;
416
- else {
417
- _delimiterError = true; // add error after parsing (otherwise it would be overwritten)
418
- _config.delimiter = Papa.DefaultDelimiter;
419
- }
420
- _results.meta.delimiter = _config.delimiter;
307
+ var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
308
+ var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
309
+ var self = this;
310
+ var _stepCounter = 0;
311
+ var _rowCounter = 0;
312
+
313
+ var _input;
314
+
315
+ var _parser;
316
+
317
+ var _paused = false;
318
+ var _aborted = false;
319
+
320
+ var _delimiterError;
321
+
322
+ var _fields = [];
323
+ var _results = {
324
+ data: [],
325
+ errors: [],
326
+ meta: {}
327
+ };
328
+
329
+ if (isFunction(_config.step)) {
330
+ var userStep = _config.step;
331
+
332
+ _config.step = function (results) {
333
+ _results = results;
334
+ if (needsHeaderRow()) processResults();else {
335
+ processResults();
336
+ if (!_results.data || _results.data.length === 0) return;
337
+ _stepCounter += results.data.length;
338
+ if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
421
339
  }
422
- else if (isFunction(_config.delimiter)) {
423
- _config.delimiter = _config.delimiter(input);
424
- _results.meta.delimiter = _config.delimiter;
425
- }
426
- var parserConfig = copy(_config);
427
- if (_config.preview && _config.header)
428
- parserConfig.preview++; // to compensate for header row
429
- _input = input;
430
- _parser = new Parser(parserConfig);
431
- _results = _parser.parse(_input, baseIndex, ignoreLastRow);
432
- processResults();
433
- return _paused ? { meta: { paused: true } } : _results || { meta: { paused: false } };
434
- };
435
- this.paused = function () {
436
- return _paused;
437
- };
438
- this.pause = function () {
439
- _paused = true;
440
- _parser.abort();
441
- _input = _input.substr(_parser.getCharIndex());
442
- };
443
- this.resume = function () {
444
- _paused = false;
445
- self.streamer.parseChunk(_input, true);
446
- };
447
- this.aborted = function () {
448
- return _aborted;
449
- };
450
- this.abort = function () {
451
- _aborted = true;
452
- _parser.abort();
453
- _results.meta.aborted = true;
454
- if (isFunction(_config.complete))
455
- _config.complete(_results);
456
- _input = '';
457
340
  };
458
- function testEmptyLine(s) {
459
- return _config.skipEmptyLines === 'greedy'
460
- ? s.join('').trim() === ''
461
- : s.length === 1 && s[0].length === 0;
341
+ }
342
+
343
+ this.parse = function (input, baseIndex, ignoreLastRow) {
344
+ var quoteChar = _config.quoteChar || '"';
345
+ if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
346
+ _delimiterError = false;
347
+
348
+ if (!_config.delimiter) {
349
+ var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
350
+ if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
351
+ _delimiterError = true;
352
+ _config.delimiter = Papa.DefaultDelimiter;
353
+ }
354
+ _results.meta.delimiter = _config.delimiter;
355
+ } else if (isFunction(_config.delimiter)) {
356
+ _config.delimiter = _config.delimiter(input);
357
+ _results.meta.delimiter = _config.delimiter;
462
358
  }
463
- function processResults() {
464
- if (_results && _delimiterError) {
465
- addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
466
- _delimiterError = false;
467
- }
468
- if (_config.skipEmptyLines) {
469
- for (var i = 0; i < _results.data.length; i++)
470
- if (testEmptyLine(_results.data[i]))
471
- _results.data.splice(i--, 1);
472
- }
473
- if (needsHeaderRow())
474
- fillHeaderFields();
475
- return applyHeaderAndDynamicTypingAndTransformation();
359
+
360
+ var parserConfig = copy(_config);
361
+ if (_config.preview && _config.header) parserConfig.preview++;
362
+ _input = input;
363
+ _parser = new Parser(parserConfig);
364
+ _results = _parser.parse(_input, baseIndex, ignoreLastRow);
365
+ processResults();
366
+ return _paused ? {
367
+ meta: {
368
+ paused: true
369
+ }
370
+ } : _results || {
371
+ meta: {
372
+ paused: false
373
+ }
374
+ };
375
+ };
376
+
377
+ this.paused = function () {
378
+ return _paused;
379
+ };
380
+
381
+ this.pause = function () {
382
+ _paused = true;
383
+
384
+ _parser.abort();
385
+
386
+ _input = _input.substr(_parser.getCharIndex());
387
+ };
388
+
389
+ this.resume = function () {
390
+ _paused = false;
391
+ self.streamer.parseChunk(_input, true);
392
+ };
393
+
394
+ this.aborted = function () {
395
+ return _aborted;
396
+ };
397
+
398
+ this.abort = function () {
399
+ _aborted = true;
400
+
401
+ _parser.abort();
402
+
403
+ _results.meta.aborted = true;
404
+ if (isFunction(_config.complete)) _config.complete(_results);
405
+ _input = '';
406
+ };
407
+
408
+ function testEmptyLine(s) {
409
+ return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
410
+ }
411
+
412
+ function processResults() {
413
+ if (_results && _delimiterError) {
414
+ addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
415
+ _delimiterError = false;
476
416
  }
477
- function needsHeaderRow() {
478
- return _config.header && _fields.length === 0;
417
+
418
+ if (_config.skipEmptyLines) {
419
+ for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
479
420
  }
480
- function fillHeaderFields() {
481
- if (!_results)
482
- return;
483
- function addHeder(header) {
484
- if (isFunction(_config.transformHeader))
485
- header = _config.transformHeader(header);
486
- _fields.push(header);
487
- }
488
- if (Array.isArray(_results.data[0])) {
489
- for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
490
- _results.data[i].forEach(addHeder);
491
- _results.data.splice(0, 1);
492
- }
493
- // if _results.data[0] is not an array, we are in a step where _results.data is the row.
494
- else
495
- _results.data.forEach(addHeder);
421
+
422
+ if (needsHeaderRow()) fillHeaderFields();
423
+ return applyHeaderAndDynamicTypingAndTransformation();
424
+ }
425
+
426
+ function needsHeaderRow() {
427
+ return _config.header && _fields.length === 0;
428
+ }
429
+
430
+ function fillHeaderFields() {
431
+ if (!_results) return;
432
+
433
+ function addHeder(header) {
434
+ if (isFunction(_config.transformHeader)) header = _config.transformHeader(header);
435
+
436
+ _fields.push(header);
496
437
  }
497
- function shouldApplyDynamicTyping(field) {
498
- // Cache function values to avoid calling it for each row
499
- if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
500
- _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
501
- }
502
- return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
438
+
439
+ if (Array.isArray(_results.data[0])) {
440
+ for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
441
+
442
+ _results.data.splice(0, 1);
443
+ } else _results.data.forEach(addHeder);
444
+ }
445
+
446
+ function shouldApplyDynamicTyping(field) {
447
+ if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
448
+ _config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
503
449
  }
504
- function parseDynamic(field, value) {
505
- if (shouldApplyDynamicTyping(field)) {
506
- if (value === 'true' || value === 'TRUE')
507
- return true;
508
- else if (value === 'false' || value === 'FALSE')
509
- return false;
510
- else if (FLOAT.test(value))
511
- return parseFloat(value);
512
- else if (ISO_DATE.test(value))
513
- return new Date(value);
514
- else
515
- return value === '' ? null : value;
516
- }
517
- return value;
450
+
451
+ return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
452
+ }
453
+
454
+ function parseDynamic(field, value) {
455
+ if (shouldApplyDynamicTyping(field)) {
456
+ if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
518
457
  }
519
- function applyHeaderAndDynamicTypingAndTransformation() {
520
- if (!_results ||
521
- !_results.data ||
522
- (!_config.header && !_config.dynamicTyping && !_config.transform))
523
- return _results;
524
- function processRow(rowSource, i) {
525
- var row = _config.header ? {} : [];
526
- var j;
527
- for (j = 0; j < rowSource.length; j++) {
528
- var field = j;
529
- var value = rowSource[j];
530
- if (_config.header)
531
- field = j >= _fields.length ? '__parsed_extra' : _fields[j];
532
- if (_config.transform)
533
- value = _config.transform(value, field);
534
- value = parseDynamic(field, value);
535
- if (field === '__parsed_extra') {
536
- row[field] = row[field] || [];
537
- row[field].push(value);
538
- }
539
- else
540
- row[field] = value;
541
- }
542
- if (_config.header) {
543
- if (j > _fields.length)
544
- addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
545
- else if (j < _fields.length)
546
- addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
547
- }
548
- return row;
549
- }
550
- var incrementBy = 1;
551
- if (!_results.data[0] || Array.isArray(_results.data[0])) {
552
- _results.data = _results.data.map(processRow);
553
- incrementBy = _results.data.length;
554
- }
555
- else
556
- _results.data = processRow(_results.data, 0);
557
- if (_config.header && _results.meta)
558
- _results.meta.fields = _fields;
559
- _rowCounter += incrementBy;
560
- return _results;
458
+
459
+ return value;
460
+ }
461
+
462
+ function applyHeaderAndDynamicTypingAndTransformation() {
463
+ if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
464
+
465
+ function processRow(rowSource, i) {
466
+ var row = _config.header ? {} : [];
467
+ var j;
468
+
469
+ for (j = 0; j < rowSource.length; j++) {
470
+ var field = j;
471
+ var value = rowSource[j];
472
+ if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
473
+ if (_config.transform) value = _config.transform(value, field);
474
+ value = parseDynamic(field, value);
475
+
476
+ if (field === '__parsed_extra') {
477
+ row[field] = row[field] || [];
478
+ row[field].push(value);
479
+ } else row[field] = value;
480
+ }
481
+
482
+ if (_config.header) {
483
+ if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
484
+ }
485
+
486
+ return row;
561
487
  }
562
- function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
563
- var bestDelim, bestDelta, fieldCountPrevRow;
564
- delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
565
- for (var i = 0; i < delimitersToGuess.length; i++) {
566
- var delim = delimitersToGuess[i];
567
- var delta = 0, avgFieldCount = 0, emptyLinesCount = 0;
568
- fieldCountPrevRow = undefined;
569
- var preview = new Parser({
570
- comments: comments,
571
- delimiter: delim,
572
- newline: newline,
573
- preview: 10
574
- }).parse(input);
575
- for (var j = 0; j < preview.data.length; j++) {
576
- if (skipEmptyLines && testEmptyLine(preview.data[j])) {
577
- emptyLinesCount++;
578
- continue;
579
- }
580
- var fieldCount = preview.data[j].length;
581
- avgFieldCount += fieldCount;
582
- if (typeof fieldCountPrevRow === 'undefined') {
583
- fieldCountPrevRow = 0;
584
- continue;
585
- }
586
- else if (fieldCount > 1) {
587
- delta += Math.abs(fieldCount - fieldCountPrevRow);
588
- fieldCountPrevRow = fieldCount;
589
- }
590
- }
591
- if (preview.data.length > 0)
592
- avgFieldCount /= preview.data.length - emptyLinesCount;
593
- if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
594
- bestDelta = delta;
595
- bestDelim = delim;
596
- }
488
+
489
+ var incrementBy = 1;
490
+
491
+ if (!_results.data[0] || Array.isArray(_results.data[0])) {
492
+ _results.data = _results.data.map(processRow);
493
+ incrementBy = _results.data.length;
494
+ } else _results.data = processRow(_results.data, 0);
495
+
496
+ if (_config.header && _results.meta) _results.meta.fields = _fields;
497
+ _rowCounter += incrementBy;
498
+ return _results;
499
+ }
500
+
501
+ function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
502
+ var bestDelim, bestDelta, fieldCountPrevRow;
503
+ delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
504
+
505
+ for (var i = 0; i < delimitersToGuess.length; i++) {
506
+ var delim = delimitersToGuess[i];
507
+ var delta = 0,
508
+ avgFieldCount = 0,
509
+ emptyLinesCount = 0;
510
+ fieldCountPrevRow = undefined;
511
+ var preview = new Parser({
512
+ comments: comments,
513
+ delimiter: delim,
514
+ newline: newline,
515
+ preview: 10
516
+ }).parse(input);
517
+
518
+ for (var j = 0; j < preview.data.length; j++) {
519
+ if (skipEmptyLines && testEmptyLine(preview.data[j])) {
520
+ emptyLinesCount++;
521
+ continue;
597
522
  }
598
- _config.delimiter = bestDelim;
599
- return {
600
- successful: !!bestDelim,
601
- bestDelimiter: bestDelim
602
- };
603
- }
604
- function guessLineEndings(input, quoteChar) {
605
- input = input.substr(0, 1024 * 1024); // max length 1 MB
606
- // Replace all the text inside quotes
607
- var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
608
- input = input.replace(re, '');
609
- var r = input.split('\r');
610
- var n = input.split('\n');
611
- var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
612
- if (r.length === 1 || nAppearsFirst)
613
- return '\n';
614
- var numWithN = 0;
615
- for (var i = 0; i < r.length; i++) {
616
- if (r[i][0] === '\n')
617
- numWithN++;
523
+
524
+ var fieldCount = preview.data[j].length;
525
+ avgFieldCount += fieldCount;
526
+
527
+ if (typeof fieldCountPrevRow === 'undefined') {
528
+ fieldCountPrevRow = 0;
529
+ continue;
530
+ } else if (fieldCount > 1) {
531
+ delta += Math.abs(fieldCount - fieldCountPrevRow);
532
+ fieldCountPrevRow = fieldCount;
618
533
  }
619
- return numWithN >= r.length / 2 ? '\r\n' : '\r';
534
+ }
535
+
536
+ if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;
537
+
538
+ if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
539
+ bestDelta = delta;
540
+ bestDelim = delim;
541
+ }
620
542
  }
621
- function addError(type, code, msg, row) {
622
- _results.errors.push({
623
- type: type,
624
- code: code,
625
- message: msg,
626
- row: row
627
- });
543
+
544
+ _config.delimiter = bestDelim;
545
+ return {
546
+ successful: !!bestDelim,
547
+ bestDelimiter: bestDelim
548
+ };
549
+ }
550
+
551
+ function guessLineEndings(input, quoteChar) {
552
+ input = input.substr(0, 1024 * 1024);
553
+ var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
554
+ input = input.replace(re, '');
555
+ var r = input.split('\r');
556
+ var n = input.split('\n');
557
+ var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
558
+ if (r.length === 1 || nAppearsFirst) return '\n';
559
+ var numWithN = 0;
560
+
561
+ for (var i = 0; i < r.length; i++) {
562
+ if (r[i][0] === '\n') numWithN++;
628
563
  }
564
+
565
+ return numWithN >= r.length / 2 ? '\r\n' : '\r';
566
+ }
567
+
568
+ function addError(type, code, msg, row) {
569
+ _results.errors.push({
570
+ type: type,
571
+ code: code,
572
+ message: msg,
573
+ row: row
574
+ });
575
+ }
629
576
  }
630
- /** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
577
+
631
578
  function escapeRegExp(string) {
632
- return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
579
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
633
580
  }
634
- /** The core parser implements speedy and correct CSV parsing */
581
+
635
582
  function Parser(config) {
636
- // Unpack the config object
637
- config = config || {};
638
- var delim = config.delimiter;
639
- var newline = config.newline;
640
- var comments = config.comments;
641
- var step = config.step;
642
- var preview = config.preview;
643
- var fastMode = config.fastMode;
644
- var quoteChar;
645
- /** Allows for no quoteChar by setting quoteChar to undefined in config */
646
- if (config.quoteChar === undefined) {
647
- quoteChar = '"';
648
- }
649
- else {
650
- quoteChar = config.quoteChar;
651
- }
652
- var escapeChar = quoteChar;
653
- if (config.escapeChar !== undefined) {
654
- escapeChar = config.escapeChar;
655
- }
656
- // Delimiter must be valid
657
- if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1)
658
- delim = ',';
659
- // Comment character must be valid
660
- if (comments === delim)
661
- throw new Error('Comment character same as delimiter');
662
- else if (comments === true)
663
- comments = '#';
664
- else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
665
- comments = false;
666
- // Newline must be valid: \r, \n, or \r\n
667
- if (newline !== '\n' && newline !== '\r' && newline !== '\r\n')
668
- newline = '\n';
669
- // We're gonna need these at the Parser scope
670
- var cursor = 0;
671
- var aborted = false;
672
- this.parse = function (input, baseIndex, ignoreLastRow) {
673
- // For some reason, in Chrome, this speeds things up (!?)
674
- if (typeof input !== 'string')
675
- throw new Error('Input must be a string');
676
- // We don't need to compute some of these every time parse() is called,
677
- // but having them in a more local scope seems to perform better
678
- var inputLen = input.length, delimLen = delim.length, newlineLen = newline.length, commentsLen = comments.length;
679
- var stepIsFunction = isFunction(step);
680
- // Establish starting state
681
- cursor = 0;
682
- var data = [], errors = [], row = [], lastCursor = 0;
683
- if (!input)
684
- return returnable();
685
- if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
686
- var rows = input.split(newline);
687
- for (var i = 0; i < rows.length; i++) {
688
- row = rows[i];
689
- cursor += row.length;
690
- if (i !== rows.length - 1)
691
- cursor += newline.length;
692
- else if (ignoreLastRow)
693
- return returnable();
694
- if (comments && row.substr(0, commentsLen) === comments)
695
- continue;
696
- if (stepIsFunction) {
697
- data = [];
698
- pushRow(row.split(delim));
699
- doStep();
700
- if (aborted)
701
- return returnable();
702
- }
703
- else
704
- pushRow(row.split(delim));
705
- if (preview && i >= preview) {
706
- data = data.slice(0, preview);
707
- return returnable(true);
708
- }
709
- }
710
- return returnable();
583
+ config = config || {};
584
+ var delim = config.delimiter;
585
+ var newline = config.newline;
586
+ var comments = config.comments;
587
+ var step = config.step;
588
+ var preview = config.preview;
589
+ var fastMode = config.fastMode;
590
+ var quoteChar;
591
+
592
+ if (config.quoteChar === undefined) {
593
+ quoteChar = '"';
594
+ } else {
595
+ quoteChar = config.quoteChar;
596
+ }
597
+
598
+ var escapeChar = quoteChar;
599
+
600
+ if (config.escapeChar !== undefined) {
601
+ escapeChar = config.escapeChar;
602
+ }
603
+
604
+ if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
605
+ if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
606
+ if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
607
+ var cursor = 0;
608
+ var aborted = false;
609
+
610
+ this.parse = function (input, baseIndex, ignoreLastRow) {
611
+ if (typeof input !== 'string') throw new Error('Input must be a string');
612
+ var inputLen = input.length,
613
+ delimLen = delim.length,
614
+ newlineLen = newline.length,
615
+ commentsLen = comments.length;
616
+ var stepIsFunction = isFunction(step);
617
+ cursor = 0;
618
+ var data = [],
619
+ errors = [],
620
+ row = [],
621
+ lastCursor = 0;
622
+ if (!input) return returnable();
623
+
624
+ if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
625
+ var rows = input.split(newline);
626
+
627
+ for (var i = 0; i < rows.length; i++) {
628
+ row = rows[i];
629
+ cursor += row.length;
630
+ if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
631
+ if (comments && row.substr(0, commentsLen) === comments) continue;
632
+
633
+ if (stepIsFunction) {
634
+ data = [];
635
+ pushRow(row.split(delim));
636
+ doStep();
637
+ if (aborted) return returnable();
638
+ } else pushRow(row.split(delim));
639
+
640
+ if (preview && i >= preview) {
641
+ data = data.slice(0, preview);
642
+ return returnable(true);
711
643
  }
712
- var nextDelim = input.indexOf(delim, cursor);
713
- var nextNewline = input.indexOf(newline, cursor);
714
- var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
715
- var quoteSearch;
716
- // Parser loop
644
+ }
645
+
646
+ return returnable();
647
+ }
648
+
649
+ var nextDelim = input.indexOf(delim, cursor);
650
+ var nextNewline = input.indexOf(newline, cursor);
651
+ var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
652
+ var quoteSearch;
653
+
654
+ for (;;) {
655
+ if (input[cursor] === quoteChar) {
656
+ quoteSearch = cursor;
657
+ cursor++;
658
+
717
659
  for (;;) {
718
- // Field has opening quote
719
- if (input[cursor] === quoteChar) {
720
- // Start our search for the closing quote where the cursor is
721
- quoteSearch = cursor;
722
- // Skip the opening quote
723
- cursor++;
724
- for (;;) {
725
- // Find closing quote
726
- quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
727
- //No other quotes are found - no other delimiters
728
- if (quoteSearch === -1) {
729
- if (!ignoreLastRow) {
730
- // No closing quote... what a pity
731
- errors.push({
732
- type: 'Quotes',
733
- code: 'MissingQuotes',
734
- message: 'Quoted field unterminated',
735
- row: data.length,
736
- index: cursor
737
- });
738
- }
739
- return finish();
740
- }
741
- // Closing quote at EOF
742
- if (quoteSearch === inputLen - 1) {
743
- var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
744
- return finish(value);
745
- }
746
- // If this quote is escaped, it's part of the data; skip it
747
- // If the quote character is the escape character, then check if the next character is the escape character
748
- if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
749
- quoteSearch++;
750
- continue;
751
- }
752
- // If the quote character is not the escape character, then check if the previous character was the escape character
753
- if (quoteChar !== escapeChar &&
754
- quoteSearch !== 0 &&
755
- input[quoteSearch - 1] === escapeChar) {
756
- continue;
757
- }
758
- // Check up to nextDelim or nextNewline, whichever is closest
759
- var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
760
- var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
761
- // Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
762
- if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
763
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
764
- cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
765
- nextDelim = input.indexOf(delim, cursor);
766
- nextNewline = input.indexOf(newline, cursor);
767
- if (stepIsFunction) {
768
- doStep();
769
- if (aborted)
770
- return returnable();
771
- }
772
- if (preview && data.length >= preview)
773
- return returnable(true);
774
- break;
775
- }
776
- var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
777
- // Closing quote followed by newline or 'unnecessary spaces + newLine'
778
- if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
779
- row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
780
- saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
781
- nextDelim = input.indexOf(delim, cursor); // because we may have skipped the nextDelim in the quoted field
782
- if (stepIsFunction) {
783
- doStep();
784
- if (aborted)
785
- return returnable();
786
- }
787
- if (preview && data.length >= preview)
788
- return returnable(true);
789
- break;
790
- }
791
- // Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
792
- errors.push({
793
- type: 'Quotes',
794
- code: 'InvalidQuotes',
795
- message: 'Trailing quote on quoted field is malformed',
796
- row: data.length,
797
- index: cursor
798
- });
799
- quoteSearch++;
800
- continue;
801
- }
802
- if (stepIsFunction) {
803
- doStep();
804
- if (aborted)
805
- return returnable();
806
- }
807
- if (preview && data.length >= preview)
808
- return returnable(true);
809
- continue;
810
- }
811
- // Comment found at start of new line
812
- if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
813
- if (nextNewline === -1)
814
- // Comment ends at EOF
815
- return returnable();
816
- cursor = nextNewline + newlineLen;
817
- nextNewline = input.indexOf(newline, cursor);
818
- nextDelim = input.indexOf(delim, cursor);
819
- continue;
820
- }
821
- // Next delimiter comes before next newline, so we've reached end of field
822
- if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
823
- row.push(input.substring(cursor, nextDelim));
824
- cursor = nextDelim + delimLen;
825
- nextDelim = input.indexOf(delim, cursor);
826
- continue;
660
+ quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
661
+
662
+ if (quoteSearch === -1) {
663
+ if (!ignoreLastRow) {
664
+ errors.push({
665
+ type: 'Quotes',
666
+ code: 'MissingQuotes',
667
+ message: 'Quoted field unterminated',
668
+ row: data.length,
669
+ index: cursor
670
+ });
827
671
  }
828
- // End of row
829
- if (nextNewline !== -1) {
830
- row.push(input.substring(cursor, nextNewline));
831
- saveRow(nextNewline + newlineLen);
832
- if (stepIsFunction) {
833
- doStep();
834
- if (aborted)
835
- return returnable();
836
- }
837
- if (preview && data.length >= preview)
838
- return returnable(true);
839
- continue;
672
+
673
+ return finish();
674
+ }
675
+
676
+ if (quoteSearch === inputLen - 1) {
677
+ var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
678
+ return finish(value);
679
+ }
680
+
681
+ if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
682
+ quoteSearch++;
683
+ continue;
684
+ }
685
+
686
+ if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
687
+ continue;
688
+ }
689
+
690
+ var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
691
+ var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
692
+
693
+ if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
694
+ row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
695
+ cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
696
+ nextDelim = input.indexOf(delim, cursor);
697
+ nextNewline = input.indexOf(newline, cursor);
698
+
699
+ if (stepIsFunction) {
700
+ doStep();
701
+ if (aborted) return returnable();
840
702
  }
703
+
704
+ if (preview && data.length >= preview) return returnable(true);
841
705
  break;
842
- }
843
- return finish();
844
- function pushRow(row) {
845
- data.push(row);
846
- lastCursor = cursor;
847
- }
848
- /**
849
- * checks if there are extra spaces after closing quote and given index without any text
850
- * if Yes, returns the number of spaces
851
- */
852
- function extraSpaces(index) {
853
- var spaceLength = 0;
854
- if (index !== -1) {
855
- var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
856
- if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
857
- spaceLength = textBetweenClosingQuoteAndIndex.length;
858
- }
706
+ }
707
+
708
+ var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
709
+
710
+ if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
711
+ row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
712
+ saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
713
+ nextDelim = input.indexOf(delim, cursor);
714
+
715
+ if (stepIsFunction) {
716
+ doStep();
717
+ if (aborted) return returnable();
859
718
  }
860
- return spaceLength;
719
+
720
+ if (preview && data.length >= preview) return returnable(true);
721
+ break;
722
+ }
723
+
724
+ errors.push({
725
+ type: 'Quotes',
726
+ code: 'InvalidQuotes',
727
+ message: 'Trailing quote on quoted field is malformed',
728
+ row: data.length,
729
+ index: cursor
730
+ });
731
+ quoteSearch++;
732
+ continue;
861
733
  }
862
- /**
863
- * Appends the remaining input from cursor to the end into
864
- * row, saves the row, calls step, and returns the results.
865
- */
866
- function finish(value) {
867
- if (ignoreLastRow)
868
- return returnable();
869
- if (typeof value === 'undefined')
870
- value = input.substr(cursor);
871
- row.push(value);
872
- cursor = inputLen; // important in case parsing is paused
873
- pushRow(row);
874
- if (stepIsFunction)
875
- doStep();
876
- return returnable();
734
+
735
+ if (stepIsFunction) {
736
+ doStep();
737
+ if (aborted) return returnable();
877
738
  }
878
- /**
879
- * Appends the current row to the results. It sets the cursor
880
- * to newCursor and finds the nextNewline. The caller should
881
- * take care to execute user's step function and check for
882
- * preview and end parsing if necessary.
883
- */
884
- function saveRow(newCursor) {
885
- cursor = newCursor;
886
- pushRow(row);
887
- row = [];
888
- nextNewline = input.indexOf(newline, cursor);
739
+
740
+ if (preview && data.length >= preview) return returnable(true);
741
+ continue;
742
+ }
743
+
744
+ if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
745
+ if (nextNewline === -1) return returnable();
746
+ cursor = nextNewline + newlineLen;
747
+ nextNewline = input.indexOf(newline, cursor);
748
+ nextDelim = input.indexOf(delim, cursor);
749
+ continue;
750
+ }
751
+
752
+ if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
753
+ row.push(input.substring(cursor, nextDelim));
754
+ cursor = nextDelim + delimLen;
755
+ nextDelim = input.indexOf(delim, cursor);
756
+ continue;
757
+ }
758
+
759
+ if (nextNewline !== -1) {
760
+ row.push(input.substring(cursor, nextNewline));
761
+ saveRow(nextNewline + newlineLen);
762
+
763
+ if (stepIsFunction) {
764
+ doStep();
765
+ if (aborted) return returnable();
889
766
  }
890
- /** Returns an object with the results, errors, and meta. */
891
- function returnable(stopped, step) {
892
- var isStep = step || false;
893
- return {
894
- data: isStep ? data[0] : data,
895
- errors: errors,
896
- meta: {
897
- delimiter: delim,
898
- linebreak: newline,
899
- aborted: aborted,
900
- truncated: !!stopped,
901
- cursor: lastCursor + (baseIndex || 0)
902
- }
903
- };
767
+
768
+ if (preview && data.length >= preview) return returnable(true);
769
+ continue;
770
+ }
771
+
772
+ break;
773
+ }
774
+
775
+ return finish();
776
+
777
+ function pushRow(row) {
778
+ data.push(row);
779
+ lastCursor = cursor;
780
+ }
781
+
782
+ function extraSpaces(index) {
783
+ var spaceLength = 0;
784
+
785
+ if (index !== -1) {
786
+ var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
787
+
788
+ if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
789
+ spaceLength = textBetweenClosingQuoteAndIndex.length;
904
790
  }
905
- /** Executes the user's step function and resets data & errors. */
906
- function doStep() {
907
- step(returnable(undefined, true));
908
- data = [];
909
- errors = [];
791
+ }
792
+
793
+ return spaceLength;
794
+ }
795
+
796
+ function finish(value) {
797
+ if (ignoreLastRow) return returnable();
798
+ if (typeof value === 'undefined') value = input.substr(cursor);
799
+ row.push(value);
800
+ cursor = inputLen;
801
+ pushRow(row);
802
+ if (stepIsFunction) doStep();
803
+ return returnable();
804
+ }
805
+
806
+ function saveRow(newCursor) {
807
+ cursor = newCursor;
808
+ pushRow(row);
809
+ row = [];
810
+ nextNewline = input.indexOf(newline, cursor);
811
+ }
812
+
813
+ function returnable(stopped, step) {
814
+ var isStep = step || false;
815
+ return {
816
+ data: isStep ? data[0] : data,
817
+ errors: errors,
818
+ meta: {
819
+ delimiter: delim,
820
+ linebreak: newline,
821
+ aborted: aborted,
822
+ truncated: !!stopped,
823
+ cursor: lastCursor + (baseIndex || 0)
910
824
  }
911
- };
912
- /** Sets the abort flag */
913
- this.abort = function () {
914
- aborted = true;
915
- };
916
- /** Gets the cursor position */
917
- this.getCharIndex = function () {
918
- return cursor;
919
- };
825
+ };
826
+ }
827
+
828
+ function doStep() {
829
+ step(returnable(undefined, true));
830
+ data = [];
831
+ errors = [];
832
+ }
833
+ };
834
+
835
+ this.abort = function () {
836
+ aborted = true;
837
+ };
838
+
839
+ this.getCharIndex = function () {
840
+ return cursor;
841
+ };
920
842
  }
843
+
921
844
  function notImplemented() {
922
- throw new Error('Not implemented.');
845
+ throw new Error('Not implemented.');
923
846
  }
924
- /** Makes a deep copy of an array or object (mostly) */
847
+
925
848
  function copy(obj) {
926
- if (typeof obj !== 'object' || obj === null)
927
- return obj;
928
- var cpy = Array.isArray(obj) ? [] : {};
929
- for (var key in obj)
930
- cpy[key] = copy(obj[key]);
931
- return cpy;
849
+ if (typeof obj !== 'object' || obj === null) return obj;
850
+ var cpy = Array.isArray(obj) ? [] : {};
851
+
852
+ for (var key in obj) cpy[key] = copy(obj[key]);
853
+
854
+ return cpy;
932
855
  }
856
+
933
857
  function isFunction(func) {
934
- return typeof func === 'function';
858
+ return typeof func === 'function';
935
859
  }
860
+ //# sourceMappingURL=papaparse.js.map