@loaders.gl/csv 4.0.0-alpha.4 → 4.0.0-alpha.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.d.ts +2 -0
- package/dist/bundle.d.ts.map +1 -0
- package/dist/csv-loader.d.ts +34 -0
- package/dist/csv-loader.d.ts.map +1 -0
- package/dist/csv-loader.js +3 -3
- package/dist/csv-loader.js.map +1 -1
- package/dist/csv-writer.d.ts +1 -0
- package/dist/csv-writer.d.ts.map +1 -0
- package/dist/dist.min.js +1503 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/papaparse/async-iterator-streamer.d.ts +6 -0
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -0
- package/dist/papaparse/async-iterator-streamer.js +1 -1
- package/dist/papaparse/async-iterator-streamer.js.map +1 -1
- package/dist/papaparse/papaparse.d.ts +30 -0
- package/dist/papaparse/papaparse.d.ts.map +1 -0
- package/dist/{libs → papaparse}/papaparse.js +162 -404
- package/dist/papaparse/papaparse.js.map +1 -0
- package/package.json +6 -6
- package/src/csv-loader.ts +2 -2
- package/src/{lib → papaparse}/async-iterator-streamer.ts +2 -2
- package/src/{libs/papaparse.js → papaparse/papaparse.ts} +48 -73
- package/dist/lib/async-iterator-streamer.js +0 -35
- package/dist/lib/async-iterator-streamer.js.map +0 -1
- package/src/papaparse/async-iterator-streamer.js +0 -71
|
@@ -1,231 +1,121 @@
|
|
|
1
|
-
// This is a fork of papaparse
|
|
2
|
-
// https://github.com/mholt/PapaParse
|
|
3
1
|
/* @license
|
|
4
2
|
Papa Parse
|
|
5
3
|
v5.0.0-beta.0
|
|
6
4
|
https://github.com/mholt/PapaParse
|
|
7
5
|
License: MIT
|
|
8
6
|
*/
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
return global;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
// When running tests none of the above have been defined
|
|
34
|
-
return {};
|
|
35
|
-
})();
|
|
36
|
-
|
|
37
|
-
var IS_PAPA_WORKER = false;
|
|
38
|
-
|
|
39
|
-
var Papa = {};
|
|
40
|
-
module.exports = Papa;
|
|
41
|
-
Papa.parse = CsvToJson;
|
|
42
|
-
Papa.unparse = JsonToCsv;
|
|
43
|
-
|
|
44
|
-
Papa.RECORD_SEP = String.fromCharCode(30);
|
|
45
|
-
Papa.UNIT_SEP = String.fromCharCode(31);
|
|
46
|
-
Papa.BYTE_ORDER_MARK = '\ufeff';
|
|
47
|
-
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
|
|
48
|
-
Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
|
|
49
|
-
Papa.NODE_STREAM_INPUT = 1;
|
|
50
|
-
|
|
51
|
-
// Configurable chunk sizes for local and remote files, respectively
|
|
52
|
-
Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
|
|
53
|
-
Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
|
|
54
|
-
Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
|
|
55
|
-
|
|
56
|
-
// Exposed for testing and development only
|
|
57
|
-
Papa.Parser = Parser;
|
|
58
|
-
Papa.ParserHandle = ParserHandle;
|
|
59
|
-
|
|
60
|
-
// BEGIN FORK
|
|
61
|
-
Papa.ChunkStreamer = ChunkStreamer;
|
|
62
|
-
Papa.StringStreamer = StringStreamer;
|
|
63
|
-
/*
|
|
64
|
-
Papa.NetworkStreamer = NetworkStreamer;
|
|
65
|
-
Papa.FileStreamer = FileStreamer;
|
|
66
|
-
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
|
|
67
|
-
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
68
|
-
Papa.DuplexStreamStreamer = DuplexStreamStreamer;
|
|
69
|
-
}
|
|
70
|
-
*/
|
|
71
|
-
// END FORK
|
|
72
|
-
|
|
73
|
-
// BEGIN FORK
|
|
74
|
-
// Adds an argument to papa.parse
|
|
75
|
-
// function CsvToJson(_input, _config)
|
|
76
|
-
function CsvToJson(
|
|
77
|
-
_input,
|
|
78
|
-
_config,
|
|
79
|
-
UserDefinedStreamer // BEGIN FORK
|
|
80
|
-
) {
|
|
7
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
8
|
+
const Papa = {
|
|
9
|
+
parse: CsvToJson,
|
|
10
|
+
unparse: JsonToCsv,
|
|
11
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
12
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
13
|
+
BYTE_ORDER_MARK,
|
|
14
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
15
|
+
WORKERS_SUPPORTED: false,
|
|
16
|
+
NODE_STREAM_INPUT: 1,
|
|
17
|
+
LocalChunkSize: 1024 * 1024 * 10,
|
|
18
|
+
RemoteChunkSize: 1024 * 1024 * 5,
|
|
19
|
+
DefaultDelimiter: ',',
|
|
20
|
+
Parser: Parser,
|
|
21
|
+
ParserHandle: ParserHandle,
|
|
22
|
+
ChunkStreamer: ChunkStreamer,
|
|
23
|
+
StringStreamer: StringStreamer
|
|
24
|
+
};
|
|
25
|
+
export default Papa;
|
|
26
|
+
|
|
27
|
+
function CsvToJson(_input, _config, UserDefinedStreamer) {
|
|
81
28
|
_config = _config || {};
|
|
82
29
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
30
|
+
|
|
83
31
|
if (isFunction(dynamicTyping)) {
|
|
84
32
|
_config.dynamicTypingFunction = dynamicTyping;
|
|
85
|
-
// Will be filled on first row call
|
|
86
33
|
dynamicTyping = {};
|
|
87
34
|
}
|
|
88
|
-
_config.dynamicTyping = dynamicTyping;
|
|
89
35
|
|
|
36
|
+
_config.dynamicTyping = dynamicTyping;
|
|
90
37
|
_config.transform = isFunction(_config.transform) ? _config.transform : false;
|
|
91
38
|
|
|
92
39
|
if (_config.worker && Papa.WORKERS_SUPPORTED) {
|
|
93
40
|
var w = newWorker();
|
|
94
|
-
|
|
95
41
|
w.userStep = _config.step;
|
|
96
42
|
w.userChunk = _config.chunk;
|
|
97
43
|
w.userComplete = _config.complete;
|
|
98
44
|
w.userError = _config.error;
|
|
99
|
-
|
|
100
45
|
_config.step = isFunction(_config.step);
|
|
101
46
|
_config.chunk = isFunction(_config.chunk);
|
|
102
47
|
_config.complete = isFunction(_config.complete);
|
|
103
48
|
_config.error = isFunction(_config.error);
|
|
104
|
-
delete _config.worker;
|
|
105
|
-
|
|
49
|
+
delete _config.worker;
|
|
106
50
|
w.postMessage({
|
|
107
51
|
input: _input,
|
|
108
52
|
config: _config,
|
|
109
53
|
workerId: w.id
|
|
110
54
|
});
|
|
111
|
-
|
|
112
55
|
return;
|
|
113
56
|
}
|
|
114
57
|
|
|
115
58
|
var streamer = null;
|
|
116
|
-
|
|
117
|
-
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
118
|
-
// create a node Duplex stream for use
|
|
119
|
-
// with .pipe
|
|
120
|
-
streamer = new DuplexStreamStreamer(_config);
|
|
121
|
-
return streamer.getStream();
|
|
122
|
-
} else
|
|
123
|
-
*/
|
|
59
|
+
|
|
124
60
|
if (typeof _input === 'string') {
|
|
125
|
-
// if (_config.download) streamer = new NetworkStreamer(_config);
|
|
126
|
-
// else
|
|
127
61
|
streamer = new StringStreamer(_config);
|
|
128
62
|
}
|
|
129
|
-
|
|
130
|
-
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
131
|
-
streamer = new ReadableStreamStreamer(_config);
|
|
132
|
-
} else if ((global.File && _input instanceof File) || _input instanceof Object)
|
|
133
|
-
// ...Safari. (see issue #106)
|
|
134
|
-
streamer = new FileStreamer(_config);
|
|
135
|
-
*/
|
|
136
|
-
|
|
137
|
-
// BEGIN FORK
|
|
63
|
+
|
|
138
64
|
if (!streamer) {
|
|
139
65
|
streamer = new UserDefinedStreamer(_config);
|
|
140
66
|
}
|
|
141
|
-
// END FORK
|
|
142
67
|
|
|
143
68
|
return streamer.stream(_input);
|
|
144
69
|
}
|
|
145
70
|
|
|
146
71
|
function JsonToCsv(_input, _config) {
|
|
147
|
-
// Default configuration
|
|
148
|
-
|
|
149
|
-
/** whether to surround every datum with quotes */
|
|
150
72
|
var _quotes = false;
|
|
151
|
-
|
|
152
|
-
/** whether to write headers */
|
|
153
73
|
var _writeHeader = true;
|
|
154
|
-
|
|
155
|
-
/** delimiting character(s) */
|
|
156
74
|
var _delimiter = ',';
|
|
157
|
-
|
|
158
|
-
/** newline character(s) */
|
|
159
75
|
var _newline = '\r\n';
|
|
160
|
-
|
|
161
|
-
/** quote character */
|
|
162
76
|
var _quoteChar = '"';
|
|
163
77
|
|
|
164
|
-
/** escaped quote character, either "" or <config.escapeChar>" */
|
|
165
78
|
var _escapedQuote = _quoteChar + _quoteChar;
|
|
166
79
|
|
|
167
|
-
/** whether to skip empty lines */
|
|
168
80
|
var _skipEmptyLines = false;
|
|
169
|
-
|
|
170
|
-
/** the columns (keys) we expect when we unparse objects */
|
|
171
81
|
var _columns = null;
|
|
172
|
-
|
|
173
82
|
unpackConfig();
|
|
174
|
-
|
|
175
83
|
var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
|
|
176
|
-
|
|
177
84
|
if (typeof _input === 'string') _input = JSON.parse(_input);
|
|
178
85
|
|
|
179
86
|
if (Array.isArray(_input)) {
|
|
180
|
-
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
|
|
181
|
-
else if (typeof _input[0] === 'object')
|
|
182
|
-
return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
87
|
+
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
183
88
|
} else if (typeof _input === 'object') {
|
|
184
89
|
if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
|
|
185
90
|
|
|
186
91
|
if (Array.isArray(_input.data)) {
|
|
187
92
|
if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
|
|
188
|
-
|
|
189
|
-
if (!_input.
|
|
190
|
-
_input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
191
|
-
|
|
192
|
-
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
|
|
193
|
-
_input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
|
|
93
|
+
if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
94
|
+
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
|
|
194
95
|
}
|
|
195
96
|
|
|
196
97
|
return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
|
|
197
98
|
}
|
|
198
99
|
|
|
199
|
-
// Default (any valid paths should return before this)
|
|
200
100
|
throw new Error('Unable to serialize unrecognized input');
|
|
201
101
|
|
|
202
102
|
function unpackConfig() {
|
|
203
103
|
if (typeof _config !== 'object') return;
|
|
204
104
|
|
|
205
|
-
if (
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
return _config.delimiter.indexOf(value) !== -1;
|
|
209
|
-
}).length
|
|
210
|
-
) {
|
|
105
|
+
if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
|
|
106
|
+
return _config.delimiter.indexOf(value) !== -1;
|
|
107
|
+
}).length) {
|
|
211
108
|
_delimiter = _config.delimiter;
|
|
212
109
|
}
|
|
213
110
|
|
|
214
|
-
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
|
|
218
|
-
_skipEmptyLines = _config.skipEmptyLines;
|
|
219
|
-
|
|
111
|
+
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
|
|
112
|
+
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
|
|
220
113
|
if (typeof _config.newline === 'string') _newline = _config.newline;
|
|
221
|
-
|
|
222
114
|
if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
|
|
223
|
-
|
|
224
115
|
if (typeof _config.header === 'boolean') _writeHeader = _config.header;
|
|
225
116
|
|
|
226
117
|
if (Array.isArray(_config.columns)) {
|
|
227
118
|
if (_config.columns.length === 0) throw new Error('Option columns is empty');
|
|
228
|
-
|
|
229
119
|
_columns = _config.columns;
|
|
230
120
|
}
|
|
231
121
|
|
|
@@ -234,93 +124,82 @@ function JsonToCsv(_input, _config) {
|
|
|
234
124
|
}
|
|
235
125
|
}
|
|
236
126
|
|
|
237
|
-
/** Turns an object's keys into an array */
|
|
238
127
|
function objectKeys(obj) {
|
|
239
128
|
if (typeof obj !== 'object') return [];
|
|
240
129
|
var keys = [];
|
|
130
|
+
|
|
241
131
|
for (var key in obj) keys.push(key);
|
|
132
|
+
|
|
242
133
|
return keys;
|
|
243
134
|
}
|
|
244
135
|
|
|
245
|
-
/** The double for loop that iterates the data and writes out a CSV string including header row */
|
|
246
136
|
function serialize(fields, data, skipEmptyLines) {
|
|
247
137
|
var csv = '';
|
|
248
|
-
|
|
249
138
|
if (typeof fields === 'string') fields = JSON.parse(fields);
|
|
250
139
|
if (typeof data === 'string') data = JSON.parse(data);
|
|
251
|
-
|
|
252
140
|
var hasHeader = Array.isArray(fields) && fields.length > 0;
|
|
253
141
|
var dataKeyedByField = !Array.isArray(data[0]);
|
|
254
142
|
|
|
255
|
-
// If there a header row, write it first
|
|
256
143
|
if (hasHeader && _writeHeader) {
|
|
257
144
|
for (var i = 0; i < fields.length; i++) {
|
|
258
145
|
if (i > 0) csv += _delimiter;
|
|
259
146
|
csv += safe(fields[i], i);
|
|
260
147
|
}
|
|
148
|
+
|
|
261
149
|
if (data.length > 0) csv += _newline;
|
|
262
150
|
}
|
|
263
151
|
|
|
264
|
-
// Then write out the data
|
|
265
152
|
for (var row = 0; row < data.length; row++) {
|
|
266
153
|
var maxCol = hasHeader ? fields.length : data[row].length;
|
|
267
|
-
|
|
268
154
|
var emptyLine = false;
|
|
269
155
|
var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
|
|
156
|
+
|
|
270
157
|
if (skipEmptyLines && !hasHeader) {
|
|
271
|
-
emptyLine =
|
|
272
|
-
skipEmptyLines === 'greedy'
|
|
273
|
-
? data[row].join('').trim() === ''
|
|
274
|
-
: data[row].length === 1 && data[row][0].length === 0;
|
|
158
|
+
emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
|
|
275
159
|
}
|
|
160
|
+
|
|
276
161
|
if (skipEmptyLines === 'greedy' && hasHeader) {
|
|
277
162
|
var line = [];
|
|
163
|
+
|
|
278
164
|
for (var c = 0; c < maxCol; c++) {
|
|
279
165
|
var cx = dataKeyedByField ? fields[c] : c;
|
|
280
166
|
line.push(data[row][cx]);
|
|
281
167
|
}
|
|
168
|
+
|
|
282
169
|
emptyLine = line.join('').trim() === '';
|
|
283
170
|
}
|
|
171
|
+
|
|
284
172
|
if (!emptyLine) {
|
|
285
173
|
for (var col = 0; col < maxCol; col++) {
|
|
286
174
|
if (col > 0 && !nullLine) csv += _delimiter;
|
|
287
175
|
var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
|
|
288
176
|
csv += safe(data[row][colIdx], col);
|
|
289
177
|
}
|
|
290
|
-
|
|
178
|
+
|
|
179
|
+
if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
|
|
291
180
|
csv += _newline;
|
|
292
181
|
}
|
|
293
182
|
}
|
|
294
183
|
}
|
|
184
|
+
|
|
295
185
|
return csv;
|
|
296
186
|
}
|
|
297
187
|
|
|
298
|
-
/** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
|
|
299
188
|
function safe(str, col) {
|
|
300
189
|
if (typeof str === 'undefined' || str === null) return '';
|
|
301
|
-
|
|
302
190
|
if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
|
|
303
|
-
|
|
304
191
|
str = str.toString().replace(quoteCharRegex, _escapedQuote);
|
|
305
|
-
|
|
306
|
-
var needsQuotes =
|
|
307
|
-
(typeof _quotes === 'boolean' && _quotes) ||
|
|
308
|
-
(Array.isArray(_quotes) && _quotes[col]) ||
|
|
309
|
-
hasAny(str, Papa.BAD_DELIMITERS) ||
|
|
310
|
-
str.indexOf(_delimiter) > -1 ||
|
|
311
|
-
str.charAt(0) === ' ' ||
|
|
312
|
-
str.charAt(str.length - 1) === ' ';
|
|
313
|
-
|
|
192
|
+
var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
|
|
314
193
|
return needsQuotes ? _quoteChar + str + _quoteChar : str;
|
|
315
194
|
}
|
|
316
195
|
|
|
317
196
|
function hasAny(str, substrings) {
|
|
318
197
|
for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
|
|
198
|
+
|
|
319
199
|
return false;
|
|
320
200
|
}
|
|
321
201
|
}
|
|
322
202
|
|
|
323
|
-
/** ChunkStreamer is the base prototype for various streamer implementations. */
|
|
324
203
|
function ChunkStreamer(config) {
|
|
325
204
|
this._handle = null;
|
|
326
205
|
this._finished = false;
|
|
@@ -339,22 +218,20 @@ function ChunkStreamer(config) {
|
|
|
339
218
|
};
|
|
340
219
|
replaceConfig.call(this, config);
|
|
341
220
|
|
|
342
|
-
this.parseChunk = function(chunk, isFakeChunk) {
|
|
343
|
-
// First chunk pre-processing
|
|
221
|
+
this.parseChunk = function (chunk, isFakeChunk) {
|
|
344
222
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
345
223
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
224
|
+
|
|
346
225
|
if (modifiedChunk !== undefined) chunk = modifiedChunk;
|
|
347
226
|
}
|
|
348
|
-
this.isFirstChunk = false;
|
|
349
227
|
|
|
350
|
-
|
|
228
|
+
this.isFirstChunk = false;
|
|
351
229
|
var aggregate = this._partialLine + chunk;
|
|
352
230
|
this._partialLine = '';
|
|
353
231
|
|
|
354
232
|
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
|
|
355
233
|
|
|
356
234
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
357
|
-
|
|
358
235
|
var lastIndex = results.meta.cursor;
|
|
359
236
|
|
|
360
237
|
if (!this._finished) {
|
|
@@ -363,18 +240,11 @@ function ChunkStreamer(config) {
|
|
|
363
240
|
}
|
|
364
241
|
|
|
365
242
|
if (results && results.data) this._rowCount += results.data.length;
|
|
243
|
+
var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
|
|
366
244
|
|
|
367
|
-
|
|
368
|
-
this._finished || (this._config.preview && this._rowCount >= this._config.preview);
|
|
369
|
-
|
|
370
|
-
if (IS_PAPA_WORKER) {
|
|
371
|
-
global.postMessage({
|
|
372
|
-
results: results,
|
|
373
|
-
workerId: Papa.WORKER_ID,
|
|
374
|
-
finished: finishedIncludingPreview
|
|
375
|
-
});
|
|
376
|
-
} else if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
245
|
+
if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
377
246
|
this._config.chunk(results, this._handle);
|
|
247
|
+
|
|
378
248
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
379
249
|
results = undefined;
|
|
380
250
|
this._completeResults = undefined;
|
|
@@ -386,52 +256,41 @@ function ChunkStreamer(config) {
|
|
|
386
256
|
this._completeResults.meta = results.meta;
|
|
387
257
|
}
|
|
388
258
|
|
|
389
|
-
if (
|
|
390
|
-
!this._completed &&
|
|
391
|
-
finishedIncludingPreview &&
|
|
392
|
-
isFunction(this._config.complete) &&
|
|
393
|
-
(!results || !results.meta.aborted)
|
|
394
|
-
) {
|
|
259
|
+
if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
|
|
395
260
|
this._config.complete(this._completeResults, this._input);
|
|
261
|
+
|
|
396
262
|
this._completed = true;
|
|
397
263
|
}
|
|
398
264
|
|
|
399
265
|
if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
400
|
-
|
|
401
266
|
return results;
|
|
402
267
|
};
|
|
403
268
|
|
|
404
|
-
this._sendError = function(error) {
|
|
269
|
+
this._sendError = function (error) {
|
|
405
270
|
if (isFunction(this._config.error)) this._config.error(error);
|
|
406
|
-
else if (IS_PAPA_WORKER && this._config.error) {
|
|
407
|
-
global.postMessage({
|
|
408
|
-
workerId: Papa.WORKER_ID,
|
|
409
|
-
error: error,
|
|
410
|
-
finished: false
|
|
411
|
-
});
|
|
412
|
-
}
|
|
413
271
|
};
|
|
414
272
|
|
|
415
273
|
function replaceConfig(config) {
|
|
416
|
-
// Deep-copy the config so we can edit it
|
|
417
274
|
var configCopy = copy(config);
|
|
418
|
-
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
419
|
-
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
275
|
+
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
276
|
+
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
420
277
|
this._handle = new ParserHandle(configCopy);
|
|
421
278
|
this._handle.streamer = this;
|
|
422
|
-
this._config = configCopy;
|
|
279
|
+
this._config = configCopy;
|
|
423
280
|
}
|
|
424
281
|
}
|
|
282
|
+
|
|
425
283
|
function StringStreamer(config) {
|
|
426
284
|
config = config || {};
|
|
427
285
|
ChunkStreamer.call(this, config);
|
|
428
|
-
|
|
429
286
|
var remaining;
|
|
430
|
-
|
|
287
|
+
|
|
288
|
+
this.stream = function (s) {
|
|
431
289
|
remaining = s;
|
|
432
290
|
return this._nextChunk();
|
|
433
291
|
};
|
|
434
|
-
|
|
292
|
+
|
|
293
|
+
this._nextChunk = function () {
|
|
435
294
|
if (this._finished) return;
|
|
436
295
|
var size = this._config.chunkSize;
|
|
437
296
|
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
@@ -440,26 +299,28 @@ function StringStreamer(config) {
|
|
|
440
299
|
return this.parseChunk(chunk);
|
|
441
300
|
};
|
|
442
301
|
}
|
|
302
|
+
|
|
443
303
|
StringStreamer.prototype = Object.create(StringStreamer.prototype);
|
|
444
304
|
StringStreamer.prototype.constructor = StringStreamer;
|
|
445
305
|
|
|
446
|
-
// Use one ParserHandle per entire CSV file or string
|
|
447
306
|
function ParserHandle(_config) {
|
|
448
|
-
// One goal is to minimize the use of regular expressions...
|
|
449
307
|
var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
450
308
|
var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
451
|
-
|
|
452
309
|
var self = this;
|
|
453
|
-
var _stepCounter = 0;
|
|
454
|
-
var _rowCounter = 0;
|
|
455
|
-
|
|
456
|
-
var
|
|
457
|
-
|
|
458
|
-
var
|
|
459
|
-
|
|
460
|
-
var
|
|
310
|
+
var _stepCounter = 0;
|
|
311
|
+
var _rowCounter = 0;
|
|
312
|
+
|
|
313
|
+
var _input;
|
|
314
|
+
|
|
315
|
+
var _parser;
|
|
316
|
+
|
|
317
|
+
var _paused = false;
|
|
318
|
+
var _aborted = false;
|
|
319
|
+
|
|
320
|
+
var _delimiterError;
|
|
321
|
+
|
|
322
|
+
var _fields = [];
|
|
461
323
|
var _results = {
|
|
462
|
-
// The last results returned from the parser
|
|
463
324
|
data: [],
|
|
464
325
|
errors: [],
|
|
465
326
|
meta: {}
|
|
@@ -467,45 +328,27 @@ function ParserHandle(_config) {
|
|
|
467
328
|
|
|
468
329
|
if (isFunction(_config.step)) {
|
|
469
330
|
var userStep = _config.step;
|
|
470
|
-
_config.step = function(results) {
|
|
471
|
-
_results = results;
|
|
472
|
-
|
|
473
|
-
if (needsHeaderRow()) processResults();
|
|
474
|
-
// only call user's step function after header row
|
|
475
|
-
else {
|
|
476
|
-
processResults();
|
|
477
331
|
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
332
|
+
_config.step = function (results) {
|
|
333
|
+
_results = results;
|
|
334
|
+
if (needsHeaderRow()) processResults();else {
|
|
335
|
+
processResults();
|
|
336
|
+
if (!_results.data || _results.data.length === 0) return;
|
|
337
|
+
_stepCounter += results.data.length;
|
|
338
|
+
if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
|
|
339
|
+
}
|
|
485
340
|
};
|
|
486
341
|
}
|
|
487
342
|
|
|
488
|
-
|
|
489
|
-
* Parses input. Most users won't need, and shouldn't mess with, the baseIndex
|
|
490
|
-
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
491
|
-
* when an input comes in multiple chunks, like from a file.
|
|
492
|
-
*/
|
|
493
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
343
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
494
344
|
var quoteChar = _config.quoteChar || '"';
|
|
495
345
|
if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
|
|
496
|
-
|
|
497
346
|
_delimiterError = false;
|
|
347
|
+
|
|
498
348
|
if (!_config.delimiter) {
|
|
499
|
-
var delimGuess = guessDelimiter(
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
_config.skipEmptyLines,
|
|
503
|
-
_config.comments,
|
|
504
|
-
_config.delimitersToGuess
|
|
505
|
-
);
|
|
506
|
-
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;
|
|
507
|
-
else {
|
|
508
|
-
_delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
349
|
+
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
|
|
350
|
+
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
|
|
351
|
+
_delimiterError = true;
|
|
509
352
|
_config.delimiter = Papa.DefaultDelimiter;
|
|
510
353
|
}
|
|
511
354
|
_results.meta.delimiter = _config.delimiter;
|
|
@@ -515,65 +358,68 @@ function ParserHandle(_config) {
|
|
|
515
358
|
}
|
|
516
359
|
|
|
517
360
|
var parserConfig = copy(_config);
|
|
518
|
-
if (_config.preview && _config.header) parserConfig.preview++;
|
|
519
|
-
|
|
361
|
+
if (_config.preview && _config.header) parserConfig.preview++;
|
|
520
362
|
_input = input;
|
|
521
363
|
_parser = new Parser(parserConfig);
|
|
522
364
|
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
|
|
523
365
|
processResults();
|
|
524
|
-
return _paused ? {
|
|
366
|
+
return _paused ? {
|
|
367
|
+
meta: {
|
|
368
|
+
paused: true
|
|
369
|
+
}
|
|
370
|
+
} : _results || {
|
|
371
|
+
meta: {
|
|
372
|
+
paused: false
|
|
373
|
+
}
|
|
374
|
+
};
|
|
525
375
|
};
|
|
526
376
|
|
|
527
|
-
this.paused = function() {
|
|
377
|
+
this.paused = function () {
|
|
528
378
|
return _paused;
|
|
529
379
|
};
|
|
530
380
|
|
|
531
|
-
this.pause = function() {
|
|
381
|
+
this.pause = function () {
|
|
532
382
|
_paused = true;
|
|
383
|
+
|
|
533
384
|
_parser.abort();
|
|
385
|
+
|
|
534
386
|
_input = _input.substr(_parser.getCharIndex());
|
|
535
387
|
};
|
|
536
388
|
|
|
537
|
-
this.resume = function() {
|
|
389
|
+
this.resume = function () {
|
|
538
390
|
_paused = false;
|
|
539
391
|
self.streamer.parseChunk(_input, true);
|
|
540
392
|
};
|
|
541
393
|
|
|
542
|
-
this.aborted = function() {
|
|
394
|
+
this.aborted = function () {
|
|
543
395
|
return _aborted;
|
|
544
396
|
};
|
|
545
397
|
|
|
546
|
-
this.abort = function() {
|
|
398
|
+
this.abort = function () {
|
|
547
399
|
_aborted = true;
|
|
400
|
+
|
|
548
401
|
_parser.abort();
|
|
402
|
+
|
|
549
403
|
_results.meta.aborted = true;
|
|
550
404
|
if (isFunction(_config.complete)) _config.complete(_results);
|
|
551
405
|
_input = '';
|
|
552
406
|
};
|
|
553
407
|
|
|
554
408
|
function testEmptyLine(s) {
|
|
555
|
-
return _config.skipEmptyLines === 'greedy'
|
|
556
|
-
? s.join('').trim() === ''
|
|
557
|
-
: s.length === 1 && s[0].length === 0;
|
|
409
|
+
return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
|
|
558
410
|
}
|
|
559
411
|
|
|
560
412
|
function processResults() {
|
|
561
413
|
if (_results && _delimiterError) {
|
|
562
|
-
addError(
|
|
563
|
-
'Delimiter',
|
|
564
|
-
'UndetectableDelimiter',
|
|
565
|
-
"Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
|
|
566
|
-
);
|
|
414
|
+
addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
|
|
567
415
|
_delimiterError = false;
|
|
568
416
|
}
|
|
569
417
|
|
|
570
418
|
if (_config.skipEmptyLines) {
|
|
571
|
-
for (var i = 0; i < _results.data.length; i++)
|
|
572
|
-
if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
419
|
+
for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
573
420
|
}
|
|
574
421
|
|
|
575
422
|
if (needsHeaderRow()) fillHeaderFields();
|
|
576
|
-
|
|
577
423
|
return applyHeaderAndDynamicTypingAndTransformation();
|
|
578
424
|
}
|
|
579
425
|
|
|
@@ -591,50 +437,40 @@ function ParserHandle(_config) {
|
|
|
591
437
|
}
|
|
592
438
|
|
|
593
439
|
if (Array.isArray(_results.data[0])) {
|
|
594
|
-
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
|
|
595
|
-
_results.data[i].forEach(addHeder);
|
|
440
|
+
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
|
|
596
441
|
|
|
597
442
|
_results.data.splice(0, 1);
|
|
598
|
-
}
|
|
599
|
-
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
600
|
-
else _results.data.forEach(addHeder);
|
|
443
|
+
} else _results.data.forEach(addHeder);
|
|
601
444
|
}
|
|
602
445
|
|
|
603
446
|
function shouldApplyDynamicTyping(field) {
|
|
604
|
-
// Cache function values to avoid calling it for each row
|
|
605
447
|
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
|
|
606
448
|
_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
|
|
607
449
|
}
|
|
450
|
+
|
|
608
451
|
return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
|
|
609
452
|
}
|
|
610
453
|
|
|
611
454
|
function parseDynamic(field, value) {
|
|
612
455
|
if (shouldApplyDynamicTyping(field)) {
|
|
613
|
-
if (value === 'true' || value === 'TRUE') return true;
|
|
614
|
-
else if (value === 'false' || value === 'FALSE') return false;
|
|
615
|
-
else if (FLOAT.test(value)) return parseFloat(value);
|
|
616
|
-
else if (ISO_DATE.test(value)) return new Date(value);
|
|
617
|
-
else return value === '' ? null : value;
|
|
456
|
+
if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
|
|
618
457
|
}
|
|
458
|
+
|
|
619
459
|
return value;
|
|
620
460
|
}
|
|
621
461
|
|
|
622
462
|
function applyHeaderAndDynamicTypingAndTransformation() {
|
|
623
|
-
if (!_results || !_results.data ||
|
|
624
|
-
return _results;
|
|
463
|
+
if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
|
|
625
464
|
|
|
626
465
|
function processRow(rowSource, i) {
|
|
627
466
|
var row = _config.header ? {} : [];
|
|
628
|
-
|
|
629
467
|
var j;
|
|
468
|
+
|
|
630
469
|
for (j = 0; j < rowSource.length; j++) {
|
|
631
470
|
var field = j;
|
|
632
471
|
var value = rowSource[j];
|
|
633
|
-
|
|
634
472
|
if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
|
|
635
|
-
|
|
636
473
|
if (_config.transform) value = _config.transform(value, field);
|
|
637
|
-
|
|
638
474
|
value = parseDynamic(field, value);
|
|
639
475
|
|
|
640
476
|
if (field === '__parsed_extra') {
|
|
@@ -644,49 +480,34 @@ function ParserHandle(_config) {
|
|
|
644
480
|
}
|
|
645
481
|
|
|
646
482
|
if (_config.header) {
|
|
647
|
-
if (j > _fields.length)
|
|
648
|
-
addError(
|
|
649
|
-
'FieldMismatch',
|
|
650
|
-
'TooManyFields',
|
|
651
|
-
'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
652
|
-
_rowCounter + i
|
|
653
|
-
);
|
|
654
|
-
else if (j < _fields.length)
|
|
655
|
-
addError(
|
|
656
|
-
'FieldMismatch',
|
|
657
|
-
'TooFewFields',
|
|
658
|
-
'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
659
|
-
_rowCounter + i
|
|
660
|
-
);
|
|
483
|
+
if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
|
|
661
484
|
}
|
|
662
485
|
|
|
663
486
|
return row;
|
|
664
487
|
}
|
|
665
488
|
|
|
666
489
|
var incrementBy = 1;
|
|
490
|
+
|
|
667
491
|
if (!_results.data[0] || Array.isArray(_results.data[0])) {
|
|
668
492
|
_results.data = _results.data.map(processRow);
|
|
669
493
|
incrementBy = _results.data.length;
|
|
670
494
|
} else _results.data = processRow(_results.data, 0);
|
|
671
495
|
|
|
672
496
|
if (_config.header && _results.meta) _results.meta.fields = _fields;
|
|
673
|
-
|
|
674
497
|
_rowCounter += incrementBy;
|
|
675
498
|
return _results;
|
|
676
499
|
}
|
|
677
500
|
|
|
678
501
|
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
|
|
679
502
|
var bestDelim, bestDelta, fieldCountPrevRow;
|
|
680
|
-
|
|
681
503
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
|
|
682
504
|
|
|
683
505
|
for (var i = 0; i < delimitersToGuess.length; i++) {
|
|
684
506
|
var delim = delimitersToGuess[i];
|
|
685
507
|
var delta = 0,
|
|
686
|
-
|
|
687
|
-
|
|
508
|
+
avgFieldCount = 0,
|
|
509
|
+
emptyLinesCount = 0;
|
|
688
510
|
fieldCountPrevRow = undefined;
|
|
689
|
-
|
|
690
511
|
var preview = new Parser({
|
|
691
512
|
comments: comments,
|
|
692
513
|
delimiter: delim,
|
|
@@ -699,6 +520,7 @@ function ParserHandle(_config) {
|
|
|
699
520
|
emptyLinesCount++;
|
|
700
521
|
continue;
|
|
701
522
|
}
|
|
523
|
+
|
|
702
524
|
var fieldCount = preview.data[j].length;
|
|
703
525
|
avgFieldCount += fieldCount;
|
|
704
526
|
|
|
@@ -720,7 +542,6 @@ function ParserHandle(_config) {
|
|
|
720
542
|
}
|
|
721
543
|
|
|
722
544
|
_config.delimiter = bestDelim;
|
|
723
|
-
|
|
724
545
|
return {
|
|
725
546
|
successful: !!bestDelim,
|
|
726
547
|
bestDelimiter: bestDelim
|
|
@@ -728,20 +549,15 @@ function ParserHandle(_config) {
|
|
|
728
549
|
}
|
|
729
550
|
|
|
730
551
|
function guessLineEndings(input, quoteChar) {
|
|
731
|
-
input = input.substr(0, 1024 * 1024);
|
|
732
|
-
// Replace all the text inside quotes
|
|
552
|
+
input = input.substr(0, 1024 * 1024);
|
|
733
553
|
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
734
554
|
input = input.replace(re, '');
|
|
735
|
-
|
|
736
555
|
var r = input.split('\r');
|
|
737
|
-
|
|
738
556
|
var n = input.split('\n');
|
|
739
|
-
|
|
740
557
|
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
741
|
-
|
|
742
558
|
if (r.length === 1 || nAppearsFirst) return '\n';
|
|
743
|
-
|
|
744
559
|
var numWithN = 0;
|
|
560
|
+
|
|
745
561
|
for (var i = 0; i < r.length; i++) {
|
|
746
562
|
if (r[i][0] === '\n') numWithN++;
|
|
747
563
|
}
|
|
@@ -759,14 +575,11 @@ function ParserHandle(_config) {
|
|
|
759
575
|
}
|
|
760
576
|
}
|
|
761
577
|
|
|
762
|
-
/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
|
|
763
578
|
function escapeRegExp(string) {
|
|
764
|
-
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
579
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
765
580
|
}
|
|
766
581
|
|
|
767
|
-
/** The core parser implements speedy and correct CSV parsing */
|
|
768
582
|
function Parser(config) {
|
|
769
|
-
// Unpack the config object
|
|
770
583
|
config = config || {};
|
|
771
584
|
var delim = config.delimiter;
|
|
772
585
|
var newline = config.newline;
|
|
@@ -775,73 +588,61 @@ function Parser(config) {
|
|
|
775
588
|
var preview = config.preview;
|
|
776
589
|
var fastMode = config.fastMode;
|
|
777
590
|
var quoteChar;
|
|
778
|
-
|
|
591
|
+
|
|
779
592
|
if (config.quoteChar === undefined) {
|
|
780
593
|
quoteChar = '"';
|
|
781
594
|
} else {
|
|
782
595
|
quoteChar = config.quoteChar;
|
|
783
596
|
}
|
|
597
|
+
|
|
784
598
|
var escapeChar = quoteChar;
|
|
599
|
+
|
|
785
600
|
if (config.escapeChar !== undefined) {
|
|
786
601
|
escapeChar = config.escapeChar;
|
|
787
602
|
}
|
|
788
603
|
|
|
789
|
-
// Delimiter must be valid
|
|
790
604
|
if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
|
|
791
|
-
|
|
792
|
-
// Comment character must be valid
|
|
793
|
-
if (comments === delim) throw new Error('Comment character same as delimiter');
|
|
794
|
-
else if (comments === true) comments = '#';
|
|
795
|
-
else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
|
|
796
|
-
comments = false;
|
|
797
|
-
|
|
798
|
-
// Newline must be valid: \r, \n, or \r\n
|
|
605
|
+
if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
|
|
799
606
|
if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
|
|
800
|
-
|
|
801
|
-
// We're gonna need these at the Parser scope
|
|
802
607
|
var cursor = 0;
|
|
803
608
|
var aborted = false;
|
|
804
609
|
|
|
805
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
806
|
-
// For some reason, in Chrome, this speeds things up (!?)
|
|
610
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
807
611
|
if (typeof input !== 'string') throw new Error('Input must be a string');
|
|
808
|
-
|
|
809
|
-
// We don't need to compute some of these every time parse() is called,
|
|
810
|
-
// but having them in a more local scope seems to perform better
|
|
811
612
|
var inputLen = input.length,
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
613
|
+
delimLen = delim.length,
|
|
614
|
+
newlineLen = newline.length,
|
|
615
|
+
commentsLen = comments.length;
|
|
815
616
|
var stepIsFunction = isFunction(step);
|
|
816
|
-
|
|
817
|
-
// Establish starting state
|
|
818
617
|
cursor = 0;
|
|
819
618
|
var data = [],
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
619
|
+
errors = [],
|
|
620
|
+
row = [],
|
|
621
|
+
lastCursor = 0;
|
|
824
622
|
if (!input) return returnable();
|
|
825
623
|
|
|
826
|
-
if (fastMode ||
|
|
624
|
+
if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
|
|
827
625
|
var rows = input.split(newline);
|
|
626
|
+
|
|
828
627
|
for (var i = 0; i < rows.length; i++) {
|
|
829
628
|
row = rows[i];
|
|
830
629
|
cursor += row.length;
|
|
831
|
-
if (i !== rows.length - 1) cursor += newline.length;
|
|
832
|
-
else if (ignoreLastRow) return returnable();
|
|
630
|
+
if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
|
|
833
631
|
if (comments && row.substr(0, commentsLen) === comments) continue;
|
|
632
|
+
|
|
834
633
|
if (stepIsFunction) {
|
|
835
634
|
data = [];
|
|
836
635
|
pushRow(row.split(delim));
|
|
837
636
|
doStep();
|
|
838
637
|
if (aborted) return returnable();
|
|
839
638
|
} else pushRow(row.split(delim));
|
|
639
|
+
|
|
840
640
|
if (preview && i >= preview) {
|
|
841
641
|
data = data.slice(0, preview);
|
|
842
642
|
return returnable(true);
|
|
843
643
|
}
|
|
844
644
|
}
|
|
645
|
+
|
|
845
646
|
return returnable();
|
|
846
647
|
}
|
|
847
648
|
|
|
@@ -850,62 +651,45 @@ function Parser(config) {
|
|
|
850
651
|
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
|
|
851
652
|
var quoteSearch;
|
|
852
653
|
|
|
853
|
-
// Parser loop
|
|
854
654
|
for (;;) {
|
|
855
|
-
// Field has opening quote
|
|
856
655
|
if (input[cursor] === quoteChar) {
|
|
857
|
-
// Start our search for the closing quote where the cursor is
|
|
858
656
|
quoteSearch = cursor;
|
|
859
|
-
|
|
860
|
-
// Skip the opening quote
|
|
861
657
|
cursor++;
|
|
862
658
|
|
|
863
659
|
for (;;) {
|
|
864
|
-
// Find closing quote
|
|
865
660
|
quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
|
|
866
661
|
|
|
867
|
-
//No other quotes are found - no other delimiters
|
|
868
662
|
if (quoteSearch === -1) {
|
|
869
663
|
if (!ignoreLastRow) {
|
|
870
|
-
// No closing quote... what a pity
|
|
871
664
|
errors.push({
|
|
872
665
|
type: 'Quotes',
|
|
873
666
|
code: 'MissingQuotes',
|
|
874
667
|
message: 'Quoted field unterminated',
|
|
875
|
-
row: data.length,
|
|
668
|
+
row: data.length,
|
|
876
669
|
index: cursor
|
|
877
670
|
});
|
|
878
671
|
}
|
|
672
|
+
|
|
879
673
|
return finish();
|
|
880
674
|
}
|
|
881
675
|
|
|
882
|
-
// Closing quote at EOF
|
|
883
676
|
if (quoteSearch === inputLen - 1) {
|
|
884
677
|
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
|
|
885
678
|
return finish(value);
|
|
886
679
|
}
|
|
887
680
|
|
|
888
|
-
// If this quote is escaped, it's part of the data; skip it
|
|
889
|
-
// If the quote character is the escape character, then check if the next character is the escape character
|
|
890
681
|
if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
|
|
891
682
|
quoteSearch++;
|
|
892
683
|
continue;
|
|
893
684
|
}
|
|
894
685
|
|
|
895
|
-
|
|
896
|
-
if (
|
|
897
|
-
quoteChar !== escapeChar &&
|
|
898
|
-
quoteSearch !== 0 &&
|
|
899
|
-
input[quoteSearch - 1] === escapeChar
|
|
900
|
-
) {
|
|
686
|
+
if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
|
|
901
687
|
continue;
|
|
902
688
|
}
|
|
903
689
|
|
|
904
|
-
// Check up to nextDelim or nextNewline, whichever is closest
|
|
905
690
|
var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
|
|
906
691
|
var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
|
|
907
692
|
|
|
908
|
-
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
|
|
909
693
|
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
|
|
910
694
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
911
695
|
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
|
|
@@ -918,19 +702,15 @@ function Parser(config) {
|
|
|
918
702
|
}
|
|
919
703
|
|
|
920
704
|
if (preview && data.length >= preview) return returnable(true);
|
|
921
|
-
|
|
922
705
|
break;
|
|
923
706
|
}
|
|
924
707
|
|
|
925
708
|
var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
|
|
926
709
|
|
|
927
|
-
|
|
928
|
-
if (
|
|
929
|
-
input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
|
|
930
|
-
) {
|
|
710
|
+
if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
|
|
931
711
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
932
712
|
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
|
|
933
|
-
nextDelim = input.indexOf(delim, cursor);
|
|
713
|
+
nextDelim = input.indexOf(delim, cursor);
|
|
934
714
|
|
|
935
715
|
if (stepIsFunction) {
|
|
936
716
|
doStep();
|
|
@@ -938,19 +718,16 @@ function Parser(config) {
|
|
|
938
718
|
}
|
|
939
719
|
|
|
940
720
|
if (preview && data.length >= preview) return returnable(true);
|
|
941
|
-
|
|
942
721
|
break;
|
|
943
722
|
}
|
|
944
723
|
|
|
945
|
-
// Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
|
|
946
724
|
errors.push({
|
|
947
725
|
type: 'Quotes',
|
|
948
726
|
code: 'InvalidQuotes',
|
|
949
727
|
message: 'Trailing quote on quoted field is malformed',
|
|
950
|
-
row: data.length,
|
|
728
|
+
row: data.length,
|
|
951
729
|
index: cursor
|
|
952
730
|
});
|
|
953
|
-
|
|
954
731
|
quoteSearch++;
|
|
955
732
|
continue;
|
|
956
733
|
}
|
|
@@ -964,18 +741,14 @@ function Parser(config) {
|
|
|
964
741
|
continue;
|
|
965
742
|
}
|
|
966
743
|
|
|
967
|
-
// Comment found at start of new line
|
|
968
744
|
if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
|
|
969
|
-
if (nextNewline === -1)
|
|
970
|
-
// Comment ends at EOF
|
|
971
|
-
return returnable();
|
|
745
|
+
if (nextNewline === -1) return returnable();
|
|
972
746
|
cursor = nextNewline + newlineLen;
|
|
973
747
|
nextNewline = input.indexOf(newline, cursor);
|
|
974
748
|
nextDelim = input.indexOf(delim, cursor);
|
|
975
749
|
continue;
|
|
976
750
|
}
|
|
977
751
|
|
|
978
|
-
// Next delimiter comes before next newline, so we've reached end of field
|
|
979
752
|
if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
|
|
980
753
|
row.push(input.substring(cursor, nextDelim));
|
|
981
754
|
cursor = nextDelim + delimLen;
|
|
@@ -983,7 +756,6 @@ function Parser(config) {
|
|
|
983
756
|
continue;
|
|
984
757
|
}
|
|
985
758
|
|
|
986
|
-
// End of row
|
|
987
759
|
if (nextNewline !== -1) {
|
|
988
760
|
row.push(input.substring(cursor, nextNewline));
|
|
989
761
|
saveRow(nextNewline + newlineLen);
|
|
@@ -994,7 +766,6 @@ function Parser(config) {
|
|
|
994
766
|
}
|
|
995
767
|
|
|
996
768
|
if (preview && data.length >= preview) return returnable(true);
|
|
997
|
-
|
|
998
769
|
continue;
|
|
999
770
|
}
|
|
1000
771
|
|
|
@@ -1008,41 +779,30 @@ function Parser(config) {
|
|
|
1008
779
|
lastCursor = cursor;
|
|
1009
780
|
}
|
|
1010
781
|
|
|
1011
|
-
/**
|
|
1012
|
-
* checks if there are extra spaces after closing quote and given index without any text
|
|
1013
|
-
* if Yes, returns the number of spaces
|
|
1014
|
-
*/
|
|
1015
782
|
function extraSpaces(index) {
|
|
1016
783
|
var spaceLength = 0;
|
|
784
|
+
|
|
1017
785
|
if (index !== -1) {
|
|
1018
786
|
var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
|
|
787
|
+
|
|
1019
788
|
if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
|
|
1020
789
|
spaceLength = textBetweenClosingQuoteAndIndex.length;
|
|
1021
790
|
}
|
|
1022
791
|
}
|
|
792
|
+
|
|
1023
793
|
return spaceLength;
|
|
1024
794
|
}
|
|
1025
795
|
|
|
1026
|
-
/**
|
|
1027
|
-
* Appends the remaining input from cursor to the end into
|
|
1028
|
-
* row, saves the row, calls step, and returns the results.
|
|
1029
|
-
*/
|
|
1030
796
|
function finish(value) {
|
|
1031
797
|
if (ignoreLastRow) return returnable();
|
|
1032
798
|
if (typeof value === 'undefined') value = input.substr(cursor);
|
|
1033
799
|
row.push(value);
|
|
1034
|
-
cursor = inputLen;
|
|
800
|
+
cursor = inputLen;
|
|
1035
801
|
pushRow(row);
|
|
1036
802
|
if (stepIsFunction) doStep();
|
|
1037
803
|
return returnable();
|
|
1038
804
|
}
|
|
1039
805
|
|
|
1040
|
-
/**
|
|
1041
|
-
* Appends the current row to the results. It sets the cursor
|
|
1042
|
-
* to newCursor and finds the nextNewline. The caller should
|
|
1043
|
-
* take care to execute user's step function and check for
|
|
1044
|
-
* preview and end parsing if necessary.
|
|
1045
|
-
*/
|
|
1046
806
|
function saveRow(newCursor) {
|
|
1047
807
|
cursor = newCursor;
|
|
1048
808
|
pushRow(row);
|
|
@@ -1050,7 +810,6 @@ function Parser(config) {
|
|
|
1050
810
|
nextNewline = input.indexOf(newline, cursor);
|
|
1051
811
|
}
|
|
1052
812
|
|
|
1053
|
-
/** Returns an object with the results, errors, and meta. */
|
|
1054
813
|
function returnable(stopped, step) {
|
|
1055
814
|
var isStep = step || false;
|
|
1056
815
|
return {
|
|
@@ -1066,7 +825,6 @@ function Parser(config) {
|
|
|
1066
825
|
};
|
|
1067
826
|
}
|
|
1068
827
|
|
|
1069
|
-
/** Executes the user's step function and resets data & errors. */
|
|
1070
828
|
function doStep() {
|
|
1071
829
|
step(returnable(undefined, true));
|
|
1072
830
|
data = [];
|
|
@@ -1074,13 +832,11 @@ function Parser(config) {
|
|
|
1074
832
|
}
|
|
1075
833
|
};
|
|
1076
834
|
|
|
1077
|
-
|
|
1078
|
-
this.abort = function() {
|
|
835
|
+
this.abort = function () {
|
|
1079
836
|
aborted = true;
|
|
1080
837
|
};
|
|
1081
838
|
|
|
1082
|
-
|
|
1083
|
-
this.getCharIndex = function() {
|
|
839
|
+
this.getCharIndex = function () {
|
|
1084
840
|
return cursor;
|
|
1085
841
|
};
|
|
1086
842
|
}
|
|
@@ -1089,14 +845,16 @@ function notImplemented() {
|
|
|
1089
845
|
throw new Error('Not implemented.');
|
|
1090
846
|
}
|
|
1091
847
|
|
|
1092
|
-
/** Makes a deep copy of an array or object (mostly) */
|
|
1093
848
|
function copy(obj) {
|
|
1094
849
|
if (typeof obj !== 'object' || obj === null) return obj;
|
|
1095
850
|
var cpy = Array.isArray(obj) ? [] : {};
|
|
851
|
+
|
|
1096
852
|
for (var key in obj) cpy[key] = copy(obj[key]);
|
|
853
|
+
|
|
1097
854
|
return cpy;
|
|
1098
855
|
}
|
|
1099
856
|
|
|
1100
857
|
function isFunction(func) {
|
|
1101
858
|
return typeof func === 'function';
|
|
1102
859
|
}
|
|
860
|
+
//# sourceMappingURL=papaparse.js.map
|