@loaders.gl/csv 3.1.0-alpha.2 → 3.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.d.ts +2 -0
- package/dist/bundle.d.ts.map +1 -0
- package/dist/bundle.js +1501 -5
- package/dist/csv-loader.d.ts +34 -0
- package/dist/csv-loader.d.ts.map +1 -0
- package/dist/csv-loader.js +247 -220
- package/dist/csv-writer.d.ts +1 -0
- package/dist/csv-writer.d.ts.map +1 -0
- package/dist/csv-writer.js +6 -2
- package/dist/es5/bundle.js +7 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/csv-loader.js +259 -0
- package/dist/es5/csv-loader.js.map +1 -0
- package/dist/es5/csv-writer.js +2 -0
- package/dist/{csv-writer.js.map → es5/csv-writer.js.map} +0 -0
- package/dist/es5/index.js +14 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/papaparse/async-iterator-streamer.js +47 -0
- package/dist/es5/papaparse/async-iterator-streamer.js.map +1 -0
- package/{src/libs → dist/es5/papaparse}/papaparse.js +170 -404
- package/dist/es5/papaparse/papaparse.js.map +1 -0
- package/dist/esm/bundle.js +5 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/csv-loader.js +240 -0
- package/dist/esm/csv-loader.js.map +1 -0
- package/dist/esm/csv-writer.js +2 -0
- package/dist/esm/csv-writer.js.map +1 -0
- package/dist/esm/index.js +2 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/{lib → esm/papaparse}/async-iterator-streamer.js +1 -1
- package/dist/esm/papaparse/async-iterator-streamer.js.map +1 -0
- package/dist/esm/papaparse/papaparse.js +860 -0
- package/dist/esm/papaparse/papaparse.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -2
- package/dist/papaparse/async-iterator-streamer.d.ts +6 -0
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -0
- package/dist/papaparse/async-iterator-streamer.js +60 -32
- package/dist/papaparse/papaparse.d.ts +30 -0
- package/dist/papaparse/papaparse.d.ts.map +1 -0
- package/dist/papaparse/papaparse.js +935 -0
- package/package.json +8 -8
- package/src/bundle.ts +2 -3
- package/src/csv-loader.ts +4 -4
- package/src/{lib → papaparse}/async-iterator-streamer.ts +2 -2
- package/{dist/libs/papaparse.js → src/papaparse/papaparse.ts} +48 -73
- package/dist/bundle.js.map +0 -1
- package/dist/csv-loader.js.map +0 -1
- package/dist/dist.min.js +0 -9
- package/dist/dist.min.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/async-iterator-streamer.js.map +0 -1
- package/dist/papaparse/async-iterator-streamer.js.map +0 -1
- package/src/papaparse/async-iterator-streamer.js +0 -71
|
@@ -1,231 +1,129 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.default = void 0;
|
|
7
|
+
|
|
3
8
|
/* @license
|
|
4
9
|
Papa Parse
|
|
5
10
|
v5.0.0-beta.0
|
|
6
11
|
https://github.com/mholt/PapaParse
|
|
7
12
|
License: MIT
|
|
8
13
|
*/
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
// When running tests none of the above have been defined
|
|
34
|
-
return {};
|
|
35
|
-
})();
|
|
36
|
-
|
|
37
|
-
var IS_PAPA_WORKER = false;
|
|
38
|
-
|
|
39
|
-
var Papa = {};
|
|
40
|
-
module.exports = Papa;
|
|
41
|
-
Papa.parse = CsvToJson;
|
|
42
|
-
Papa.unparse = JsonToCsv;
|
|
43
|
-
|
|
44
|
-
Papa.RECORD_SEP = String.fromCharCode(30);
|
|
45
|
-
Papa.UNIT_SEP = String.fromCharCode(31);
|
|
46
|
-
Papa.BYTE_ORDER_MARK = '\ufeff';
|
|
47
|
-
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
|
|
48
|
-
Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
|
|
49
|
-
Papa.NODE_STREAM_INPUT = 1;
|
|
50
|
-
|
|
51
|
-
// Configurable chunk sizes for local and remote files, respectively
|
|
52
|
-
Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
|
|
53
|
-
Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
|
|
54
|
-
Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
|
|
55
|
-
|
|
56
|
-
// Exposed for testing and development only
|
|
57
|
-
Papa.Parser = Parser;
|
|
58
|
-
Papa.ParserHandle = ParserHandle;
|
|
59
|
-
|
|
60
|
-
// BEGIN FORK
|
|
61
|
-
Papa.ChunkStreamer = ChunkStreamer;
|
|
62
|
-
Papa.StringStreamer = StringStreamer;
|
|
63
|
-
/*
|
|
64
|
-
Papa.NetworkStreamer = NetworkStreamer;
|
|
65
|
-
Papa.FileStreamer = FileStreamer;
|
|
66
|
-
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
|
|
67
|
-
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
68
|
-
Papa.DuplexStreamStreamer = DuplexStreamStreamer;
|
|
69
|
-
}
|
|
70
|
-
*/
|
|
71
|
-
// END FORK
|
|
72
|
-
|
|
73
|
-
// BEGIN FORK
|
|
74
|
-
// Adds an argument to papa.parse
|
|
75
|
-
// function CsvToJson(_input, _config)
|
|
76
|
-
function CsvToJson(
|
|
77
|
-
_input,
|
|
78
|
-
_config,
|
|
79
|
-
UserDefinedStreamer // BEGIN FORK
|
|
80
|
-
) {
|
|
14
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
15
|
+
const Papa = {
|
|
16
|
+
parse: CsvToJson,
|
|
17
|
+
unparse: JsonToCsv,
|
|
18
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
19
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
20
|
+
BYTE_ORDER_MARK,
|
|
21
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
22
|
+
WORKERS_SUPPORTED: false,
|
|
23
|
+
NODE_STREAM_INPUT: 1,
|
|
24
|
+
LocalChunkSize: 1024 * 1024 * 10,
|
|
25
|
+
RemoteChunkSize: 1024 * 1024 * 5,
|
|
26
|
+
DefaultDelimiter: ',',
|
|
27
|
+
Parser: Parser,
|
|
28
|
+
ParserHandle: ParserHandle,
|
|
29
|
+
ChunkStreamer: ChunkStreamer,
|
|
30
|
+
StringStreamer: StringStreamer
|
|
31
|
+
};
|
|
32
|
+
var _default = Papa;
|
|
33
|
+
exports.default = _default;
|
|
34
|
+
|
|
35
|
+
function CsvToJson(_input, _config, UserDefinedStreamer) {
|
|
81
36
|
_config = _config || {};
|
|
82
37
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
38
|
+
|
|
83
39
|
if (isFunction(dynamicTyping)) {
|
|
84
40
|
_config.dynamicTypingFunction = dynamicTyping;
|
|
85
|
-
// Will be filled on first row call
|
|
86
41
|
dynamicTyping = {};
|
|
87
42
|
}
|
|
88
|
-
_config.dynamicTyping = dynamicTyping;
|
|
89
43
|
|
|
44
|
+
_config.dynamicTyping = dynamicTyping;
|
|
90
45
|
_config.transform = isFunction(_config.transform) ? _config.transform : false;
|
|
91
46
|
|
|
92
47
|
if (_config.worker && Papa.WORKERS_SUPPORTED) {
|
|
93
48
|
var w = newWorker();
|
|
94
|
-
|
|
95
49
|
w.userStep = _config.step;
|
|
96
50
|
w.userChunk = _config.chunk;
|
|
97
51
|
w.userComplete = _config.complete;
|
|
98
52
|
w.userError = _config.error;
|
|
99
|
-
|
|
100
53
|
_config.step = isFunction(_config.step);
|
|
101
54
|
_config.chunk = isFunction(_config.chunk);
|
|
102
55
|
_config.complete = isFunction(_config.complete);
|
|
103
56
|
_config.error = isFunction(_config.error);
|
|
104
|
-
delete _config.worker;
|
|
105
|
-
|
|
57
|
+
delete _config.worker;
|
|
106
58
|
w.postMessage({
|
|
107
59
|
input: _input,
|
|
108
60
|
config: _config,
|
|
109
61
|
workerId: w.id
|
|
110
62
|
});
|
|
111
|
-
|
|
112
63
|
return;
|
|
113
64
|
}
|
|
114
65
|
|
|
115
66
|
var streamer = null;
|
|
116
|
-
|
|
117
|
-
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
118
|
-
// create a node Duplex stream for use
|
|
119
|
-
// with .pipe
|
|
120
|
-
streamer = new DuplexStreamStreamer(_config);
|
|
121
|
-
return streamer.getStream();
|
|
122
|
-
} else
|
|
123
|
-
*/
|
|
67
|
+
|
|
124
68
|
if (typeof _input === 'string') {
|
|
125
|
-
// if (_config.download) streamer = new NetworkStreamer(_config);
|
|
126
|
-
// else
|
|
127
69
|
streamer = new StringStreamer(_config);
|
|
128
70
|
}
|
|
129
|
-
|
|
130
|
-
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
131
|
-
streamer = new ReadableStreamStreamer(_config);
|
|
132
|
-
} else if ((global.File && _input instanceof File) || _input instanceof Object)
|
|
133
|
-
// ...Safari. (see issue #106)
|
|
134
|
-
streamer = new FileStreamer(_config);
|
|
135
|
-
*/
|
|
136
|
-
|
|
137
|
-
// BEGIN FORK
|
|
71
|
+
|
|
138
72
|
if (!streamer) {
|
|
139
73
|
streamer = new UserDefinedStreamer(_config);
|
|
140
74
|
}
|
|
141
|
-
// END FORK
|
|
142
75
|
|
|
143
76
|
return streamer.stream(_input);
|
|
144
77
|
}
|
|
145
78
|
|
|
146
79
|
function JsonToCsv(_input, _config) {
|
|
147
|
-
// Default configuration
|
|
148
|
-
|
|
149
|
-
/** whether to surround every datum with quotes */
|
|
150
80
|
var _quotes = false;
|
|
151
|
-
|
|
152
|
-
/** whether to write headers */
|
|
153
81
|
var _writeHeader = true;
|
|
154
|
-
|
|
155
|
-
/** delimiting character(s) */
|
|
156
82
|
var _delimiter = ',';
|
|
157
|
-
|
|
158
|
-
/** newline character(s) */
|
|
159
83
|
var _newline = '\r\n';
|
|
160
|
-
|
|
161
|
-
/** quote character */
|
|
162
84
|
var _quoteChar = '"';
|
|
163
85
|
|
|
164
|
-
/** escaped quote character, either "" or <config.escapeChar>" */
|
|
165
86
|
var _escapedQuote = _quoteChar + _quoteChar;
|
|
166
87
|
|
|
167
|
-
/** whether to skip empty lines */
|
|
168
88
|
var _skipEmptyLines = false;
|
|
169
|
-
|
|
170
|
-
/** the columns (keys) we expect when we unparse objects */
|
|
171
89
|
var _columns = null;
|
|
172
|
-
|
|
173
90
|
unpackConfig();
|
|
174
|
-
|
|
175
91
|
var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
|
|
176
|
-
|
|
177
92
|
if (typeof _input === 'string') _input = JSON.parse(_input);
|
|
178
93
|
|
|
179
94
|
if (Array.isArray(_input)) {
|
|
180
|
-
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
|
|
181
|
-
else if (typeof _input[0] === 'object')
|
|
182
|
-
return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
95
|
+
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
183
96
|
} else if (typeof _input === 'object') {
|
|
184
97
|
if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
|
|
185
98
|
|
|
186
99
|
if (Array.isArray(_input.data)) {
|
|
187
100
|
if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
|
|
188
|
-
|
|
189
|
-
if (!_input.
|
|
190
|
-
_input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
191
|
-
|
|
192
|
-
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
|
|
193
|
-
_input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
|
|
101
|
+
if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
102
|
+
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
|
|
194
103
|
}
|
|
195
104
|
|
|
196
105
|
return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
|
|
197
106
|
}
|
|
198
107
|
|
|
199
|
-
// Default (any valid paths should return before this)
|
|
200
108
|
throw new Error('Unable to serialize unrecognized input');
|
|
201
109
|
|
|
202
110
|
function unpackConfig() {
|
|
203
111
|
if (typeof _config !== 'object') return;
|
|
204
112
|
|
|
205
|
-
if (
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
return _config.delimiter.indexOf(value) !== -1;
|
|
209
|
-
}).length
|
|
210
|
-
) {
|
|
113
|
+
if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
|
|
114
|
+
return _config.delimiter.indexOf(value) !== -1;
|
|
115
|
+
}).length) {
|
|
211
116
|
_delimiter = _config.delimiter;
|
|
212
117
|
}
|
|
213
118
|
|
|
214
|
-
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
|
|
218
|
-
_skipEmptyLines = _config.skipEmptyLines;
|
|
219
|
-
|
|
119
|
+
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
|
|
120
|
+
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
|
|
220
121
|
if (typeof _config.newline === 'string') _newline = _config.newline;
|
|
221
|
-
|
|
222
122
|
if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
|
|
223
|
-
|
|
224
123
|
if (typeof _config.header === 'boolean') _writeHeader = _config.header;
|
|
225
124
|
|
|
226
125
|
if (Array.isArray(_config.columns)) {
|
|
227
126
|
if (_config.columns.length === 0) throw new Error('Option columns is empty');
|
|
228
|
-
|
|
229
127
|
_columns = _config.columns;
|
|
230
128
|
}
|
|
231
129
|
|
|
@@ -234,93 +132,82 @@ function JsonToCsv(_input, _config) {
|
|
|
234
132
|
}
|
|
235
133
|
}
|
|
236
134
|
|
|
237
|
-
/** Turns an object's keys into an array */
|
|
238
135
|
function objectKeys(obj) {
|
|
239
136
|
if (typeof obj !== 'object') return [];
|
|
240
137
|
var keys = [];
|
|
138
|
+
|
|
241
139
|
for (var key in obj) keys.push(key);
|
|
140
|
+
|
|
242
141
|
return keys;
|
|
243
142
|
}
|
|
244
143
|
|
|
245
|
-
/** The double for loop that iterates the data and writes out a CSV string including header row */
|
|
246
144
|
function serialize(fields, data, skipEmptyLines) {
|
|
247
145
|
var csv = '';
|
|
248
|
-
|
|
249
146
|
if (typeof fields === 'string') fields = JSON.parse(fields);
|
|
250
147
|
if (typeof data === 'string') data = JSON.parse(data);
|
|
251
|
-
|
|
252
148
|
var hasHeader = Array.isArray(fields) && fields.length > 0;
|
|
253
149
|
var dataKeyedByField = !Array.isArray(data[0]);
|
|
254
150
|
|
|
255
|
-
// If there a header row, write it first
|
|
256
151
|
if (hasHeader && _writeHeader) {
|
|
257
152
|
for (var i = 0; i < fields.length; i++) {
|
|
258
153
|
if (i > 0) csv += _delimiter;
|
|
259
154
|
csv += safe(fields[i], i);
|
|
260
155
|
}
|
|
156
|
+
|
|
261
157
|
if (data.length > 0) csv += _newline;
|
|
262
158
|
}
|
|
263
159
|
|
|
264
|
-
// Then write out the data
|
|
265
160
|
for (var row = 0; row < data.length; row++) {
|
|
266
161
|
var maxCol = hasHeader ? fields.length : data[row].length;
|
|
267
|
-
|
|
268
162
|
var emptyLine = false;
|
|
269
163
|
var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
|
|
164
|
+
|
|
270
165
|
if (skipEmptyLines && !hasHeader) {
|
|
271
|
-
emptyLine =
|
|
272
|
-
skipEmptyLines === 'greedy'
|
|
273
|
-
? data[row].join('').trim() === ''
|
|
274
|
-
: data[row].length === 1 && data[row][0].length === 0;
|
|
166
|
+
emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
|
|
275
167
|
}
|
|
168
|
+
|
|
276
169
|
if (skipEmptyLines === 'greedy' && hasHeader) {
|
|
277
170
|
var line = [];
|
|
171
|
+
|
|
278
172
|
for (var c = 0; c < maxCol; c++) {
|
|
279
173
|
var cx = dataKeyedByField ? fields[c] : c;
|
|
280
174
|
line.push(data[row][cx]);
|
|
281
175
|
}
|
|
176
|
+
|
|
282
177
|
emptyLine = line.join('').trim() === '';
|
|
283
178
|
}
|
|
179
|
+
|
|
284
180
|
if (!emptyLine) {
|
|
285
181
|
for (var col = 0; col < maxCol; col++) {
|
|
286
182
|
if (col > 0 && !nullLine) csv += _delimiter;
|
|
287
183
|
var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
|
|
288
184
|
csv += safe(data[row][colIdx], col);
|
|
289
185
|
}
|
|
290
|
-
|
|
186
|
+
|
|
187
|
+
if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
|
|
291
188
|
csv += _newline;
|
|
292
189
|
}
|
|
293
190
|
}
|
|
294
191
|
}
|
|
192
|
+
|
|
295
193
|
return csv;
|
|
296
194
|
}
|
|
297
195
|
|
|
298
|
-
/** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
|
|
299
196
|
function safe(str, col) {
|
|
300
197
|
if (typeof str === 'undefined' || str === null) return '';
|
|
301
|
-
|
|
302
198
|
if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
|
|
303
|
-
|
|
304
199
|
str = str.toString().replace(quoteCharRegex, _escapedQuote);
|
|
305
|
-
|
|
306
|
-
var needsQuotes =
|
|
307
|
-
(typeof _quotes === 'boolean' && _quotes) ||
|
|
308
|
-
(Array.isArray(_quotes) && _quotes[col]) ||
|
|
309
|
-
hasAny(str, Papa.BAD_DELIMITERS) ||
|
|
310
|
-
str.indexOf(_delimiter) > -1 ||
|
|
311
|
-
str.charAt(0) === ' ' ||
|
|
312
|
-
str.charAt(str.length - 1) === ' ';
|
|
313
|
-
|
|
200
|
+
var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
|
|
314
201
|
return needsQuotes ? _quoteChar + str + _quoteChar : str;
|
|
315
202
|
}
|
|
316
203
|
|
|
317
204
|
function hasAny(str, substrings) {
|
|
318
205
|
for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
|
|
206
|
+
|
|
319
207
|
return false;
|
|
320
208
|
}
|
|
321
209
|
}
|
|
322
210
|
|
|
323
|
-
/** ChunkStreamer is the base prototype for various streamer implementations. */
|
|
324
211
|
function ChunkStreamer(config) {
|
|
325
212
|
this._handle = null;
|
|
326
213
|
this._finished = false;
|
|
@@ -339,22 +226,20 @@ function ChunkStreamer(config) {
|
|
|
339
226
|
};
|
|
340
227
|
replaceConfig.call(this, config);
|
|
341
228
|
|
|
342
|
-
this.parseChunk = function(chunk, isFakeChunk) {
|
|
343
|
-
// First chunk pre-processing
|
|
229
|
+
this.parseChunk = function (chunk, isFakeChunk) {
|
|
344
230
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
345
231
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
232
|
+
|
|
346
233
|
if (modifiedChunk !== undefined) chunk = modifiedChunk;
|
|
347
234
|
}
|
|
348
|
-
this.isFirstChunk = false;
|
|
349
235
|
|
|
350
|
-
|
|
236
|
+
this.isFirstChunk = false;
|
|
351
237
|
var aggregate = this._partialLine + chunk;
|
|
352
238
|
this._partialLine = '';
|
|
353
239
|
|
|
354
240
|
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
|
|
355
241
|
|
|
356
242
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
357
|
-
|
|
358
243
|
var lastIndex = results.meta.cursor;
|
|
359
244
|
|
|
360
245
|
if (!this._finished) {
|
|
@@ -363,18 +248,11 @@ function ChunkStreamer(config) {
|
|
|
363
248
|
}
|
|
364
249
|
|
|
365
250
|
if (results && results.data) this._rowCount += results.data.length;
|
|
251
|
+
var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
|
|
366
252
|
|
|
367
|
-
|
|
368
|
-
this._finished || (this._config.preview && this._rowCount >= this._config.preview);
|
|
369
|
-
|
|
370
|
-
if (IS_PAPA_WORKER) {
|
|
371
|
-
global.postMessage({
|
|
372
|
-
results: results,
|
|
373
|
-
workerId: Papa.WORKER_ID,
|
|
374
|
-
finished: finishedIncludingPreview
|
|
375
|
-
});
|
|
376
|
-
} else if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
253
|
+
if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
377
254
|
this._config.chunk(results, this._handle);
|
|
255
|
+
|
|
378
256
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
379
257
|
results = undefined;
|
|
380
258
|
this._completeResults = undefined;
|
|
@@ -386,52 +264,41 @@ function ChunkStreamer(config) {
|
|
|
386
264
|
this._completeResults.meta = results.meta;
|
|
387
265
|
}
|
|
388
266
|
|
|
389
|
-
if (
|
|
390
|
-
!this._completed &&
|
|
391
|
-
finishedIncludingPreview &&
|
|
392
|
-
isFunction(this._config.complete) &&
|
|
393
|
-
(!results || !results.meta.aborted)
|
|
394
|
-
) {
|
|
267
|
+
if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
|
|
395
268
|
this._config.complete(this._completeResults, this._input);
|
|
269
|
+
|
|
396
270
|
this._completed = true;
|
|
397
271
|
}
|
|
398
272
|
|
|
399
273
|
if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
400
|
-
|
|
401
274
|
return results;
|
|
402
275
|
};
|
|
403
276
|
|
|
404
|
-
this._sendError = function(error) {
|
|
277
|
+
this._sendError = function (error) {
|
|
405
278
|
if (isFunction(this._config.error)) this._config.error(error);
|
|
406
|
-
else if (IS_PAPA_WORKER && this._config.error) {
|
|
407
|
-
global.postMessage({
|
|
408
|
-
workerId: Papa.WORKER_ID,
|
|
409
|
-
error: error,
|
|
410
|
-
finished: false
|
|
411
|
-
});
|
|
412
|
-
}
|
|
413
279
|
};
|
|
414
280
|
|
|
415
281
|
function replaceConfig(config) {
|
|
416
|
-
// Deep-copy the config so we can edit it
|
|
417
282
|
var configCopy = copy(config);
|
|
418
|
-
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
419
|
-
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
283
|
+
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
284
|
+
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
420
285
|
this._handle = new ParserHandle(configCopy);
|
|
421
286
|
this._handle.streamer = this;
|
|
422
|
-
this._config = configCopy;
|
|
287
|
+
this._config = configCopy;
|
|
423
288
|
}
|
|
424
289
|
}
|
|
290
|
+
|
|
425
291
|
function StringStreamer(config) {
|
|
426
292
|
config = config || {};
|
|
427
293
|
ChunkStreamer.call(this, config);
|
|
428
|
-
|
|
429
294
|
var remaining;
|
|
430
|
-
|
|
295
|
+
|
|
296
|
+
this.stream = function (s) {
|
|
431
297
|
remaining = s;
|
|
432
298
|
return this._nextChunk();
|
|
433
299
|
};
|
|
434
|
-
|
|
300
|
+
|
|
301
|
+
this._nextChunk = function () {
|
|
435
302
|
if (this._finished) return;
|
|
436
303
|
var size = this._config.chunkSize;
|
|
437
304
|
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
@@ -440,26 +307,28 @@ function StringStreamer(config) {
|
|
|
440
307
|
return this.parseChunk(chunk);
|
|
441
308
|
};
|
|
442
309
|
}
|
|
310
|
+
|
|
443
311
|
StringStreamer.prototype = Object.create(StringStreamer.prototype);
|
|
444
312
|
StringStreamer.prototype.constructor = StringStreamer;
|
|
445
313
|
|
|
446
|
-
// Use one ParserHandle per entire CSV file or string
|
|
447
314
|
function ParserHandle(_config) {
|
|
448
|
-
// One goal is to minimize the use of regular expressions...
|
|
449
315
|
var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
450
316
|
var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
451
|
-
|
|
452
317
|
var self = this;
|
|
453
|
-
var _stepCounter = 0;
|
|
454
|
-
var _rowCounter = 0;
|
|
455
|
-
|
|
456
|
-
var
|
|
457
|
-
|
|
458
|
-
var
|
|
459
|
-
|
|
460
|
-
var
|
|
318
|
+
var _stepCounter = 0;
|
|
319
|
+
var _rowCounter = 0;
|
|
320
|
+
|
|
321
|
+
var _input;
|
|
322
|
+
|
|
323
|
+
var _parser;
|
|
324
|
+
|
|
325
|
+
var _paused = false;
|
|
326
|
+
var _aborted = false;
|
|
327
|
+
|
|
328
|
+
var _delimiterError;
|
|
329
|
+
|
|
330
|
+
var _fields = [];
|
|
461
331
|
var _results = {
|
|
462
|
-
// The last results returned from the parser
|
|
463
332
|
data: [],
|
|
464
333
|
errors: [],
|
|
465
334
|
meta: {}
|
|
@@ -467,45 +336,27 @@ function ParserHandle(_config) {
|
|
|
467
336
|
|
|
468
337
|
if (isFunction(_config.step)) {
|
|
469
338
|
var userStep = _config.step;
|
|
470
|
-
_config.step = function(results) {
|
|
471
|
-
_results = results;
|
|
472
339
|
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
else {
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
_stepCounter += results.data.length;
|
|
482
|
-
if (_config.preview && _stepCounter > _config.preview) _parser.abort();
|
|
483
|
-
else userStep(_results, self);
|
|
484
|
-
}
|
|
340
|
+
_config.step = function (results) {
|
|
341
|
+
_results = results;
|
|
342
|
+
if (needsHeaderRow()) processResults();else {
|
|
343
|
+
processResults();
|
|
344
|
+
if (!_results.data || _results.data.length === 0) return;
|
|
345
|
+
_stepCounter += results.data.length;
|
|
346
|
+
if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
|
|
347
|
+
}
|
|
485
348
|
};
|
|
486
349
|
}
|
|
487
350
|
|
|
488
|
-
|
|
489
|
-
* Parses input. Most users won't need, and shouldn't mess with, the baseIndex
|
|
490
|
-
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
491
|
-
* when an input comes in multiple chunks, like from a file.
|
|
492
|
-
*/
|
|
493
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
351
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
494
352
|
var quoteChar = _config.quoteChar || '"';
|
|
495
353
|
if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
|
|
496
|
-
|
|
497
354
|
_delimiterError = false;
|
|
355
|
+
|
|
498
356
|
if (!_config.delimiter) {
|
|
499
|
-
var delimGuess = guessDelimiter(
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
_config.skipEmptyLines,
|
|
503
|
-
_config.comments,
|
|
504
|
-
_config.delimitersToGuess
|
|
505
|
-
);
|
|
506
|
-
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;
|
|
507
|
-
else {
|
|
508
|
-
_delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
357
|
+
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
|
|
358
|
+
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
|
|
359
|
+
_delimiterError = true;
|
|
509
360
|
_config.delimiter = Papa.DefaultDelimiter;
|
|
510
361
|
}
|
|
511
362
|
_results.meta.delimiter = _config.delimiter;
|
|
@@ -515,65 +366,68 @@ function ParserHandle(_config) {
|
|
|
515
366
|
}
|
|
516
367
|
|
|
517
368
|
var parserConfig = copy(_config);
|
|
518
|
-
if (_config.preview && _config.header) parserConfig.preview++;
|
|
519
|
-
|
|
369
|
+
if (_config.preview && _config.header) parserConfig.preview++;
|
|
520
370
|
_input = input;
|
|
521
371
|
_parser = new Parser(parserConfig);
|
|
522
372
|
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
|
|
523
373
|
processResults();
|
|
524
|
-
return _paused ? {
|
|
374
|
+
return _paused ? {
|
|
375
|
+
meta: {
|
|
376
|
+
paused: true
|
|
377
|
+
}
|
|
378
|
+
} : _results || {
|
|
379
|
+
meta: {
|
|
380
|
+
paused: false
|
|
381
|
+
}
|
|
382
|
+
};
|
|
525
383
|
};
|
|
526
384
|
|
|
527
|
-
this.paused = function() {
|
|
385
|
+
this.paused = function () {
|
|
528
386
|
return _paused;
|
|
529
387
|
};
|
|
530
388
|
|
|
531
|
-
this.pause = function() {
|
|
389
|
+
this.pause = function () {
|
|
532
390
|
_paused = true;
|
|
391
|
+
|
|
533
392
|
_parser.abort();
|
|
393
|
+
|
|
534
394
|
_input = _input.substr(_parser.getCharIndex());
|
|
535
395
|
};
|
|
536
396
|
|
|
537
|
-
this.resume = function() {
|
|
397
|
+
this.resume = function () {
|
|
538
398
|
_paused = false;
|
|
539
399
|
self.streamer.parseChunk(_input, true);
|
|
540
400
|
};
|
|
541
401
|
|
|
542
|
-
this.aborted = function() {
|
|
402
|
+
this.aborted = function () {
|
|
543
403
|
return _aborted;
|
|
544
404
|
};
|
|
545
405
|
|
|
546
|
-
this.abort = function() {
|
|
406
|
+
this.abort = function () {
|
|
547
407
|
_aborted = true;
|
|
408
|
+
|
|
548
409
|
_parser.abort();
|
|
410
|
+
|
|
549
411
|
_results.meta.aborted = true;
|
|
550
412
|
if (isFunction(_config.complete)) _config.complete(_results);
|
|
551
413
|
_input = '';
|
|
552
414
|
};
|
|
553
415
|
|
|
554
416
|
function testEmptyLine(s) {
|
|
555
|
-
return _config.skipEmptyLines === 'greedy'
|
|
556
|
-
? s.join('').trim() === ''
|
|
557
|
-
: s.length === 1 && s[0].length === 0;
|
|
417
|
+
return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
|
|
558
418
|
}
|
|
559
419
|
|
|
560
420
|
function processResults() {
|
|
561
421
|
if (_results && _delimiterError) {
|
|
562
|
-
addError(
|
|
563
|
-
'Delimiter',
|
|
564
|
-
'UndetectableDelimiter',
|
|
565
|
-
"Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
|
|
566
|
-
);
|
|
422
|
+
addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
|
|
567
423
|
_delimiterError = false;
|
|
568
424
|
}
|
|
569
425
|
|
|
570
426
|
if (_config.skipEmptyLines) {
|
|
571
|
-
for (var i = 0; i < _results.data.length; i++)
|
|
572
|
-
if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
427
|
+
for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
573
428
|
}
|
|
574
429
|
|
|
575
430
|
if (needsHeaderRow()) fillHeaderFields();
|
|
576
|
-
|
|
577
431
|
return applyHeaderAndDynamicTypingAndTransformation();
|
|
578
432
|
}
|
|
579
433
|
|
|
@@ -591,50 +445,40 @@ function ParserHandle(_config) {
|
|
|
591
445
|
}
|
|
592
446
|
|
|
593
447
|
if (Array.isArray(_results.data[0])) {
|
|
594
|
-
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
|
|
595
|
-
_results.data[i].forEach(addHeder);
|
|
448
|
+
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
|
|
596
449
|
|
|
597
450
|
_results.data.splice(0, 1);
|
|
598
|
-
}
|
|
599
|
-
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
600
|
-
else _results.data.forEach(addHeder);
|
|
451
|
+
} else _results.data.forEach(addHeder);
|
|
601
452
|
}
|
|
602
453
|
|
|
603
454
|
function shouldApplyDynamicTyping(field) {
|
|
604
|
-
// Cache function values to avoid calling it for each row
|
|
605
455
|
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
|
|
606
456
|
_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
|
|
607
457
|
}
|
|
458
|
+
|
|
608
459
|
return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
|
|
609
460
|
}
|
|
610
461
|
|
|
611
462
|
function parseDynamic(field, value) {
|
|
612
463
|
if (shouldApplyDynamicTyping(field)) {
|
|
613
|
-
if (value === 'true' || value === 'TRUE') return true;
|
|
614
|
-
else if (value === 'false' || value === 'FALSE') return false;
|
|
615
|
-
else if (FLOAT.test(value)) return parseFloat(value);
|
|
616
|
-
else if (ISO_DATE.test(value)) return new Date(value);
|
|
617
|
-
else return value === '' ? null : value;
|
|
464
|
+
if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
|
|
618
465
|
}
|
|
466
|
+
|
|
619
467
|
return value;
|
|
620
468
|
}
|
|
621
469
|
|
|
622
470
|
function applyHeaderAndDynamicTypingAndTransformation() {
|
|
623
|
-
if (!_results || !_results.data ||
|
|
624
|
-
return _results;
|
|
471
|
+
if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
|
|
625
472
|
|
|
626
473
|
function processRow(rowSource, i) {
|
|
627
474
|
var row = _config.header ? {} : [];
|
|
628
|
-
|
|
629
475
|
var j;
|
|
476
|
+
|
|
630
477
|
for (j = 0; j < rowSource.length; j++) {
|
|
631
478
|
var field = j;
|
|
632
479
|
var value = rowSource[j];
|
|
633
|
-
|
|
634
480
|
if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
|
|
635
|
-
|
|
636
481
|
if (_config.transform) value = _config.transform(value, field);
|
|
637
|
-
|
|
638
482
|
value = parseDynamic(field, value);
|
|
639
483
|
|
|
640
484
|
if (field === '__parsed_extra') {
|
|
@@ -644,49 +488,34 @@ function ParserHandle(_config) {
|
|
|
644
488
|
}
|
|
645
489
|
|
|
646
490
|
if (_config.header) {
|
|
647
|
-
if (j > _fields.length)
|
|
648
|
-
addError(
|
|
649
|
-
'FieldMismatch',
|
|
650
|
-
'TooManyFields',
|
|
651
|
-
'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
652
|
-
_rowCounter + i
|
|
653
|
-
);
|
|
654
|
-
else if (j < _fields.length)
|
|
655
|
-
addError(
|
|
656
|
-
'FieldMismatch',
|
|
657
|
-
'TooFewFields',
|
|
658
|
-
'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
659
|
-
_rowCounter + i
|
|
660
|
-
);
|
|
491
|
+
if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
|
|
661
492
|
}
|
|
662
493
|
|
|
663
494
|
return row;
|
|
664
495
|
}
|
|
665
496
|
|
|
666
497
|
var incrementBy = 1;
|
|
498
|
+
|
|
667
499
|
if (!_results.data[0] || Array.isArray(_results.data[0])) {
|
|
668
500
|
_results.data = _results.data.map(processRow);
|
|
669
501
|
incrementBy = _results.data.length;
|
|
670
502
|
} else _results.data = processRow(_results.data, 0);
|
|
671
503
|
|
|
672
504
|
if (_config.header && _results.meta) _results.meta.fields = _fields;
|
|
673
|
-
|
|
674
505
|
_rowCounter += incrementBy;
|
|
675
506
|
return _results;
|
|
676
507
|
}
|
|
677
508
|
|
|
678
509
|
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
|
|
679
510
|
var bestDelim, bestDelta, fieldCountPrevRow;
|
|
680
|
-
|
|
681
511
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
|
|
682
512
|
|
|
683
513
|
for (var i = 0; i < delimitersToGuess.length; i++) {
|
|
684
514
|
var delim = delimitersToGuess[i];
|
|
685
515
|
var delta = 0,
|
|
686
|
-
|
|
687
|
-
|
|
516
|
+
avgFieldCount = 0,
|
|
517
|
+
emptyLinesCount = 0;
|
|
688
518
|
fieldCountPrevRow = undefined;
|
|
689
|
-
|
|
690
519
|
var preview = new Parser({
|
|
691
520
|
comments: comments,
|
|
692
521
|
delimiter: delim,
|
|
@@ -699,6 +528,7 @@ function ParserHandle(_config) {
|
|
|
699
528
|
emptyLinesCount++;
|
|
700
529
|
continue;
|
|
701
530
|
}
|
|
531
|
+
|
|
702
532
|
var fieldCount = preview.data[j].length;
|
|
703
533
|
avgFieldCount += fieldCount;
|
|
704
534
|
|
|
@@ -720,7 +550,6 @@ function ParserHandle(_config) {
|
|
|
720
550
|
}
|
|
721
551
|
|
|
722
552
|
_config.delimiter = bestDelim;
|
|
723
|
-
|
|
724
553
|
return {
|
|
725
554
|
successful: !!bestDelim,
|
|
726
555
|
bestDelimiter: bestDelim
|
|
@@ -728,20 +557,15 @@ function ParserHandle(_config) {
|
|
|
728
557
|
}
|
|
729
558
|
|
|
730
559
|
function guessLineEndings(input, quoteChar) {
|
|
731
|
-
input = input.substr(0, 1024 * 1024);
|
|
732
|
-
// Replace all the text inside quotes
|
|
560
|
+
input = input.substr(0, 1024 * 1024);
|
|
733
561
|
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
734
562
|
input = input.replace(re, '');
|
|
735
|
-
|
|
736
563
|
var r = input.split('\r');
|
|
737
|
-
|
|
738
564
|
var n = input.split('\n');
|
|
739
|
-
|
|
740
565
|
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
741
|
-
|
|
742
566
|
if (r.length === 1 || nAppearsFirst) return '\n';
|
|
743
|
-
|
|
744
567
|
var numWithN = 0;
|
|
568
|
+
|
|
745
569
|
for (var i = 0; i < r.length; i++) {
|
|
746
570
|
if (r[i][0] === '\n') numWithN++;
|
|
747
571
|
}
|
|
@@ -759,14 +583,11 @@ function ParserHandle(_config) {
|
|
|
759
583
|
}
|
|
760
584
|
}
|
|
761
585
|
|
|
762
|
-
/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
|
|
763
586
|
function escapeRegExp(string) {
|
|
764
|
-
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
587
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
765
588
|
}
|
|
766
589
|
|
|
767
|
-
/** The core parser implements speedy and correct CSV parsing */
|
|
768
590
|
function Parser(config) {
|
|
769
|
-
// Unpack the config object
|
|
770
591
|
config = config || {};
|
|
771
592
|
var delim = config.delimiter;
|
|
772
593
|
var newline = config.newline;
|
|
@@ -775,73 +596,61 @@ function Parser(config) {
|
|
|
775
596
|
var preview = config.preview;
|
|
776
597
|
var fastMode = config.fastMode;
|
|
777
598
|
var quoteChar;
|
|
778
|
-
|
|
599
|
+
|
|
779
600
|
if (config.quoteChar === undefined) {
|
|
780
601
|
quoteChar = '"';
|
|
781
602
|
} else {
|
|
782
603
|
quoteChar = config.quoteChar;
|
|
783
604
|
}
|
|
605
|
+
|
|
784
606
|
var escapeChar = quoteChar;
|
|
607
|
+
|
|
785
608
|
if (config.escapeChar !== undefined) {
|
|
786
609
|
escapeChar = config.escapeChar;
|
|
787
610
|
}
|
|
788
611
|
|
|
789
|
-
// Delimiter must be valid
|
|
790
612
|
if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
|
|
791
|
-
|
|
792
|
-
// Comment character must be valid
|
|
793
|
-
if (comments === delim) throw new Error('Comment character same as delimiter');
|
|
794
|
-
else if (comments === true) comments = '#';
|
|
795
|
-
else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
|
|
796
|
-
comments = false;
|
|
797
|
-
|
|
798
|
-
// Newline must be valid: \r, \n, or \r\n
|
|
613
|
+
if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
|
|
799
614
|
if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
|
|
800
|
-
|
|
801
|
-
// We're gonna need these at the Parser scope
|
|
802
615
|
var cursor = 0;
|
|
803
616
|
var aborted = false;
|
|
804
617
|
|
|
805
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
806
|
-
// For some reason, in Chrome, this speeds things up (!?)
|
|
618
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
807
619
|
if (typeof input !== 'string') throw new Error('Input must be a string');
|
|
808
|
-
|
|
809
|
-
// We don't need to compute some of these every time parse() is called,
|
|
810
|
-
// but having them in a more local scope seems to perform better
|
|
811
620
|
var inputLen = input.length,
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
621
|
+
delimLen = delim.length,
|
|
622
|
+
newlineLen = newline.length,
|
|
623
|
+
commentsLen = comments.length;
|
|
815
624
|
var stepIsFunction = isFunction(step);
|
|
816
|
-
|
|
817
|
-
// Establish starting state
|
|
818
625
|
cursor = 0;
|
|
819
626
|
var data = [],
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
627
|
+
errors = [],
|
|
628
|
+
row = [],
|
|
629
|
+
lastCursor = 0;
|
|
824
630
|
if (!input) return returnable();
|
|
825
631
|
|
|
826
|
-
if (fastMode ||
|
|
632
|
+
if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
|
|
827
633
|
var rows = input.split(newline);
|
|
634
|
+
|
|
828
635
|
for (var i = 0; i < rows.length; i++) {
|
|
829
636
|
row = rows[i];
|
|
830
637
|
cursor += row.length;
|
|
831
|
-
if (i !== rows.length - 1) cursor += newline.length;
|
|
832
|
-
else if (ignoreLastRow) return returnable();
|
|
638
|
+
if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
|
|
833
639
|
if (comments && row.substr(0, commentsLen) === comments) continue;
|
|
640
|
+
|
|
834
641
|
if (stepIsFunction) {
|
|
835
642
|
data = [];
|
|
836
643
|
pushRow(row.split(delim));
|
|
837
644
|
doStep();
|
|
838
645
|
if (aborted) return returnable();
|
|
839
646
|
} else pushRow(row.split(delim));
|
|
647
|
+
|
|
840
648
|
if (preview && i >= preview) {
|
|
841
649
|
data = data.slice(0, preview);
|
|
842
650
|
return returnable(true);
|
|
843
651
|
}
|
|
844
652
|
}
|
|
653
|
+
|
|
845
654
|
return returnable();
|
|
846
655
|
}
|
|
847
656
|
|
|
@@ -850,62 +659,45 @@ function Parser(config) {
|
|
|
850
659
|
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
|
|
851
660
|
var quoteSearch;
|
|
852
661
|
|
|
853
|
-
// Parser loop
|
|
854
662
|
for (;;) {
|
|
855
|
-
// Field has opening quote
|
|
856
663
|
if (input[cursor] === quoteChar) {
|
|
857
|
-
// Start our search for the closing quote where the cursor is
|
|
858
664
|
quoteSearch = cursor;
|
|
859
|
-
|
|
860
|
-
// Skip the opening quote
|
|
861
665
|
cursor++;
|
|
862
666
|
|
|
863
667
|
for (;;) {
|
|
864
|
-
// Find closing quote
|
|
865
668
|
quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
|
|
866
669
|
|
|
867
|
-
//No other quotes are found - no other delimiters
|
|
868
670
|
if (quoteSearch === -1) {
|
|
869
671
|
if (!ignoreLastRow) {
|
|
870
|
-
// No closing quote... what a pity
|
|
871
672
|
errors.push({
|
|
872
673
|
type: 'Quotes',
|
|
873
674
|
code: 'MissingQuotes',
|
|
874
675
|
message: 'Quoted field unterminated',
|
|
875
|
-
row: data.length,
|
|
676
|
+
row: data.length,
|
|
876
677
|
index: cursor
|
|
877
678
|
});
|
|
878
679
|
}
|
|
680
|
+
|
|
879
681
|
return finish();
|
|
880
682
|
}
|
|
881
683
|
|
|
882
|
-
// Closing quote at EOF
|
|
883
684
|
if (quoteSearch === inputLen - 1) {
|
|
884
685
|
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
|
|
885
686
|
return finish(value);
|
|
886
687
|
}
|
|
887
688
|
|
|
888
|
-
// If this quote is escaped, it's part of the data; skip it
|
|
889
|
-
// If the quote character is the escape character, then check if the next character is the escape character
|
|
890
689
|
if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
|
|
891
690
|
quoteSearch++;
|
|
892
691
|
continue;
|
|
893
692
|
}
|
|
894
693
|
|
|
895
|
-
|
|
896
|
-
if (
|
|
897
|
-
quoteChar !== escapeChar &&
|
|
898
|
-
quoteSearch !== 0 &&
|
|
899
|
-
input[quoteSearch - 1] === escapeChar
|
|
900
|
-
) {
|
|
694
|
+
if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
|
|
901
695
|
continue;
|
|
902
696
|
}
|
|
903
697
|
|
|
904
|
-
// Check up to nextDelim or nextNewline, whichever is closest
|
|
905
698
|
var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
|
|
906
699
|
var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
|
|
907
700
|
|
|
908
|
-
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
|
|
909
701
|
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
|
|
910
702
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
911
703
|
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
|
|
@@ -918,19 +710,15 @@ function Parser(config) {
|
|
|
918
710
|
}
|
|
919
711
|
|
|
920
712
|
if (preview && data.length >= preview) return returnable(true);
|
|
921
|
-
|
|
922
713
|
break;
|
|
923
714
|
}
|
|
924
715
|
|
|
925
716
|
var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
|
|
926
717
|
|
|
927
|
-
|
|
928
|
-
if (
|
|
929
|
-
input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
|
|
930
|
-
) {
|
|
718
|
+
if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
|
|
931
719
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
932
720
|
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
|
|
933
|
-
nextDelim = input.indexOf(delim, cursor);
|
|
721
|
+
nextDelim = input.indexOf(delim, cursor);
|
|
934
722
|
|
|
935
723
|
if (stepIsFunction) {
|
|
936
724
|
doStep();
|
|
@@ -938,19 +726,16 @@ function Parser(config) {
|
|
|
938
726
|
}
|
|
939
727
|
|
|
940
728
|
if (preview && data.length >= preview) return returnable(true);
|
|
941
|
-
|
|
942
729
|
break;
|
|
943
730
|
}
|
|
944
731
|
|
|
945
|
-
// Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
|
|
946
732
|
errors.push({
|
|
947
733
|
type: 'Quotes',
|
|
948
734
|
code: 'InvalidQuotes',
|
|
949
735
|
message: 'Trailing quote on quoted field is malformed',
|
|
950
|
-
row: data.length,
|
|
736
|
+
row: data.length,
|
|
951
737
|
index: cursor
|
|
952
738
|
});
|
|
953
|
-
|
|
954
739
|
quoteSearch++;
|
|
955
740
|
continue;
|
|
956
741
|
}
|
|
@@ -964,18 +749,14 @@ function Parser(config) {
|
|
|
964
749
|
continue;
|
|
965
750
|
}
|
|
966
751
|
|
|
967
|
-
// Comment found at start of new line
|
|
968
752
|
if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
|
|
969
|
-
if (nextNewline === -1)
|
|
970
|
-
// Comment ends at EOF
|
|
971
|
-
return returnable();
|
|
753
|
+
if (nextNewline === -1) return returnable();
|
|
972
754
|
cursor = nextNewline + newlineLen;
|
|
973
755
|
nextNewline = input.indexOf(newline, cursor);
|
|
974
756
|
nextDelim = input.indexOf(delim, cursor);
|
|
975
757
|
continue;
|
|
976
758
|
}
|
|
977
759
|
|
|
978
|
-
// Next delimiter comes before next newline, so we've reached end of field
|
|
979
760
|
if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
|
|
980
761
|
row.push(input.substring(cursor, nextDelim));
|
|
981
762
|
cursor = nextDelim + delimLen;
|
|
@@ -983,7 +764,6 @@ function Parser(config) {
|
|
|
983
764
|
continue;
|
|
984
765
|
}
|
|
985
766
|
|
|
986
|
-
// End of row
|
|
987
767
|
if (nextNewline !== -1) {
|
|
988
768
|
row.push(input.substring(cursor, nextNewline));
|
|
989
769
|
saveRow(nextNewline + newlineLen);
|
|
@@ -994,7 +774,6 @@ function Parser(config) {
|
|
|
994
774
|
}
|
|
995
775
|
|
|
996
776
|
if (preview && data.length >= preview) return returnable(true);
|
|
997
|
-
|
|
998
777
|
continue;
|
|
999
778
|
}
|
|
1000
779
|
|
|
@@ -1008,41 +787,30 @@ function Parser(config) {
|
|
|
1008
787
|
lastCursor = cursor;
|
|
1009
788
|
}
|
|
1010
789
|
|
|
1011
|
-
/**
|
|
1012
|
-
* checks if there are extra spaces after closing quote and given index without any text
|
|
1013
|
-
* if Yes, returns the number of spaces
|
|
1014
|
-
*/
|
|
1015
790
|
function extraSpaces(index) {
|
|
1016
791
|
var spaceLength = 0;
|
|
792
|
+
|
|
1017
793
|
if (index !== -1) {
|
|
1018
794
|
var textBetweenClosingQuoteAndIndex = input.substring(quoteSearch + 1, index);
|
|
795
|
+
|
|
1019
796
|
if (textBetweenClosingQuoteAndIndex && textBetweenClosingQuoteAndIndex.trim() === '') {
|
|
1020
797
|
spaceLength = textBetweenClosingQuoteAndIndex.length;
|
|
1021
798
|
}
|
|
1022
799
|
}
|
|
800
|
+
|
|
1023
801
|
return spaceLength;
|
|
1024
802
|
}
|
|
1025
803
|
|
|
1026
|
-
/**
|
|
1027
|
-
* Appends the remaining input from cursor to the end into
|
|
1028
|
-
* row, saves the row, calls step, and returns the results.
|
|
1029
|
-
*/
|
|
1030
804
|
function finish(value) {
|
|
1031
805
|
if (ignoreLastRow) return returnable();
|
|
1032
806
|
if (typeof value === 'undefined') value = input.substr(cursor);
|
|
1033
807
|
row.push(value);
|
|
1034
|
-
cursor = inputLen;
|
|
808
|
+
cursor = inputLen;
|
|
1035
809
|
pushRow(row);
|
|
1036
810
|
if (stepIsFunction) doStep();
|
|
1037
811
|
return returnable();
|
|
1038
812
|
}
|
|
1039
813
|
|
|
1040
|
-
/**
|
|
1041
|
-
* Appends the current row to the results. It sets the cursor
|
|
1042
|
-
* to newCursor and finds the nextNewline. The caller should
|
|
1043
|
-
* take care to execute user's step function and check for
|
|
1044
|
-
* preview and end parsing if necessary.
|
|
1045
|
-
*/
|
|
1046
814
|
function saveRow(newCursor) {
|
|
1047
815
|
cursor = newCursor;
|
|
1048
816
|
pushRow(row);
|
|
@@ -1050,7 +818,6 @@ function Parser(config) {
|
|
|
1050
818
|
nextNewline = input.indexOf(newline, cursor);
|
|
1051
819
|
}
|
|
1052
820
|
|
|
1053
|
-
/** Returns an object with the results, errors, and meta. */
|
|
1054
821
|
function returnable(stopped, step) {
|
|
1055
822
|
var isStep = step || false;
|
|
1056
823
|
return {
|
|
@@ -1066,7 +833,6 @@ function Parser(config) {
|
|
|
1066
833
|
};
|
|
1067
834
|
}
|
|
1068
835
|
|
|
1069
|
-
/** Executes the user's step function and resets data & errors. */
|
|
1070
836
|
function doStep() {
|
|
1071
837
|
step(returnable(undefined, true));
|
|
1072
838
|
data = [];
|
|
@@ -1074,13 +840,11 @@ function Parser(config) {
|
|
|
1074
840
|
}
|
|
1075
841
|
};
|
|
1076
842
|
|
|
1077
|
-
|
|
1078
|
-
this.abort = function() {
|
|
843
|
+
this.abort = function () {
|
|
1079
844
|
aborted = true;
|
|
1080
845
|
};
|
|
1081
846
|
|
|
1082
|
-
|
|
1083
|
-
this.getCharIndex = function() {
|
|
847
|
+
this.getCharIndex = function () {
|
|
1084
848
|
return cursor;
|
|
1085
849
|
};
|
|
1086
850
|
}
|
|
@@ -1089,14 +853,16 @@ function notImplemented() {
|
|
|
1089
853
|
throw new Error('Not implemented.');
|
|
1090
854
|
}
|
|
1091
855
|
|
|
1092
|
-
/** Makes a deep copy of an array or object (mostly) */
|
|
1093
856
|
function copy(obj) {
|
|
1094
857
|
if (typeof obj !== 'object' || obj === null) return obj;
|
|
1095
858
|
var cpy = Array.isArray(obj) ? [] : {};
|
|
859
|
+
|
|
1096
860
|
for (var key in obj) cpy[key] = copy(obj[key]);
|
|
861
|
+
|
|
1097
862
|
return cpy;
|
|
1098
863
|
}
|
|
1099
864
|
|
|
1100
865
|
function isFunction(func) {
|
|
1101
866
|
return typeof func === 'function';
|
|
1102
867
|
}
|
|
868
|
+
//# sourceMappingURL=papaparse.js.map
|