@loaders.gl/csv 4.0.0-alpha.4 → 4.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.d.ts +2 -0
- package/dist/bundle.d.ts.map +1 -0
- package/dist/bundle.js +2 -2
- package/dist/csv-loader.d.ts +20 -0
- package/dist/csv-loader.d.ts.map +1 -0
- package/dist/csv-loader.js +248 -220
- package/dist/csv-writer.d.ts +6 -0
- package/dist/csv-writer.d.ts.map +1 -0
- package/dist/csv-writer.js +23 -2
- package/dist/dist.min.js +1827 -0
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/csv-loader.js +246 -0
- package/dist/es5/csv-loader.js.map +1 -0
- package/dist/es5/csv-writer.js +48 -0
- package/dist/es5/csv-writer.js.map +1 -0
- package/dist/es5/index.js +20 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/encoders/encode-csv.js +60 -0
- package/dist/es5/lib/encoders/encode-csv.js.map +1 -0
- package/dist/es5/papaparse/async-iterator-streamer.js +100 -0
- package/dist/es5/papaparse/async-iterator-streamer.js.map +1 -0
- package/dist/es5/papaparse/papaparse.js +703 -0
- package/dist/es5/papaparse/papaparse.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/csv-loader.js +205 -0
- package/dist/esm/csv-loader.js.map +1 -0
- package/dist/esm/csv-writer.js +20 -0
- package/dist/esm/csv-writer.js.map +1 -0
- package/dist/esm/index.js +3 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/encoders/encode-csv.js +40 -0
- package/dist/esm/lib/encoders/encode-csv.js.map +1 -0
- package/dist/{lib → esm/papaparse}/async-iterator-streamer.js +1 -6
- package/dist/esm/papaparse/async-iterator-streamer.js.map +1 -0
- package/{src/libs → dist/esm/papaparse}/papaparse.js +96 -504
- package/dist/esm/papaparse/papaparse.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -2
- package/dist/lib/encoders/encode-csv.d.ts +13 -0
- package/dist/lib/encoders/encode-csv.d.ts.map +1 -0
- package/dist/lib/encoders/encode-csv.js +50 -0
- package/dist/papaparse/async-iterator-streamer.d.ts +6 -0
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -0
- package/dist/papaparse/async-iterator-streamer.js +60 -32
- package/dist/papaparse/papaparse.d.ts +30 -0
- package/dist/papaparse/papaparse.d.ts.map +1 -0
- package/dist/papaparse/papaparse.js +935 -0
- package/package.json +8 -8
- package/src/csv-loader.ts +21 -15
- package/src/csv-writer.ts +29 -5
- package/src/index.ts +5 -0
- package/src/lib/encoders/encode-csv.ts +66 -0
- package/src/{lib → papaparse}/async-iterator-streamer.ts +2 -2
- package/{dist/libs/papaparse.js → src/papaparse/papaparse.ts} +48 -73
- package/dist/bundle.js.map +0 -1
- package/dist/csv-loader.js.map +0 -1
- package/dist/csv-writer.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/async-iterator-streamer.js.map +0 -1
- package/dist/papaparse/async-iterator-streamer.js.map +0 -1
- package/src/papaparse/async-iterator-streamer.js +0 -71
|
@@ -1,258 +1,120 @@
|
|
|
1
|
-
// This is a fork of papaparse
|
|
2
|
-
// https://github.com/mholt/PapaParse
|
|
3
1
|
/* @license
|
|
4
2
|
Papa Parse
|
|
5
3
|
v5.0.0-beta.0
|
|
6
4
|
https://github.com/mholt/PapaParse
|
|
7
5
|
License: MIT
|
|
8
6
|
*/
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
if (typeof global !== 'undefined') {
|
|
30
|
-
return global;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
// When running tests none of the above have been defined
|
|
34
|
-
return {};
|
|
35
|
-
})();
|
|
36
|
-
|
|
37
|
-
var IS_PAPA_WORKER = false;
|
|
38
|
-
|
|
39
|
-
var Papa = {};
|
|
40
|
-
module.exports = Papa;
|
|
41
|
-
Papa.parse = CsvToJson;
|
|
42
|
-
Papa.unparse = JsonToCsv;
|
|
43
|
-
|
|
44
|
-
Papa.RECORD_SEP = String.fromCharCode(30);
|
|
45
|
-
Papa.UNIT_SEP = String.fromCharCode(31);
|
|
46
|
-
Papa.BYTE_ORDER_MARK = '\ufeff';
|
|
47
|
-
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
|
|
48
|
-
Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
|
|
49
|
-
Papa.NODE_STREAM_INPUT = 1;
|
|
50
|
-
|
|
51
|
-
// Configurable chunk sizes for local and remote files, respectively
|
|
52
|
-
Papa.LocalChunkSize = 1024 * 1024 * 10; // 10 MB
|
|
53
|
-
Papa.RemoteChunkSize = 1024 * 1024 * 5; // 5 MB
|
|
54
|
-
Papa.DefaultDelimiter = ','; // Used if not specified and detection fails
|
|
55
|
-
|
|
56
|
-
// Exposed for testing and development only
|
|
57
|
-
Papa.Parser = Parser;
|
|
58
|
-
Papa.ParserHandle = ParserHandle;
|
|
59
|
-
|
|
60
|
-
// BEGIN FORK
|
|
61
|
-
Papa.ChunkStreamer = ChunkStreamer;
|
|
62
|
-
Papa.StringStreamer = StringStreamer;
|
|
63
|
-
/*
|
|
64
|
-
Papa.NetworkStreamer = NetworkStreamer;
|
|
65
|
-
Papa.FileStreamer = FileStreamer;
|
|
66
|
-
Papa.ReadableStreamStreamer = ReadableStreamStreamer;
|
|
67
|
-
if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
68
|
-
Papa.DuplexStreamStreamer = DuplexStreamStreamer;
|
|
69
|
-
}
|
|
70
|
-
*/
|
|
71
|
-
// END FORK
|
|
72
|
-
|
|
73
|
-
// BEGIN FORK
|
|
74
|
-
// Adds an argument to papa.parse
|
|
75
|
-
// function CsvToJson(_input, _config)
|
|
76
|
-
function CsvToJson(
|
|
77
|
-
_input,
|
|
78
|
-
_config,
|
|
79
|
-
UserDefinedStreamer // BEGIN FORK
|
|
80
|
-
) {
|
|
7
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
8
|
+
const Papa = {
|
|
9
|
+
parse: CsvToJson,
|
|
10
|
+
unparse: JsonToCsv,
|
|
11
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
12
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
13
|
+
BYTE_ORDER_MARK,
|
|
14
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
15
|
+
WORKERS_SUPPORTED: false,
|
|
16
|
+
NODE_STREAM_INPUT: 1,
|
|
17
|
+
LocalChunkSize: 1024 * 1024 * 10,
|
|
18
|
+
RemoteChunkSize: 1024 * 1024 * 5,
|
|
19
|
+
DefaultDelimiter: ',',
|
|
20
|
+
Parser: Parser,
|
|
21
|
+
ParserHandle: ParserHandle,
|
|
22
|
+
ChunkStreamer: ChunkStreamer,
|
|
23
|
+
StringStreamer: StringStreamer
|
|
24
|
+
};
|
|
25
|
+
export default Papa;
|
|
26
|
+
function CsvToJson(_input, _config, UserDefinedStreamer) {
|
|
81
27
|
_config = _config || {};
|
|
82
28
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
83
29
|
if (isFunction(dynamicTyping)) {
|
|
84
30
|
_config.dynamicTypingFunction = dynamicTyping;
|
|
85
|
-
// Will be filled on first row call
|
|
86
31
|
dynamicTyping = {};
|
|
87
32
|
}
|
|
88
33
|
_config.dynamicTyping = dynamicTyping;
|
|
89
|
-
|
|
90
34
|
_config.transform = isFunction(_config.transform) ? _config.transform : false;
|
|
91
|
-
|
|
92
35
|
if (_config.worker && Papa.WORKERS_SUPPORTED) {
|
|
93
36
|
var w = newWorker();
|
|
94
|
-
|
|
95
37
|
w.userStep = _config.step;
|
|
96
38
|
w.userChunk = _config.chunk;
|
|
97
39
|
w.userComplete = _config.complete;
|
|
98
40
|
w.userError = _config.error;
|
|
99
|
-
|
|
100
41
|
_config.step = isFunction(_config.step);
|
|
101
42
|
_config.chunk = isFunction(_config.chunk);
|
|
102
43
|
_config.complete = isFunction(_config.complete);
|
|
103
44
|
_config.error = isFunction(_config.error);
|
|
104
|
-
delete _config.worker;
|
|
105
|
-
|
|
45
|
+
delete _config.worker;
|
|
106
46
|
w.postMessage({
|
|
107
47
|
input: _input,
|
|
108
48
|
config: _config,
|
|
109
49
|
workerId: w.id
|
|
110
50
|
});
|
|
111
|
-
|
|
112
51
|
return;
|
|
113
52
|
}
|
|
114
|
-
|
|
115
53
|
var streamer = null;
|
|
116
|
-
/*
|
|
117
|
-
if (_input === Papa.NODE_STREAM_INPUT && typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
118
|
-
// create a node Duplex stream for use
|
|
119
|
-
// with .pipe
|
|
120
|
-
streamer = new DuplexStreamStreamer(_config);
|
|
121
|
-
return streamer.getStream();
|
|
122
|
-
} else
|
|
123
|
-
*/
|
|
124
54
|
if (typeof _input === 'string') {
|
|
125
|
-
// if (_config.download) streamer = new NetworkStreamer(_config);
|
|
126
|
-
// else
|
|
127
55
|
streamer = new StringStreamer(_config);
|
|
128
56
|
}
|
|
129
|
-
/*
|
|
130
|
-
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
131
|
-
streamer = new ReadableStreamStreamer(_config);
|
|
132
|
-
} else if ((global.File && _input instanceof File) || _input instanceof Object)
|
|
133
|
-
// ...Safari. (see issue #106)
|
|
134
|
-
streamer = new FileStreamer(_config);
|
|
135
|
-
*/
|
|
136
|
-
|
|
137
|
-
// BEGIN FORK
|
|
138
57
|
if (!streamer) {
|
|
139
58
|
streamer = new UserDefinedStreamer(_config);
|
|
140
59
|
}
|
|
141
|
-
// END FORK
|
|
142
|
-
|
|
143
60
|
return streamer.stream(_input);
|
|
144
61
|
}
|
|
145
|
-
|
|
146
62
|
function JsonToCsv(_input, _config) {
|
|
147
|
-
// Default configuration
|
|
148
|
-
|
|
149
|
-
/** whether to surround every datum with quotes */
|
|
150
63
|
var _quotes = false;
|
|
151
|
-
|
|
152
|
-
/** whether to write headers */
|
|
153
64
|
var _writeHeader = true;
|
|
154
|
-
|
|
155
|
-
/** delimiting character(s) */
|
|
156
65
|
var _delimiter = ',';
|
|
157
|
-
|
|
158
|
-
/** newline character(s) */
|
|
159
66
|
var _newline = '\r\n';
|
|
160
|
-
|
|
161
|
-
/** quote character */
|
|
162
67
|
var _quoteChar = '"';
|
|
163
|
-
|
|
164
|
-
/** escaped quote character, either "" or <config.escapeChar>" */
|
|
165
68
|
var _escapedQuote = _quoteChar + _quoteChar;
|
|
166
|
-
|
|
167
|
-
/** whether to skip empty lines */
|
|
168
69
|
var _skipEmptyLines = false;
|
|
169
|
-
|
|
170
|
-
/** the columns (keys) we expect when we unparse objects */
|
|
171
70
|
var _columns = null;
|
|
172
|
-
|
|
173
71
|
unpackConfig();
|
|
174
|
-
|
|
175
72
|
var quoteCharRegex = new RegExp(escapeRegExp(_quoteChar), 'g');
|
|
176
|
-
|
|
177
73
|
if (typeof _input === 'string') _input = JSON.parse(_input);
|
|
178
|
-
|
|
179
74
|
if (Array.isArray(_input)) {
|
|
180
|
-
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);
|
|
181
|
-
else if (typeof _input[0] === 'object')
|
|
182
|
-
return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
75
|
+
if (!_input.length || Array.isArray(_input[0])) return serialize(null, _input, _skipEmptyLines);else if (typeof _input[0] === 'object') return serialize(_columns || objectKeys(_input[0]), _input, _skipEmptyLines);
|
|
183
76
|
} else if (typeof _input === 'object') {
|
|
184
77
|
if (typeof _input.data === 'string') _input.data = JSON.parse(_input.data);
|
|
185
|
-
|
|
186
78
|
if (Array.isArray(_input.data)) {
|
|
187
79
|
if (!_input.fields) _input.fields = _input.meta && _input.meta.fields;
|
|
188
|
-
|
|
189
|
-
if (!_input.fields)
|
|
190
|
-
_input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
191
|
-
|
|
192
|
-
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object')
|
|
193
|
-
_input.data = [_input.data]; // handles input like [1,2,3] or ['asdf']
|
|
80
|
+
if (!_input.fields) _input.fields = Array.isArray(_input.data[0]) ? _input.fields : objectKeys(_input.data[0]);
|
|
81
|
+
if (!Array.isArray(_input.data[0]) && typeof _input.data[0] !== 'object') _input.data = [_input.data];
|
|
194
82
|
}
|
|
195
|
-
|
|
196
83
|
return serialize(_input.fields || [], _input.data || [], _skipEmptyLines);
|
|
197
84
|
}
|
|
198
|
-
|
|
199
|
-
// Default (any valid paths should return before this)
|
|
200
85
|
throw new Error('Unable to serialize unrecognized input');
|
|
201
|
-
|
|
202
86
|
function unpackConfig() {
|
|
203
87
|
if (typeof _config !== 'object') return;
|
|
204
|
-
|
|
205
|
-
if (
|
|
206
|
-
typeof _config.delimiter === 'string' &&
|
|
207
|
-
!Papa.BAD_DELIMITERS.filter(function(value) {
|
|
208
|
-
return _config.delimiter.indexOf(value) !== -1;
|
|
209
|
-
}).length
|
|
210
|
-
) {
|
|
88
|
+
if (typeof _config.delimiter === 'string' && !Papa.BAD_DELIMITERS.filter(function (value) {
|
|
89
|
+
return _config.delimiter.indexOf(value) !== -1;
|
|
90
|
+
}).length) {
|
|
211
91
|
_delimiter = _config.delimiter;
|
|
212
92
|
}
|
|
213
|
-
|
|
214
|
-
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes))
|
|
215
|
-
_quotes = _config.quotes;
|
|
216
|
-
|
|
217
|
-
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string')
|
|
218
|
-
_skipEmptyLines = _config.skipEmptyLines;
|
|
219
|
-
|
|
93
|
+
if (typeof _config.quotes === 'boolean' || Array.isArray(_config.quotes)) _quotes = _config.quotes;
|
|
94
|
+
if (typeof _config.skipEmptyLines === 'boolean' || typeof _config.skipEmptyLines === 'string') _skipEmptyLines = _config.skipEmptyLines;
|
|
220
95
|
if (typeof _config.newline === 'string') _newline = _config.newline;
|
|
221
|
-
|
|
222
96
|
if (typeof _config.quoteChar === 'string') _quoteChar = _config.quoteChar;
|
|
223
|
-
|
|
224
97
|
if (typeof _config.header === 'boolean') _writeHeader = _config.header;
|
|
225
|
-
|
|
226
98
|
if (Array.isArray(_config.columns)) {
|
|
227
99
|
if (_config.columns.length === 0) throw new Error('Option columns is empty');
|
|
228
|
-
|
|
229
100
|
_columns = _config.columns;
|
|
230
101
|
}
|
|
231
|
-
|
|
232
102
|
if (_config.escapeChar !== undefined) {
|
|
233
103
|
_escapedQuote = _config.escapeChar + _quoteChar;
|
|
234
104
|
}
|
|
235
105
|
}
|
|
236
|
-
|
|
237
|
-
/** Turns an object's keys into an array */
|
|
238
106
|
function objectKeys(obj) {
|
|
239
107
|
if (typeof obj !== 'object') return [];
|
|
240
108
|
var keys = [];
|
|
241
109
|
for (var key in obj) keys.push(key);
|
|
242
110
|
return keys;
|
|
243
111
|
}
|
|
244
|
-
|
|
245
|
-
/** The double for loop that iterates the data and writes out a CSV string including header row */
|
|
246
112
|
function serialize(fields, data, skipEmptyLines) {
|
|
247
113
|
var csv = '';
|
|
248
|
-
|
|
249
114
|
if (typeof fields === 'string') fields = JSON.parse(fields);
|
|
250
115
|
if (typeof data === 'string') data = JSON.parse(data);
|
|
251
|
-
|
|
252
116
|
var hasHeader = Array.isArray(fields) && fields.length > 0;
|
|
253
117
|
var dataKeyedByField = !Array.isArray(data[0]);
|
|
254
|
-
|
|
255
|
-
// If there a header row, write it first
|
|
256
118
|
if (hasHeader && _writeHeader) {
|
|
257
119
|
for (var i = 0; i < fields.length; i++) {
|
|
258
120
|
if (i > 0) csv += _delimiter;
|
|
@@ -260,18 +122,12 @@ function JsonToCsv(_input, _config) {
|
|
|
260
122
|
}
|
|
261
123
|
if (data.length > 0) csv += _newline;
|
|
262
124
|
}
|
|
263
|
-
|
|
264
|
-
// Then write out the data
|
|
265
125
|
for (var row = 0; row < data.length; row++) {
|
|
266
126
|
var maxCol = hasHeader ? fields.length : data[row].length;
|
|
267
|
-
|
|
268
127
|
var emptyLine = false;
|
|
269
128
|
var nullLine = hasHeader ? Object.keys(data[row]).length === 0 : data[row].length === 0;
|
|
270
129
|
if (skipEmptyLines && !hasHeader) {
|
|
271
|
-
emptyLine =
|
|
272
|
-
skipEmptyLines === 'greedy'
|
|
273
|
-
? data[row].join('').trim() === ''
|
|
274
|
-
: data[row].length === 1 && data[row][0].length === 0;
|
|
130
|
+
emptyLine = skipEmptyLines === 'greedy' ? data[row].join('').trim() === '' : data[row].length === 1 && data[row][0].length === 0;
|
|
275
131
|
}
|
|
276
132
|
if (skipEmptyLines === 'greedy' && hasHeader) {
|
|
277
133
|
var line = [];
|
|
@@ -287,40 +143,25 @@ function JsonToCsv(_input, _config) {
|
|
|
287
143
|
var colIdx = hasHeader && dataKeyedByField ? fields[col] : col;
|
|
288
144
|
csv += safe(data[row][colIdx], col);
|
|
289
145
|
}
|
|
290
|
-
if (row < data.length - 1 && (!skipEmptyLines || (maxCol > 0 && !nullLine))) {
|
|
146
|
+
if (row < data.length - 1 && (!skipEmptyLines || maxCol > 0 && !nullLine)) {
|
|
291
147
|
csv += _newline;
|
|
292
148
|
}
|
|
293
149
|
}
|
|
294
150
|
}
|
|
295
151
|
return csv;
|
|
296
152
|
}
|
|
297
|
-
|
|
298
|
-
/** Encloses a value around quotes if needed (makes a value safe for CSV insertion) */
|
|
299
153
|
function safe(str, col) {
|
|
300
154
|
if (typeof str === 'undefined' || str === null) return '';
|
|
301
|
-
|
|
302
155
|
if (str.constructor === Date) return JSON.stringify(str).slice(1, 25);
|
|
303
|
-
|
|
304
156
|
str = str.toString().replace(quoteCharRegex, _escapedQuote);
|
|
305
|
-
|
|
306
|
-
var needsQuotes =
|
|
307
|
-
(typeof _quotes === 'boolean' && _quotes) ||
|
|
308
|
-
(Array.isArray(_quotes) && _quotes[col]) ||
|
|
309
|
-
hasAny(str, Papa.BAD_DELIMITERS) ||
|
|
310
|
-
str.indexOf(_delimiter) > -1 ||
|
|
311
|
-
str.charAt(0) === ' ' ||
|
|
312
|
-
str.charAt(str.length - 1) === ' ';
|
|
313
|
-
|
|
157
|
+
var needsQuotes = typeof _quotes === 'boolean' && _quotes || Array.isArray(_quotes) && _quotes[col] || hasAny(str, Papa.BAD_DELIMITERS) || str.indexOf(_delimiter) > -1 || str.charAt(0) === ' ' || str.charAt(str.length - 1) === ' ';
|
|
314
158
|
return needsQuotes ? _quoteChar + str + _quoteChar : str;
|
|
315
159
|
}
|
|
316
|
-
|
|
317
160
|
function hasAny(str, substrings) {
|
|
318
161
|
for (var i = 0; i < substrings.length; i++) if (str.indexOf(substrings[i]) > -1) return true;
|
|
319
162
|
return false;
|
|
320
163
|
}
|
|
321
164
|
}
|
|
322
|
-
|
|
323
|
-
/** ChunkStreamer is the base prototype for various streamer implementations. */
|
|
324
165
|
function ChunkStreamer(config) {
|
|
325
166
|
this._handle = null;
|
|
326
167
|
this._finished = false;
|
|
@@ -338,100 +179,62 @@ function ChunkStreamer(config) {
|
|
|
338
179
|
meta: {}
|
|
339
180
|
};
|
|
340
181
|
replaceConfig.call(this, config);
|
|
341
|
-
|
|
342
|
-
this.parseChunk = function(chunk, isFakeChunk) {
|
|
343
|
-
// First chunk pre-processing
|
|
182
|
+
this.parseChunk = function (chunk, isFakeChunk) {
|
|
344
183
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
345
184
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
346
185
|
if (modifiedChunk !== undefined) chunk = modifiedChunk;
|
|
347
186
|
}
|
|
348
187
|
this.isFirstChunk = false;
|
|
349
|
-
|
|
350
|
-
// Rejoin the line we likely just split in two by chunking the file
|
|
351
188
|
var aggregate = this._partialLine + chunk;
|
|
352
189
|
this._partialLine = '';
|
|
353
|
-
|
|
354
190
|
var results = this._handle.parse(aggregate, this._baseIndex, !this._finished);
|
|
355
|
-
|
|
356
191
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
357
|
-
|
|
358
192
|
var lastIndex = results.meta.cursor;
|
|
359
|
-
|
|
360
193
|
if (!this._finished) {
|
|
361
194
|
this._partialLine = aggregate.substring(lastIndex - this._baseIndex);
|
|
362
195
|
this._baseIndex = lastIndex;
|
|
363
196
|
}
|
|
364
|
-
|
|
365
197
|
if (results && results.data) this._rowCount += results.data.length;
|
|
366
|
-
|
|
367
|
-
var finishedIncludingPreview =
|
|
368
|
-
this._finished || (this._config.preview && this._rowCount >= this._config.preview);
|
|
369
|
-
|
|
370
|
-
if (IS_PAPA_WORKER) {
|
|
371
|
-
global.postMessage({
|
|
372
|
-
results: results,
|
|
373
|
-
workerId: Papa.WORKER_ID,
|
|
374
|
-
finished: finishedIncludingPreview
|
|
375
|
-
});
|
|
376
|
-
} else if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
198
|
+
var finishedIncludingPreview = this._finished || this._config.preview && this._rowCount >= this._config.preview;
|
|
199
|
+
if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
377
200
|
this._config.chunk(results, this._handle);
|
|
378
201
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
379
202
|
results = undefined;
|
|
380
203
|
this._completeResults = undefined;
|
|
381
204
|
}
|
|
382
|
-
|
|
383
205
|
if (!this._config.step && !this._config.chunk) {
|
|
384
206
|
this._completeResults.data = this._completeResults.data.concat(results.data);
|
|
385
207
|
this._completeResults.errors = this._completeResults.errors.concat(results.errors);
|
|
386
208
|
this._completeResults.meta = results.meta;
|
|
387
209
|
}
|
|
388
|
-
|
|
389
|
-
if (
|
|
390
|
-
!this._completed &&
|
|
391
|
-
finishedIncludingPreview &&
|
|
392
|
-
isFunction(this._config.complete) &&
|
|
393
|
-
(!results || !results.meta.aborted)
|
|
394
|
-
) {
|
|
210
|
+
if (!this._completed && finishedIncludingPreview && isFunction(this._config.complete) && (!results || !results.meta.aborted)) {
|
|
395
211
|
this._config.complete(this._completeResults, this._input);
|
|
396
212
|
this._completed = true;
|
|
397
213
|
}
|
|
398
|
-
|
|
399
214
|
if (!finishedIncludingPreview && (!results || !results.meta.paused)) this._nextChunk();
|
|
400
|
-
|
|
401
215
|
return results;
|
|
402
216
|
};
|
|
403
|
-
|
|
404
|
-
this._sendError = function(error) {
|
|
217
|
+
this._sendError = function (error) {
|
|
405
218
|
if (isFunction(this._config.error)) this._config.error(error);
|
|
406
|
-
else if (IS_PAPA_WORKER && this._config.error) {
|
|
407
|
-
global.postMessage({
|
|
408
|
-
workerId: Papa.WORKER_ID,
|
|
409
|
-
error: error,
|
|
410
|
-
finished: false
|
|
411
|
-
});
|
|
412
|
-
}
|
|
413
219
|
};
|
|
414
|
-
|
|
415
220
|
function replaceConfig(config) {
|
|
416
|
-
// Deep-copy the config so we can edit it
|
|
417
221
|
var configCopy = copy(config);
|
|
418
|
-
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
419
|
-
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
222
|
+
configCopy.chunkSize = parseInt(configCopy.chunkSize);
|
|
223
|
+
if (!config.step && !config.chunk) configCopy.chunkSize = null;
|
|
420
224
|
this._handle = new ParserHandle(configCopy);
|
|
421
225
|
this._handle.streamer = this;
|
|
422
|
-
this._config = configCopy;
|
|
226
|
+
this._config = configCopy;
|
|
423
227
|
}
|
|
424
228
|
}
|
|
425
229
|
function StringStreamer(config) {
|
|
426
230
|
config = config || {};
|
|
427
231
|
ChunkStreamer.call(this, config);
|
|
428
|
-
|
|
429
232
|
var remaining;
|
|
430
|
-
this.stream = function(s) {
|
|
233
|
+
this.stream = function (s) {
|
|
431
234
|
remaining = s;
|
|
432
235
|
return this._nextChunk();
|
|
433
236
|
};
|
|
434
|
-
this._nextChunk = function() {
|
|
237
|
+
this._nextChunk = function () {
|
|
435
238
|
if (this._finished) return;
|
|
436
239
|
var size = this._config.chunkSize;
|
|
437
240
|
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
@@ -442,70 +245,43 @@ function StringStreamer(config) {
|
|
|
442
245
|
}
|
|
443
246
|
StringStreamer.prototype = Object.create(StringStreamer.prototype);
|
|
444
247
|
StringStreamer.prototype.constructor = StringStreamer;
|
|
445
|
-
|
|
446
|
-
// Use one ParserHandle per entire CSV file or string
|
|
447
248
|
function ParserHandle(_config) {
|
|
448
|
-
// One goal is to minimize the use of regular expressions...
|
|
449
249
|
var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
450
250
|
var ISO_DATE = /(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
451
|
-
|
|
452
251
|
var self = this;
|
|
453
|
-
var _stepCounter = 0;
|
|
454
|
-
var _rowCounter = 0;
|
|
455
|
-
var _input;
|
|
456
|
-
var _parser;
|
|
457
|
-
var _paused = false;
|
|
458
|
-
var _aborted = false;
|
|
459
|
-
var _delimiterError;
|
|
460
|
-
var _fields = [];
|
|
252
|
+
var _stepCounter = 0;
|
|
253
|
+
var _rowCounter = 0;
|
|
254
|
+
var _input;
|
|
255
|
+
var _parser;
|
|
256
|
+
var _paused = false;
|
|
257
|
+
var _aborted = false;
|
|
258
|
+
var _delimiterError;
|
|
259
|
+
var _fields = [];
|
|
461
260
|
var _results = {
|
|
462
|
-
// The last results returned from the parser
|
|
463
261
|
data: [],
|
|
464
262
|
errors: [],
|
|
465
263
|
meta: {}
|
|
466
264
|
};
|
|
467
|
-
|
|
468
265
|
if (isFunction(_config.step)) {
|
|
469
266
|
var userStep = _config.step;
|
|
470
|
-
_config.step = function(results) {
|
|
267
|
+
_config.step = function (results) {
|
|
471
268
|
_results = results;
|
|
472
|
-
|
|
473
|
-
if (needsHeaderRow()) processResults();
|
|
474
|
-
// only call user's step function after header row
|
|
475
|
-
else {
|
|
269
|
+
if (needsHeaderRow()) processResults();else {
|
|
476
270
|
processResults();
|
|
477
|
-
|
|
478
|
-
// It's possbile that this line was empty and there's no row here after all
|
|
479
271
|
if (!_results.data || _results.data.length === 0) return;
|
|
480
|
-
|
|
481
272
|
_stepCounter += results.data.length;
|
|
482
|
-
if (_config.preview && _stepCounter > _config.preview) _parser.abort();
|
|
483
|
-
else userStep(_results, self);
|
|
273
|
+
if (_config.preview && _stepCounter > _config.preview) _parser.abort();else userStep(_results, self);
|
|
484
274
|
}
|
|
485
275
|
};
|
|
486
276
|
}
|
|
487
|
-
|
|
488
|
-
/**
|
|
489
|
-
* Parses input. Most users won't need, and shouldn't mess with, the baseIndex
|
|
490
|
-
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
491
|
-
* when an input comes in multiple chunks, like from a file.
|
|
492
|
-
*/
|
|
493
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
277
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
494
278
|
var quoteChar = _config.quoteChar || '"';
|
|
495
279
|
if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
|
|
496
|
-
|
|
497
280
|
_delimiterError = false;
|
|
498
281
|
if (!_config.delimiter) {
|
|
499
|
-
var delimGuess = guessDelimiter(
|
|
500
|
-
input,
|
|
501
|
-
_config.newline,
|
|
502
|
-
_config.skipEmptyLines,
|
|
503
|
-
_config.comments,
|
|
504
|
-
_config.delimitersToGuess
|
|
505
|
-
);
|
|
506
|
-
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;
|
|
507
|
-
else {
|
|
508
|
-
_delimiterError = true; // add error after parsing (otherwise it would be overwritten)
|
|
282
|
+
var delimGuess = guessDelimiter(input, _config.newline, _config.skipEmptyLines, _config.comments, _config.delimitersToGuess);
|
|
283
|
+
if (delimGuess.successful) _config.delimiter = delimGuess.bestDelimiter;else {
|
|
284
|
+
_delimiterError = true;
|
|
509
285
|
_config.delimiter = Papa.DefaultDelimiter;
|
|
510
286
|
}
|
|
511
287
|
_results.meta.delimiter = _config.delimiter;
|
|
@@ -513,187 +289,129 @@ function ParserHandle(_config) {
|
|
|
513
289
|
_config.delimiter = _config.delimiter(input);
|
|
514
290
|
_results.meta.delimiter = _config.delimiter;
|
|
515
291
|
}
|
|
516
|
-
|
|
517
292
|
var parserConfig = copy(_config);
|
|
518
|
-
if (_config.preview && _config.header) parserConfig.preview++;
|
|
519
|
-
|
|
293
|
+
if (_config.preview && _config.header) parserConfig.preview++;
|
|
520
294
|
_input = input;
|
|
521
295
|
_parser = new Parser(parserConfig);
|
|
522
296
|
_results = _parser.parse(_input, baseIndex, ignoreLastRow);
|
|
523
297
|
processResults();
|
|
524
|
-
return _paused ? {
|
|
298
|
+
return _paused ? {
|
|
299
|
+
meta: {
|
|
300
|
+
paused: true
|
|
301
|
+
}
|
|
302
|
+
} : _results || {
|
|
303
|
+
meta: {
|
|
304
|
+
paused: false
|
|
305
|
+
}
|
|
306
|
+
};
|
|
525
307
|
};
|
|
526
|
-
|
|
527
|
-
this.paused = function() {
|
|
308
|
+
this.paused = function () {
|
|
528
309
|
return _paused;
|
|
529
310
|
};
|
|
530
|
-
|
|
531
|
-
this.pause = function() {
|
|
311
|
+
this.pause = function () {
|
|
532
312
|
_paused = true;
|
|
533
313
|
_parser.abort();
|
|
534
314
|
_input = _input.substr(_parser.getCharIndex());
|
|
535
315
|
};
|
|
536
|
-
|
|
537
|
-
this.resume = function() {
|
|
316
|
+
this.resume = function () {
|
|
538
317
|
_paused = false;
|
|
539
318
|
self.streamer.parseChunk(_input, true);
|
|
540
319
|
};
|
|
541
|
-
|
|
542
|
-
this.aborted = function() {
|
|
320
|
+
this.aborted = function () {
|
|
543
321
|
return _aborted;
|
|
544
322
|
};
|
|
545
|
-
|
|
546
|
-
this.abort = function() {
|
|
323
|
+
this.abort = function () {
|
|
547
324
|
_aborted = true;
|
|
548
325
|
_parser.abort();
|
|
549
326
|
_results.meta.aborted = true;
|
|
550
327
|
if (isFunction(_config.complete)) _config.complete(_results);
|
|
551
328
|
_input = '';
|
|
552
329
|
};
|
|
553
|
-
|
|
554
330
|
function testEmptyLine(s) {
|
|
555
|
-
return _config.skipEmptyLines === 'greedy'
|
|
556
|
-
? s.join('').trim() === ''
|
|
557
|
-
: s.length === 1 && s[0].length === 0;
|
|
331
|
+
return _config.skipEmptyLines === 'greedy' ? s.join('').trim() === '' : s.length === 1 && s[0].length === 0;
|
|
558
332
|
}
|
|
559
|
-
|
|
560
333
|
function processResults() {
|
|
561
334
|
if (_results && _delimiterError) {
|
|
562
|
-
addError(
|
|
563
|
-
'Delimiter',
|
|
564
|
-
'UndetectableDelimiter',
|
|
565
|
-
"Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'"
|
|
566
|
-
);
|
|
335
|
+
addError('Delimiter', 'UndetectableDelimiter', "Unable to auto-detect delimiting character; defaulted to '" + Papa.DefaultDelimiter + "'");
|
|
567
336
|
_delimiterError = false;
|
|
568
337
|
}
|
|
569
|
-
|
|
570
338
|
if (_config.skipEmptyLines) {
|
|
571
|
-
for (var i = 0; i < _results.data.length; i++)
|
|
572
|
-
if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
339
|
+
for (var i = 0; i < _results.data.length; i++) if (testEmptyLine(_results.data[i])) _results.data.splice(i--, 1);
|
|
573
340
|
}
|
|
574
|
-
|
|
575
341
|
if (needsHeaderRow()) fillHeaderFields();
|
|
576
|
-
|
|
577
342
|
return applyHeaderAndDynamicTypingAndTransformation();
|
|
578
343
|
}
|
|
579
|
-
|
|
580
344
|
function needsHeaderRow() {
|
|
581
345
|
return _config.header && _fields.length === 0;
|
|
582
346
|
}
|
|
583
|
-
|
|
584
347
|
function fillHeaderFields() {
|
|
585
348
|
if (!_results) return;
|
|
586
|
-
|
|
587
349
|
function addHeder(header) {
|
|
588
350
|
if (isFunction(_config.transformHeader)) header = _config.transformHeader(header);
|
|
589
|
-
|
|
590
351
|
_fields.push(header);
|
|
591
352
|
}
|
|
592
|
-
|
|
593
353
|
if (Array.isArray(_results.data[0])) {
|
|
594
|
-
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++)
|
|
595
|
-
_results.data[i].forEach(addHeder);
|
|
596
|
-
|
|
354
|
+
for (var i = 0; needsHeaderRow() && i < _results.data.length; i++) _results.data[i].forEach(addHeder);
|
|
597
355
|
_results.data.splice(0, 1);
|
|
598
|
-
}
|
|
599
|
-
// if _results.data[0] is not an array, we are in a step where _results.data is the row.
|
|
600
|
-
else _results.data.forEach(addHeder);
|
|
356
|
+
} else _results.data.forEach(addHeder);
|
|
601
357
|
}
|
|
602
|
-
|
|
603
358
|
function shouldApplyDynamicTyping(field) {
|
|
604
|
-
// Cache function values to avoid calling it for each row
|
|
605
359
|
if (_config.dynamicTypingFunction && _config.dynamicTyping[field] === undefined) {
|
|
606
360
|
_config.dynamicTyping[field] = _config.dynamicTypingFunction(field);
|
|
607
361
|
}
|
|
608
362
|
return (_config.dynamicTyping[field] || _config.dynamicTyping) === true;
|
|
609
363
|
}
|
|
610
|
-
|
|
611
364
|
function parseDynamic(field, value) {
|
|
612
365
|
if (shouldApplyDynamicTyping(field)) {
|
|
613
|
-
if (value === 'true' || value === 'TRUE') return true;
|
|
614
|
-
else if (value === 'false' || value === 'FALSE') return false;
|
|
615
|
-
else if (FLOAT.test(value)) return parseFloat(value);
|
|
616
|
-
else if (ISO_DATE.test(value)) return new Date(value);
|
|
617
|
-
else return value === '' ? null : value;
|
|
366
|
+
if (value === 'true' || value === 'TRUE') return true;else if (value === 'false' || value === 'FALSE') return false;else if (FLOAT.test(value)) return parseFloat(value);else if (ISO_DATE.test(value)) return new Date(value);else return value === '' ? null : value;
|
|
618
367
|
}
|
|
619
368
|
return value;
|
|
620
369
|
}
|
|
621
|
-
|
|
622
370
|
function applyHeaderAndDynamicTypingAndTransformation() {
|
|
623
|
-
if (!_results || !_results.data || (!_config.header && !_config.dynamicTyping && !_config.transform))
|
|
624
|
-
return _results;
|
|
625
|
-
|
|
371
|
+
if (!_results || !_results.data || !_config.header && !_config.dynamicTyping && !_config.transform) return _results;
|
|
626
372
|
function processRow(rowSource, i) {
|
|
627
373
|
var row = _config.header ? {} : [];
|
|
628
|
-
|
|
629
374
|
var j;
|
|
630
375
|
for (j = 0; j < rowSource.length; j++) {
|
|
631
376
|
var field = j;
|
|
632
377
|
var value = rowSource[j];
|
|
633
|
-
|
|
634
378
|
if (_config.header) field = j >= _fields.length ? '__parsed_extra' : _fields[j];
|
|
635
|
-
|
|
636
379
|
if (_config.transform) value = _config.transform(value, field);
|
|
637
|
-
|
|
638
380
|
value = parseDynamic(field, value);
|
|
639
|
-
|
|
640
381
|
if (field === '__parsed_extra') {
|
|
641
382
|
row[field] = row[field] || [];
|
|
642
383
|
row[field].push(value);
|
|
643
384
|
} else row[field] = value;
|
|
644
385
|
}
|
|
645
|
-
|
|
646
386
|
if (_config.header) {
|
|
647
|
-
if (j > _fields.length)
|
|
648
|
-
addError(
|
|
649
|
-
'FieldMismatch',
|
|
650
|
-
'TooManyFields',
|
|
651
|
-
'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
652
|
-
_rowCounter + i
|
|
653
|
-
);
|
|
654
|
-
else if (j < _fields.length)
|
|
655
|
-
addError(
|
|
656
|
-
'FieldMismatch',
|
|
657
|
-
'TooFewFields',
|
|
658
|
-
'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j,
|
|
659
|
-
_rowCounter + i
|
|
660
|
-
);
|
|
387
|
+
if (j > _fields.length) addError('FieldMismatch', 'TooManyFields', 'Too many fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);else if (j < _fields.length) addError('FieldMismatch', 'TooFewFields', 'Too few fields: expected ' + _fields.length + ' fields but parsed ' + j, _rowCounter + i);
|
|
661
388
|
}
|
|
662
|
-
|
|
663
389
|
return row;
|
|
664
390
|
}
|
|
665
|
-
|
|
666
391
|
var incrementBy = 1;
|
|
667
392
|
if (!_results.data[0] || Array.isArray(_results.data[0])) {
|
|
668
393
|
_results.data = _results.data.map(processRow);
|
|
669
394
|
incrementBy = _results.data.length;
|
|
670
395
|
} else _results.data = processRow(_results.data, 0);
|
|
671
|
-
|
|
672
396
|
if (_config.header && _results.meta) _results.meta.fields = _fields;
|
|
673
|
-
|
|
674
397
|
_rowCounter += incrementBy;
|
|
675
398
|
return _results;
|
|
676
399
|
}
|
|
677
|
-
|
|
678
400
|
function guessDelimiter(input, newline, skipEmptyLines, comments, delimitersToGuess) {
|
|
679
401
|
var bestDelim, bestDelta, fieldCountPrevRow;
|
|
680
|
-
|
|
681
402
|
delimitersToGuess = delimitersToGuess || [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP];
|
|
682
|
-
|
|
683
403
|
for (var i = 0; i < delimitersToGuess.length; i++) {
|
|
684
404
|
var delim = delimitersToGuess[i];
|
|
685
405
|
var delta = 0,
|
|
686
406
|
avgFieldCount = 0,
|
|
687
407
|
emptyLinesCount = 0;
|
|
688
408
|
fieldCountPrevRow = undefined;
|
|
689
|
-
|
|
690
409
|
var preview = new Parser({
|
|
691
410
|
comments: comments,
|
|
692
411
|
delimiter: delim,
|
|
693
412
|
newline: newline,
|
|
694
413
|
preview: 10
|
|
695
414
|
}).parse(input);
|
|
696
|
-
|
|
697
415
|
for (var j = 0; j < preview.data.length; j++) {
|
|
698
416
|
if (skipEmptyLines && testEmptyLine(preview.data[j])) {
|
|
699
417
|
emptyLinesCount++;
|
|
@@ -701,7 +419,6 @@ function ParserHandle(_config) {
|
|
|
701
419
|
}
|
|
702
420
|
var fieldCount = preview.data[j].length;
|
|
703
421
|
avgFieldCount += fieldCount;
|
|
704
|
-
|
|
705
422
|
if (typeof fieldCountPrevRow === 'undefined') {
|
|
706
423
|
fieldCountPrevRow = 0;
|
|
707
424
|
continue;
|
|
@@ -710,45 +427,32 @@ function ParserHandle(_config) {
|
|
|
710
427
|
fieldCountPrevRow = fieldCount;
|
|
711
428
|
}
|
|
712
429
|
}
|
|
713
|
-
|
|
714
430
|
if (preview.data.length > 0) avgFieldCount /= preview.data.length - emptyLinesCount;
|
|
715
|
-
|
|
716
431
|
if ((typeof bestDelta === 'undefined' || delta > bestDelta) && avgFieldCount > 1.99) {
|
|
717
432
|
bestDelta = delta;
|
|
718
433
|
bestDelim = delim;
|
|
719
434
|
}
|
|
720
435
|
}
|
|
721
|
-
|
|
722
436
|
_config.delimiter = bestDelim;
|
|
723
|
-
|
|
724
437
|
return {
|
|
725
438
|
successful: !!bestDelim,
|
|
726
439
|
bestDelimiter: bestDelim
|
|
727
440
|
};
|
|
728
441
|
}
|
|
729
|
-
|
|
730
442
|
function guessLineEndings(input, quoteChar) {
|
|
731
|
-
input = input.substr(0, 1024 * 1024);
|
|
732
|
-
// Replace all the text inside quotes
|
|
443
|
+
input = input.substr(0, 1024 * 1024);
|
|
733
444
|
var re = new RegExp(escapeRegExp(quoteChar) + '([^]*?)' + escapeRegExp(quoteChar), 'gm');
|
|
734
445
|
input = input.replace(re, '');
|
|
735
|
-
|
|
736
446
|
var r = input.split('\r');
|
|
737
|
-
|
|
738
447
|
var n = input.split('\n');
|
|
739
|
-
|
|
740
448
|
var nAppearsFirst = n.length > 1 && n[0].length < r[0].length;
|
|
741
|
-
|
|
742
449
|
if (r.length === 1 || nAppearsFirst) return '\n';
|
|
743
|
-
|
|
744
450
|
var numWithN = 0;
|
|
745
451
|
for (var i = 0; i < r.length; i++) {
|
|
746
452
|
if (r[i][0] === '\n') numWithN++;
|
|
747
453
|
}
|
|
748
|
-
|
|
749
454
|
return numWithN >= r.length / 2 ? '\r\n' : '\r';
|
|
750
455
|
}
|
|
751
|
-
|
|
752
456
|
function addError(type, code, msg, row) {
|
|
753
457
|
_results.errors.push({
|
|
754
458
|
type: type,
|
|
@@ -758,15 +462,10 @@ function ParserHandle(_config) {
|
|
|
758
462
|
});
|
|
759
463
|
}
|
|
760
464
|
}
|
|
761
|
-
|
|
762
|
-
/** https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions */
|
|
763
465
|
function escapeRegExp(string) {
|
|
764
|
-
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
466
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
765
467
|
}
|
|
766
|
-
|
|
767
|
-
/** The core parser implements speedy and correct CSV parsing */
|
|
768
468
|
function Parser(config) {
|
|
769
|
-
// Unpack the config object
|
|
770
469
|
config = config || {};
|
|
771
470
|
var delim = config.delimiter;
|
|
772
471
|
var newline = config.newline;
|
|
@@ -775,7 +474,6 @@ function Parser(config) {
|
|
|
775
474
|
var preview = config.preview;
|
|
776
475
|
var fastMode = config.fastMode;
|
|
777
476
|
var quoteChar;
|
|
778
|
-
/** Allows for no quoteChar by setting quoteChar to undefined in config */
|
|
779
477
|
if (config.quoteChar === undefined) {
|
|
780
478
|
quoteChar = '"';
|
|
781
479
|
} else {
|
|
@@ -785,51 +483,30 @@ function Parser(config) {
|
|
|
785
483
|
if (config.escapeChar !== undefined) {
|
|
786
484
|
escapeChar = config.escapeChar;
|
|
787
485
|
}
|
|
788
|
-
|
|
789
|
-
// Delimiter must be valid
|
|
790
486
|
if (typeof delim !== 'string' || Papa.BAD_DELIMITERS.indexOf(delim) > -1) delim = ',';
|
|
791
|
-
|
|
792
|
-
// Comment character must be valid
|
|
793
|
-
if (comments === delim) throw new Error('Comment character same as delimiter');
|
|
794
|
-
else if (comments === true) comments = '#';
|
|
795
|
-
else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1)
|
|
796
|
-
comments = false;
|
|
797
|
-
|
|
798
|
-
// Newline must be valid: \r, \n, or \r\n
|
|
487
|
+
if (comments === delim) throw new Error('Comment character same as delimiter');else if (comments === true) comments = '#';else if (typeof comments !== 'string' || Papa.BAD_DELIMITERS.indexOf(comments) > -1) comments = false;
|
|
799
488
|
if (newline !== '\n' && newline !== '\r' && newline !== '\r\n') newline = '\n';
|
|
800
|
-
|
|
801
|
-
// We're gonna need these at the Parser scope
|
|
802
489
|
var cursor = 0;
|
|
803
490
|
var aborted = false;
|
|
804
|
-
|
|
805
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
806
|
-
// For some reason, in Chrome, this speeds things up (!?)
|
|
491
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
807
492
|
if (typeof input !== 'string') throw new Error('Input must be a string');
|
|
808
|
-
|
|
809
|
-
// We don't need to compute some of these every time parse() is called,
|
|
810
|
-
// but having them in a more local scope seems to perform better
|
|
811
493
|
var inputLen = input.length,
|
|
812
494
|
delimLen = delim.length,
|
|
813
495
|
newlineLen = newline.length,
|
|
814
496
|
commentsLen = comments.length;
|
|
815
497
|
var stepIsFunction = isFunction(step);
|
|
816
|
-
|
|
817
|
-
// Establish starting state
|
|
818
498
|
cursor = 0;
|
|
819
499
|
var data = [],
|
|
820
500
|
errors = [],
|
|
821
501
|
row = [],
|
|
822
502
|
lastCursor = 0;
|
|
823
|
-
|
|
824
503
|
if (!input) return returnable();
|
|
825
|
-
|
|
826
|
-
if (fastMode || (fastMode !== false && input.indexOf(quoteChar) === -1)) {
|
|
504
|
+
if (fastMode || fastMode !== false && input.indexOf(quoteChar) === -1) {
|
|
827
505
|
var rows = input.split(newline);
|
|
828
506
|
for (var i = 0; i < rows.length; i++) {
|
|
829
507
|
row = rows[i];
|
|
830
508
|
cursor += row.length;
|
|
831
|
-
if (i !== rows.length - 1) cursor += newline.length;
|
|
832
|
-
else if (ignoreLastRow) return returnable();
|
|
509
|
+
if (i !== rows.length - 1) cursor += newline.length;else if (ignoreLastRow) return returnable();
|
|
833
510
|
if (comments && row.substr(0, commentsLen) === comments) continue;
|
|
834
511
|
if (stepIsFunction) {
|
|
835
512
|
data = [];
|
|
@@ -844,174 +521,112 @@ function Parser(config) {
|
|
|
844
521
|
}
|
|
845
522
|
return returnable();
|
|
846
523
|
}
|
|
847
|
-
|
|
848
524
|
var nextDelim = input.indexOf(delim, cursor);
|
|
849
525
|
var nextNewline = input.indexOf(newline, cursor);
|
|
850
526
|
var quoteCharRegex = new RegExp(escapeRegExp(escapeChar) + escapeRegExp(quoteChar), 'g');
|
|
851
527
|
var quoteSearch;
|
|
852
|
-
|
|
853
|
-
// Parser loop
|
|
854
528
|
for (;;) {
|
|
855
|
-
// Field has opening quote
|
|
856
529
|
if (input[cursor] === quoteChar) {
|
|
857
|
-
// Start our search for the closing quote where the cursor is
|
|
858
530
|
quoteSearch = cursor;
|
|
859
|
-
|
|
860
|
-
// Skip the opening quote
|
|
861
531
|
cursor++;
|
|
862
|
-
|
|
863
532
|
for (;;) {
|
|
864
|
-
// Find closing quote
|
|
865
533
|
quoteSearch = input.indexOf(quoteChar, quoteSearch + 1);
|
|
866
|
-
|
|
867
|
-
//No other quotes are found - no other delimiters
|
|
868
534
|
if (quoteSearch === -1) {
|
|
869
535
|
if (!ignoreLastRow) {
|
|
870
|
-
// No closing quote... what a pity
|
|
871
536
|
errors.push({
|
|
872
537
|
type: 'Quotes',
|
|
873
538
|
code: 'MissingQuotes',
|
|
874
539
|
message: 'Quoted field unterminated',
|
|
875
|
-
row: data.length,
|
|
540
|
+
row: data.length,
|
|
876
541
|
index: cursor
|
|
877
542
|
});
|
|
878
543
|
}
|
|
879
544
|
return finish();
|
|
880
545
|
}
|
|
881
|
-
|
|
882
|
-
// Closing quote at EOF
|
|
883
546
|
if (quoteSearch === inputLen - 1) {
|
|
884
547
|
var value = input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar);
|
|
885
548
|
return finish(value);
|
|
886
549
|
}
|
|
887
|
-
|
|
888
|
-
// If this quote is escaped, it's part of the data; skip it
|
|
889
|
-
// If the quote character is the escape character, then check if the next character is the escape character
|
|
890
550
|
if (quoteChar === escapeChar && input[quoteSearch + 1] === escapeChar) {
|
|
891
551
|
quoteSearch++;
|
|
892
552
|
continue;
|
|
893
553
|
}
|
|
894
|
-
|
|
895
|
-
// If the quote character is not the escape character, then check if the previous character was the escape character
|
|
896
|
-
if (
|
|
897
|
-
quoteChar !== escapeChar &&
|
|
898
|
-
quoteSearch !== 0 &&
|
|
899
|
-
input[quoteSearch - 1] === escapeChar
|
|
900
|
-
) {
|
|
554
|
+
if (quoteChar !== escapeChar && quoteSearch !== 0 && input[quoteSearch - 1] === escapeChar) {
|
|
901
555
|
continue;
|
|
902
556
|
}
|
|
903
|
-
|
|
904
|
-
// Check up to nextDelim or nextNewline, whichever is closest
|
|
905
557
|
var checkUpTo = nextNewline === -1 ? nextDelim : Math.min(nextDelim, nextNewline);
|
|
906
558
|
var spacesBetweenQuoteAndDelimiter = extraSpaces(checkUpTo);
|
|
907
|
-
|
|
908
|
-
// Closing quote followed by delimiter or 'unnecessary spaces + delimiter'
|
|
909
559
|
if (input[quoteSearch + 1 + spacesBetweenQuoteAndDelimiter] === delim) {
|
|
910
560
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
911
561
|
cursor = quoteSearch + 1 + spacesBetweenQuoteAndDelimiter + delimLen;
|
|
912
562
|
nextDelim = input.indexOf(delim, cursor);
|
|
913
563
|
nextNewline = input.indexOf(newline, cursor);
|
|
914
|
-
|
|
915
564
|
if (stepIsFunction) {
|
|
916
565
|
doStep();
|
|
917
566
|
if (aborted) return returnable();
|
|
918
567
|
}
|
|
919
|
-
|
|
920
568
|
if (preview && data.length >= preview) return returnable(true);
|
|
921
|
-
|
|
922
569
|
break;
|
|
923
570
|
}
|
|
924
|
-
|
|
925
571
|
var spacesBetweenQuoteAndNewLine = extraSpaces(nextNewline);
|
|
926
|
-
|
|
927
|
-
// Closing quote followed by newline or 'unnecessary spaces + newLine'
|
|
928
|
-
if (
|
|
929
|
-
input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline
|
|
930
|
-
) {
|
|
572
|
+
if (input.substr(quoteSearch + 1 + spacesBetweenQuoteAndNewLine, newlineLen) === newline) {
|
|
931
573
|
row.push(input.substring(cursor, quoteSearch).replace(quoteCharRegex, quoteChar));
|
|
932
574
|
saveRow(quoteSearch + 1 + spacesBetweenQuoteAndNewLine + newlineLen);
|
|
933
|
-
nextDelim = input.indexOf(delim, cursor);
|
|
934
|
-
|
|
575
|
+
nextDelim = input.indexOf(delim, cursor);
|
|
935
576
|
if (stepIsFunction) {
|
|
936
577
|
doStep();
|
|
937
578
|
if (aborted) return returnable();
|
|
938
579
|
}
|
|
939
|
-
|
|
940
580
|
if (preview && data.length >= preview) return returnable(true);
|
|
941
|
-
|
|
942
581
|
break;
|
|
943
582
|
}
|
|
944
|
-
|
|
945
|
-
// Checks for valid closing quotes are complete (escaped quotes or quote followed by EOF/delimiter/newline) -- assume these quotes are part of an invalid text string
|
|
946
583
|
errors.push({
|
|
947
584
|
type: 'Quotes',
|
|
948
585
|
code: 'InvalidQuotes',
|
|
949
586
|
message: 'Trailing quote on quoted field is malformed',
|
|
950
|
-
row: data.length,
|
|
587
|
+
row: data.length,
|
|
951
588
|
index: cursor
|
|
952
589
|
});
|
|
953
|
-
|
|
954
590
|
quoteSearch++;
|
|
955
591
|
continue;
|
|
956
592
|
}
|
|
957
|
-
|
|
958
593
|
if (stepIsFunction) {
|
|
959
594
|
doStep();
|
|
960
595
|
if (aborted) return returnable();
|
|
961
596
|
}
|
|
962
|
-
|
|
963
597
|
if (preview && data.length >= preview) return returnable(true);
|
|
964
598
|
continue;
|
|
965
599
|
}
|
|
966
|
-
|
|
967
|
-
// Comment found at start of new line
|
|
968
600
|
if (comments && row.length === 0 && input.substr(cursor, commentsLen) === comments) {
|
|
969
|
-
if (nextNewline === -1)
|
|
970
|
-
// Comment ends at EOF
|
|
971
|
-
return returnable();
|
|
601
|
+
if (nextNewline === -1) return returnable();
|
|
972
602
|
cursor = nextNewline + newlineLen;
|
|
973
603
|
nextNewline = input.indexOf(newline, cursor);
|
|
974
604
|
nextDelim = input.indexOf(delim, cursor);
|
|
975
605
|
continue;
|
|
976
606
|
}
|
|
977
|
-
|
|
978
|
-
// Next delimiter comes before next newline, so we've reached end of field
|
|
979
607
|
if (nextDelim !== -1 && (nextDelim < nextNewline || nextNewline === -1)) {
|
|
980
608
|
row.push(input.substring(cursor, nextDelim));
|
|
981
609
|
cursor = nextDelim + delimLen;
|
|
982
610
|
nextDelim = input.indexOf(delim, cursor);
|
|
983
611
|
continue;
|
|
984
612
|
}
|
|
985
|
-
|
|
986
|
-
// End of row
|
|
987
613
|
if (nextNewline !== -1) {
|
|
988
614
|
row.push(input.substring(cursor, nextNewline));
|
|
989
615
|
saveRow(nextNewline + newlineLen);
|
|
990
|
-
|
|
991
616
|
if (stepIsFunction) {
|
|
992
617
|
doStep();
|
|
993
618
|
if (aborted) return returnable();
|
|
994
619
|
}
|
|
995
|
-
|
|
996
620
|
if (preview && data.length >= preview) return returnable(true);
|
|
997
|
-
|
|
998
621
|
continue;
|
|
999
622
|
}
|
|
1000
|
-
|
|
1001
623
|
break;
|
|
1002
624
|
}
|
|
1003
|
-
|
|
1004
625
|
return finish();
|
|
1005
|
-
|
|
1006
626
|
function pushRow(row) {
|
|
1007
627
|
data.push(row);
|
|
1008
628
|
lastCursor = cursor;
|
|
1009
629
|
}
|
|
1010
|
-
|
|
1011
|
-
/**
|
|
1012
|
-
* checks if there are extra spaces after closing quote and given index without any text
|
|
1013
|
-
* if Yes, returns the number of spaces
|
|
1014
|
-
*/
|
|
1015
630
|
function extraSpaces(index) {
|
|
1016
631
|
var spaceLength = 0;
|
|
1017
632
|
if (index !== -1) {
|
|
@@ -1022,35 +637,21 @@ function Parser(config) {
|
|
|
1022
637
|
}
|
|
1023
638
|
return spaceLength;
|
|
1024
639
|
}
|
|
1025
|
-
|
|
1026
|
-
/**
|
|
1027
|
-
* Appends the remaining input from cursor to the end into
|
|
1028
|
-
* row, saves the row, calls step, and returns the results.
|
|
1029
|
-
*/
|
|
1030
640
|
function finish(value) {
|
|
1031
641
|
if (ignoreLastRow) return returnable();
|
|
1032
642
|
if (typeof value === 'undefined') value = input.substr(cursor);
|
|
1033
643
|
row.push(value);
|
|
1034
|
-
cursor = inputLen;
|
|
644
|
+
cursor = inputLen;
|
|
1035
645
|
pushRow(row);
|
|
1036
646
|
if (stepIsFunction) doStep();
|
|
1037
647
|
return returnable();
|
|
1038
648
|
}
|
|
1039
|
-
|
|
1040
|
-
/**
|
|
1041
|
-
* Appends the current row to the results. It sets the cursor
|
|
1042
|
-
* to newCursor and finds the nextNewline. The caller should
|
|
1043
|
-
* take care to execute user's step function and check for
|
|
1044
|
-
* preview and end parsing if necessary.
|
|
1045
|
-
*/
|
|
1046
649
|
function saveRow(newCursor) {
|
|
1047
650
|
cursor = newCursor;
|
|
1048
651
|
pushRow(row);
|
|
1049
652
|
row = [];
|
|
1050
653
|
nextNewline = input.indexOf(newline, cursor);
|
|
1051
654
|
}
|
|
1052
|
-
|
|
1053
|
-
/** Returns an object with the results, errors, and meta. */
|
|
1054
655
|
function returnable(stopped, step) {
|
|
1055
656
|
var isStep = step || false;
|
|
1056
657
|
return {
|
|
@@ -1065,38 +666,29 @@ function Parser(config) {
|
|
|
1065
666
|
}
|
|
1066
667
|
};
|
|
1067
668
|
}
|
|
1068
|
-
|
|
1069
|
-
/** Executes the user's step function and resets data & errors. */
|
|
1070
669
|
function doStep() {
|
|
1071
670
|
step(returnable(undefined, true));
|
|
1072
671
|
data = [];
|
|
1073
672
|
errors = [];
|
|
1074
673
|
}
|
|
1075
674
|
};
|
|
1076
|
-
|
|
1077
|
-
/** Sets the abort flag */
|
|
1078
|
-
this.abort = function() {
|
|
675
|
+
this.abort = function () {
|
|
1079
676
|
aborted = true;
|
|
1080
677
|
};
|
|
1081
|
-
|
|
1082
|
-
/** Gets the cursor position */
|
|
1083
|
-
this.getCharIndex = function() {
|
|
678
|
+
this.getCharIndex = function () {
|
|
1084
679
|
return cursor;
|
|
1085
680
|
};
|
|
1086
681
|
}
|
|
1087
|
-
|
|
1088
682
|
function notImplemented() {
|
|
1089
683
|
throw new Error('Not implemented.');
|
|
1090
684
|
}
|
|
1091
|
-
|
|
1092
|
-
/** Makes a deep copy of an array or object (mostly) */
|
|
1093
685
|
function copy(obj) {
|
|
1094
686
|
if (typeof obj !== 'object' || obj === null) return obj;
|
|
1095
687
|
var cpy = Array.isArray(obj) ? [] : {};
|
|
1096
688
|
for (var key in obj) cpy[key] = copy(obj[key]);
|
|
1097
689
|
return cpy;
|
|
1098
690
|
}
|
|
1099
|
-
|
|
1100
691
|
function isFunction(func) {
|
|
1101
692
|
return typeof func === 'function';
|
|
1102
693
|
}
|
|
694
|
+
//# sourceMappingURL=papaparse.js.map
|