@loaders.gl/csv 4.0.0-alpha.1 → 4.0.0-alpha.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.d.ts +2 -0
- package/dist/bundle.d.ts.map +1 -0
- package/dist/bundle.js +2 -2
- package/dist/csv-loader.d.ts +20 -0
- package/dist/csv-loader.d.ts.map +1 -0
- package/dist/csv-loader.js +248 -220
- package/dist/csv-writer.d.ts +6 -0
- package/dist/csv-writer.d.ts.map +1 -0
- package/dist/csv-writer.js +23 -2
- package/dist/dist.min.js +1827 -0
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/csv-loader.js +246 -0
- package/dist/es5/csv-loader.js.map +1 -0
- package/dist/es5/csv-writer.js +48 -0
- package/dist/es5/csv-writer.js.map +1 -0
- package/dist/es5/index.js +20 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/encoders/encode-csv.js +60 -0
- package/dist/es5/lib/encoders/encode-csv.js.map +1 -0
- package/dist/es5/papaparse/async-iterator-streamer.js +100 -0
- package/dist/es5/papaparse/async-iterator-streamer.js.map +1 -0
- package/dist/es5/papaparse/papaparse.js +703 -0
- package/dist/es5/papaparse/papaparse.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/csv-loader.js +205 -0
- package/dist/esm/csv-loader.js.map +1 -0
- package/dist/esm/csv-writer.js +20 -0
- package/dist/esm/csv-writer.js.map +1 -0
- package/dist/esm/index.js +3 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/encoders/encode-csv.js +40 -0
- package/dist/esm/lib/encoders/encode-csv.js.map +1 -0
- package/dist/{lib → esm/papaparse}/async-iterator-streamer.js +1 -6
- package/dist/esm/papaparse/async-iterator-streamer.js.map +1 -0
- package/{src/libs → dist/esm/papaparse}/papaparse.js +96 -504
- package/dist/esm/papaparse/papaparse.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +8 -2
- package/dist/lib/encoders/encode-csv.d.ts +13 -0
- package/dist/lib/encoders/encode-csv.d.ts.map +1 -0
- package/dist/lib/encoders/encode-csv.js +50 -0
- package/dist/papaparse/async-iterator-streamer.d.ts +6 -0
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -0
- package/dist/papaparse/async-iterator-streamer.js +60 -32
- package/dist/papaparse/papaparse.d.ts +30 -0
- package/dist/papaparse/papaparse.d.ts.map +1 -0
- package/dist/papaparse/papaparse.js +935 -0
- package/package.json +8 -8
- package/src/csv-loader.ts +21 -15
- package/src/csv-writer.ts +29 -5
- package/src/index.ts +5 -0
- package/src/lib/encoders/encode-csv.ts +66 -0
- package/src/{lib → papaparse}/async-iterator-streamer.ts +2 -2
- package/{dist/libs/papaparse.js → src/papaparse/papaparse.ts} +48 -73
- package/dist/bundle.js.map +0 -1
- package/dist/csv-loader.js.map +0 -1
- package/dist/csv-writer.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/async-iterator-streamer.js.map +0 -1
- package/dist/papaparse/async-iterator-streamer.js.map +0 -1
- package/src/papaparse/async-iterator-streamer.js +0 -71
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/csv",
|
|
3
|
-
"version": "4.0.0-alpha.1",
|
|
3
|
+
"version": "4.0.0-alpha.11",
|
|
4
4
|
"description": "Framework-independent loader for CSV and DSV table formats",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"publishConfig": {
|
|
@@ -16,9 +16,9 @@
|
|
|
16
16
|
"table",
|
|
17
17
|
"CSV"
|
|
18
18
|
],
|
|
19
|
-
"types": "
|
|
20
|
-
"main": "dist/index.js",
|
|
21
|
-
"module": "dist/index.js",
|
|
19
|
+
"types": "dist/index.d.ts",
|
|
20
|
+
"main": "dist/es5/index.js",
|
|
21
|
+
"module": "dist/esm/index.js",
|
|
22
22
|
"sideEffects": false,
|
|
23
23
|
"files": [
|
|
24
24
|
"src",
|
|
@@ -27,14 +27,14 @@
|
|
|
27
27
|
],
|
|
28
28
|
"scripts": {
|
|
29
29
|
"pre-build": "npm run build-bundle",
|
|
30
|
-
"build-bundle": "esbuild src/bundle.ts --bundle --outfile=dist/
|
|
30
|
+
"build-bundle": "esbuild src/bundle.ts --bundle --outfile=dist/dist.min.js"
|
|
31
31
|
},
|
|
32
32
|
"dependencies": {
|
|
33
|
-
"@loaders.gl/loader-utils": "4.0.0-alpha.
|
|
34
|
-
"@loaders.gl/schema": "4.0.0-alpha.
|
|
33
|
+
"@loaders.gl/loader-utils": "4.0.0-alpha.11",
|
|
34
|
+
"@loaders.gl/schema": "4.0.0-alpha.11"
|
|
35
35
|
},
|
|
36
36
|
"devDependencies": {
|
|
37
37
|
"d3-dsv": "^1.2.0"
|
|
38
38
|
},
|
|
39
|
-
"gitHead": "
|
|
39
|
+
"gitHead": "bc680098cfea790c67b7fb95bab96e8d9288d34f"
|
|
40
40
|
}
|
package/src/csv-loader.ts
CHANGED
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
|
|
1
3
|
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
2
|
-
import type {Batch} from '@loaders.gl/schema';
|
|
3
|
-
type Schema = any;
|
|
4
|
+
import type {Batch, TableBatch} from '@loaders.gl/schema';
|
|
4
5
|
|
|
5
6
|
import {
|
|
6
7
|
AsyncQueue,
|
|
8
|
+
Table,
|
|
7
9
|
TableBatchBuilder,
|
|
8
10
|
convertToArrayRow,
|
|
9
11
|
convertToObjectRow
|
|
10
12
|
} from '@loaders.gl/schema';
|
|
11
|
-
import Papa from './libs/papaparse';
|
|
12
|
-
import AsyncIteratorStreamer from './lib/async-iterator-streamer';
|
|
13
|
+
import Papa from './papaparse/papaparse';
|
|
14
|
+
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
|
|
15
|
+
|
|
16
|
+
type ObjectField = {name: string; index: number; type: any};
|
|
17
|
+
type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
|
|
13
18
|
|
|
14
19
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
15
20
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
@@ -59,7 +64,7 @@ const DEFAULT_CSV_LOADER_OPTIONS = {
|
|
|
59
64
|
}
|
|
60
65
|
};
|
|
61
66
|
|
|
62
|
-
export const CSVLoader = {
|
|
67
|
+
export const CSVLoader: LoaderWithParser<Table, TableBatch, CSVLoaderOptions> = {
|
|
63
68
|
id: 'csv',
|
|
64
69
|
module: 'csv',
|
|
65
70
|
name: 'CSV',
|
|
@@ -147,7 +152,7 @@ function parseCSVInBatches(
|
|
|
147
152
|
let isFirstRow: boolean = true;
|
|
148
153
|
let headerRow: string[] | null = null;
|
|
149
154
|
let tableBatchBuilder: TableBatchBuilder | null = null;
|
|
150
|
-
let schema: Schema | null = null;
|
|
155
|
+
let schema: ObjectSchema | null = null;
|
|
151
156
|
|
|
152
157
|
const config = {
|
|
153
158
|
// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
|
|
@@ -207,11 +212,14 @@ function parseCSVInBatches(
|
|
|
207
212
|
// Add the row
|
|
208
213
|
tableBatchBuilder =
|
|
209
214
|
tableBatchBuilder ||
|
|
210
|
-
new TableBatchBuilder(
|
|
211
|
-
// @ts-expect-error
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
+
new TableBatchBuilder(
|
|
216
|
+
// @ts-expect-error TODO this is not a proper schema
|
|
217
|
+
schema,
|
|
218
|
+
{
|
|
219
|
+
shape: csvOptions.shape || 'array-row-table',
|
|
220
|
+
...options
|
|
221
|
+
}
|
|
222
|
+
);
|
|
215
223
|
|
|
216
224
|
try {
|
|
217
225
|
tableBatchBuilder.addRow(row);
|
|
@@ -306,8 +314,8 @@ function generateHeader(columnPrefix: string, count: number = 0): string[] {
|
|
|
306
314
|
return headers;
|
|
307
315
|
}
|
|
308
316
|
|
|
309
|
-
function deduceSchema(row, headerRow) {
|
|
310
|
-
const schema = headerRow ? {} : [];
|
|
317
|
+
function deduceSchema(row, headerRow): ObjectSchema {
|
|
318
|
+
const schema: ObjectSchema = headerRow ? {} : [];
|
|
311
319
|
for (let i = 0; i < row.length; i++) {
|
|
312
320
|
const columnName = (headerRow && headerRow[i]) || i;
|
|
313
321
|
const value = row[i];
|
|
@@ -326,5 +334,3 @@ function deduceSchema(row, headerRow) {
|
|
|
326
334
|
}
|
|
327
335
|
return schema;
|
|
328
336
|
}
|
|
329
|
-
|
|
330
|
-
export const _typecheckCSVLoader: LoaderWithParser = CSVLoader;
|
package/src/csv-writer.ts
CHANGED
|
@@ -1,6 +1,30 @@
|
|
|
1
|
-
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
2
|
|
|
3
|
-
/*
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
/* global TextEncoder */
|
|
4
|
+
import type {Writer} from '@loaders.gl/loader-utils';
|
|
5
|
+
import type {Table, TableBatch} from '@loaders.gl/schema';
|
|
6
|
+
import type {CSVWriterOptions} from './lib/encoders/encode-csv';
|
|
7
|
+
import {encodeTableAsCSV} from './lib/encoders/encode-csv';
|
|
8
|
+
|
|
9
|
+
export type {CSVWriterOptions};
|
|
10
|
+
|
|
11
|
+
const DEFAULT_WRITER_OPTIONS: Required<CSVWriterOptions> = {
|
|
12
|
+
csv: {
|
|
13
|
+
useDisplayNames: false
|
|
14
|
+
},
|
|
15
|
+
useDisplayNames: false
|
|
16
|
+
};
|
|
17
|
+
|
|
18
|
+
export const CSVWriter: Writer<Table, TableBatch, CSVWriterOptions> = {
|
|
19
|
+
id: 'csv',
|
|
20
|
+
version: 'latest',
|
|
21
|
+
module: 'csv',
|
|
22
|
+
name: 'CSV',
|
|
23
|
+
extensions: ['csv'],
|
|
24
|
+
mimeTypes: ['text/csv'],
|
|
25
|
+
options: DEFAULT_WRITER_OPTIONS,
|
|
26
|
+
text: true,
|
|
27
|
+
encode: async (table, options) =>
|
|
28
|
+
new TextEncoder().encode(encodeTableAsCSV(table, options)).buffer,
|
|
29
|
+
encodeText: (table, options) => encodeTableAsCSV(table, options)
|
|
30
|
+
};
|
package/src/index.ts
CHANGED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
// loaders.gl, MIT license
|
|
2
|
+
// Copyright 2022 Foursquare Labs, Inc.
|
|
3
|
+
|
|
4
|
+
import {Table, makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema';
|
|
5
|
+
import {csvFormatRows} from 'd3-dsv';
|
|
6
|
+
|
|
7
|
+
type EncodableData = string | null;
|
|
8
|
+
|
|
9
|
+
export type CSVWriterOptions = {
|
|
10
|
+
csv?: {
|
|
11
|
+
useDisplayNames?: boolean;
|
|
12
|
+
};
|
|
13
|
+
/** @deprecated */
|
|
14
|
+
useDisplayNames?: boolean;
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
/**
|
|
18
|
+
* Encode a Table object as CSV
|
|
19
|
+
*/
|
|
20
|
+
export function encodeTableAsCSV(
|
|
21
|
+
table: Table,
|
|
22
|
+
options: CSVWriterOptions = {csv: {useDisplayNames: true}}
|
|
23
|
+
): string {
|
|
24
|
+
const useDisplayNames = options.useDisplayNames || options.csv?.useDisplayNames;
|
|
25
|
+
|
|
26
|
+
const fields = table.schema?.fields || [];
|
|
27
|
+
|
|
28
|
+
const columnNames = fields.map((f) => {
|
|
29
|
+
// This is a leaky abstraction, assuming Kepler metadata
|
|
30
|
+
const displayName = f.metadata?.displayName;
|
|
31
|
+
return useDisplayNames && typeof displayName === 'string' ? displayName : f.name;
|
|
32
|
+
});
|
|
33
|
+
const formattedData: EncodableData[][] = [columnNames];
|
|
34
|
+
|
|
35
|
+
for (const row of makeArrayRowIterator(table)) {
|
|
36
|
+
const formattedRow: EncodableData[] = [];
|
|
37
|
+
for (let columnIndex = 0; columnIndex < getTableNumCols(table); ++columnIndex) {
|
|
38
|
+
const value = row[columnIndex];
|
|
39
|
+
formattedRow[columnIndex] = preformatFieldValue(value);
|
|
40
|
+
}
|
|
41
|
+
formattedData.push(formattedRow);
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
return csvFormatRows(formattedData);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Stringifies a value
|
|
49
|
+
* @todo Why is it called parse?
|
|
50
|
+
*/
|
|
51
|
+
const preformatFieldValue = (value: unknown): EncodableData => {
|
|
52
|
+
if (value === null || value === undefined) {
|
|
53
|
+
// TODO: It would be nice to distinguish between missing values and the empty string
|
|
54
|
+
// https://github.com/d3/d3-dsv/issues/84
|
|
55
|
+
return null;
|
|
56
|
+
}
|
|
57
|
+
if (value instanceof Date) {
|
|
58
|
+
// d3-dsv formats dates without timezones if they don't have time info;
|
|
59
|
+
// this forces them to always use fully-qualified ISO time strings
|
|
60
|
+
return value.toISOString();
|
|
61
|
+
}
|
|
62
|
+
if (typeof value === 'object') {
|
|
63
|
+
return JSON.stringify(value);
|
|
64
|
+
}
|
|
65
|
+
return String(value);
|
|
66
|
+
};
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
// A custom papaparse `Streamer` for async iterators
|
|
2
3
|
// Ideally this can be contributed back to papaparse
|
|
3
4
|
// Or papaparse can expose Streamer API so we can extend without forking.
|
|
4
5
|
|
|
5
|
-
// @ts-nocheck
|
|
6
6
|
/* eslint-disable no-invalid-this */
|
|
7
7
|
|
|
8
8
|
// Note: papaparse is not an ES6 module
|
|
9
|
-
import Papa from '
|
|
9
|
+
import Papa from './papaparse';
|
|
10
10
|
const {ChunkStreamer} = Papa;
|
|
11
11
|
|
|
12
12
|
export default function AsyncIteratorStreamer(config) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
// @ts-nocheck
|
|
1
2
|
// This is a fork of papaparse
|
|
2
3
|
// https://github.com/mholt/PapaParse
|
|
3
4
|
/* @license
|
|
@@ -14,52 +15,34 @@ License: MIT
|
|
|
14
15
|
// - Remove unused jQuery plugin support
|
|
15
16
|
|
|
16
17
|
/* eslint-disable */
|
|
17
|
-
|
|
18
|
-
var global = (function() {
|
|
19
|
-
// alternative method, similar to `Function('return this')()`
|
|
20
|
-
// but without using `eval` (which is disabled when
|
|
21
|
-
// using Content Security Policy).
|
|
22
|
-
|
|
23
|
-
if (typeof self !== 'undefined') {
|
|
24
|
-
return self;
|
|
25
|
-
}
|
|
26
|
-
if (typeof window !== 'undefined') {
|
|
27
|
-
return window;
|
|
28
|
-
}
|
|
29
|
-
if (typeof global !== 'undefined') {
|
|
30
|
-
return global;
|
|
31
|
-
}
|
|
18
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
32
19
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
20
|
+
const Papa = {
|
|
21
|
+
parse: CsvToJson,
|
|
22
|
+
unparse: JsonToCsv,
|
|
36
23
|
|
|
37
|
-
|
|
24
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
25
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
26
|
+
BYTE_ORDER_MARK,
|
|
27
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
28
|
+
WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
|
|
29
|
+
NODE_STREAM_INPUT: 1,
|
|
38
30
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
31
|
+
// Configurable chunk sizes for local and remote files, respectively
|
|
32
|
+
LocalChunkSize: 1024 * 1024 * 10, // 10 M,
|
|
33
|
+
RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
|
|
34
|
+
DefaultDelimiter: ',', // Used if not specified and detection fail,
|
|
43
35
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
Papa.BAD_DELIMITERS = ['\r', '\n', '"', Papa.BYTE_ORDER_MARK];
|
|
48
|
-
Papa.WORKERS_SUPPORTED = false; // !IS_WORKER && !!global.Worker;
|
|
49
|
-
Papa.NODE_STREAM_INPUT = 1;
|
|
36
|
+
// Exposed for testing and development only
|
|
37
|
+
Parser: Parser,
|
|
38
|
+
ParserHandle: ParserHandle,
|
|
50
39
|
|
|
51
|
-
//
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
// Exposed for testing and development only
|
|
57
|
-
Papa.Parser = Parser;
|
|
58
|
-
Papa.ParserHandle = ParserHandle;
|
|
40
|
+
// BEGIN FORK
|
|
41
|
+
ChunkStreamer: ChunkStreamer,
|
|
42
|
+
StringStreamer: StringStreamer
|
|
43
|
+
};
|
|
44
|
+
export default Papa;
|
|
59
45
|
|
|
60
|
-
// BEGIN FORK
|
|
61
|
-
Papa.ChunkStreamer = ChunkStreamer;
|
|
62
|
-
Papa.StringStreamer = StringStreamer;
|
|
63
46
|
/*
|
|
64
47
|
Papa.NetworkStreamer = NetworkStreamer;
|
|
65
48
|
Papa.FileStreamer = FileStreamer;
|
|
@@ -76,7 +59,7 @@ if (typeof PAPA_BROWSER_CONTEXT === 'undefined') {
|
|
|
76
59
|
function CsvToJson(
|
|
77
60
|
_input,
|
|
78
61
|
_config,
|
|
79
|
-
UserDefinedStreamer // BEGIN FORK
|
|
62
|
+
UserDefinedStreamer? // BEGIN FORK
|
|
80
63
|
) {
|
|
81
64
|
_config = _config || {};
|
|
82
65
|
var dynamicTyping = _config.dynamicTyping || false;
|
|
@@ -129,7 +112,7 @@ function CsvToJson(
|
|
|
129
112
|
/*
|
|
130
113
|
else if (_input.readable === true && isFunction(_input.read) && isFunction(_input.on)) {
|
|
131
114
|
streamer = new ReadableStreamStreamer(_config);
|
|
132
|
-
} else if ((
|
|
115
|
+
} else if ((globalThis.File && _input instanceof File) || _input instanceof Object)
|
|
133
116
|
// ...Safari. (see issue #106)
|
|
134
117
|
streamer = new FileStreamer(_config);
|
|
135
118
|
*/
|
|
@@ -204,7 +187,7 @@ function JsonToCsv(_input, _config) {
|
|
|
204
187
|
|
|
205
188
|
if (
|
|
206
189
|
typeof _config.delimiter === 'string' &&
|
|
207
|
-
!Papa.BAD_DELIMITERS.filter(function(value) {
|
|
190
|
+
!Papa.BAD_DELIMITERS.filter(function (value) {
|
|
208
191
|
return _config.delimiter.indexOf(value) !== -1;
|
|
209
192
|
}).length
|
|
210
193
|
) {
|
|
@@ -339,7 +322,7 @@ function ChunkStreamer(config) {
|
|
|
339
322
|
};
|
|
340
323
|
replaceConfig.call(this, config);
|
|
341
324
|
|
|
342
|
-
this.parseChunk = function(chunk, isFakeChunk) {
|
|
325
|
+
this.parseChunk = function (chunk, isFakeChunk) {
|
|
343
326
|
// First chunk pre-processing
|
|
344
327
|
if (this.isFirstChunk && isFunction(this._config.beforeFirstChunk)) {
|
|
345
328
|
var modifiedChunk = this._config.beforeFirstChunk(chunk);
|
|
@@ -367,13 +350,7 @@ function ChunkStreamer(config) {
|
|
|
367
350
|
var finishedIncludingPreview =
|
|
368
351
|
this._finished || (this._config.preview && this._rowCount >= this._config.preview);
|
|
369
352
|
|
|
370
|
-
if (
|
|
371
|
-
global.postMessage({
|
|
372
|
-
results: results,
|
|
373
|
-
workerId: Papa.WORKER_ID,
|
|
374
|
-
finished: finishedIncludingPreview
|
|
375
|
-
});
|
|
376
|
-
} else if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
353
|
+
if (isFunction(this._config.chunk) && !isFakeChunk) {
|
|
377
354
|
this._config.chunk(results, this._handle);
|
|
378
355
|
if (this._handle.paused() || this._handle.aborted()) return;
|
|
379
356
|
results = undefined;
|
|
@@ -401,15 +378,8 @@ function ChunkStreamer(config) {
|
|
|
401
378
|
return results;
|
|
402
379
|
};
|
|
403
380
|
|
|
404
|
-
this._sendError = function(error) {
|
|
381
|
+
this._sendError = function (error) {
|
|
405
382
|
if (isFunction(this._config.error)) this._config.error(error);
|
|
406
|
-
else if (IS_PAPA_WORKER && this._config.error) {
|
|
407
|
-
global.postMessage({
|
|
408
|
-
workerId: Papa.WORKER_ID,
|
|
409
|
-
error: error,
|
|
410
|
-
finished: false
|
|
411
|
-
});
|
|
412
|
-
}
|
|
413
383
|
};
|
|
414
384
|
|
|
415
385
|
function replaceConfig(config) {
|
|
@@ -427,11 +397,11 @@ function StringStreamer(config) {
|
|
|
427
397
|
ChunkStreamer.call(this, config);
|
|
428
398
|
|
|
429
399
|
var remaining;
|
|
430
|
-
this.stream = function(s) {
|
|
400
|
+
this.stream = function (s) {
|
|
431
401
|
remaining = s;
|
|
432
402
|
return this._nextChunk();
|
|
433
403
|
};
|
|
434
|
-
this._nextChunk = function() {
|
|
404
|
+
this._nextChunk = function () {
|
|
435
405
|
if (this._finished) return;
|
|
436
406
|
var size = this._config.chunkSize;
|
|
437
407
|
var chunk = size ? remaining.substr(0, size) : remaining;
|
|
@@ -447,7 +417,8 @@ StringStreamer.prototype.constructor = StringStreamer;
|
|
|
447
417
|
function ParserHandle(_config) {
|
|
448
418
|
// One goal is to minimize the use of regular expressions...
|
|
449
419
|
var FLOAT = /^\s*-?(\d*\.?\d+|\d+\.?\d*)(e[-+]?\d+)?\s*$/i;
|
|
450
|
-
var ISO_DATE =
|
|
420
|
+
var ISO_DATE =
|
|
421
|
+
/(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d\.\d+([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))|(\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d([+-][0-2]\d:[0-5]\d|Z))/;
|
|
451
422
|
|
|
452
423
|
var self = this;
|
|
453
424
|
var _stepCounter = 0; // Number of times step was called (number of rows parsed)
|
|
@@ -467,7 +438,7 @@ function ParserHandle(_config) {
|
|
|
467
438
|
|
|
468
439
|
if (isFunction(_config.step)) {
|
|
469
440
|
var userStep = _config.step;
|
|
470
|
-
_config.step = function(results) {
|
|
441
|
+
_config.step = function (results) {
|
|
471
442
|
_results = results;
|
|
472
443
|
|
|
473
444
|
if (needsHeaderRow()) processResults();
|
|
@@ -490,7 +461,7 @@ function ParserHandle(_config) {
|
|
|
490
461
|
* and ignoreLastRow parameters. They are used by streamers (wrapper functions)
|
|
491
462
|
* when an input comes in multiple chunks, like from a file.
|
|
492
463
|
*/
|
|
493
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
464
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
494
465
|
var quoteChar = _config.quoteChar || '"';
|
|
495
466
|
if (!_config.newline) _config.newline = guessLineEndings(input, quoteChar);
|
|
496
467
|
|
|
@@ -524,26 +495,26 @@ function ParserHandle(_config) {
|
|
|
524
495
|
return _paused ? {meta: {paused: true}} : _results || {meta: {paused: false}};
|
|
525
496
|
};
|
|
526
497
|
|
|
527
|
-
this.paused = function() {
|
|
498
|
+
this.paused = function () {
|
|
528
499
|
return _paused;
|
|
529
500
|
};
|
|
530
501
|
|
|
531
|
-
this.pause = function() {
|
|
502
|
+
this.pause = function () {
|
|
532
503
|
_paused = true;
|
|
533
504
|
_parser.abort();
|
|
534
505
|
_input = _input.substr(_parser.getCharIndex());
|
|
535
506
|
};
|
|
536
507
|
|
|
537
|
-
this.resume = function() {
|
|
508
|
+
this.resume = function () {
|
|
538
509
|
_paused = false;
|
|
539
510
|
self.streamer.parseChunk(_input, true);
|
|
540
511
|
};
|
|
541
512
|
|
|
542
|
-
this.aborted = function() {
|
|
513
|
+
this.aborted = function () {
|
|
543
514
|
return _aborted;
|
|
544
515
|
};
|
|
545
516
|
|
|
546
|
-
this.abort = function() {
|
|
517
|
+
this.abort = function () {
|
|
547
518
|
_aborted = true;
|
|
548
519
|
_parser.abort();
|
|
549
520
|
_results.meta.aborted = true;
|
|
@@ -620,7 +591,11 @@ function ParserHandle(_config) {
|
|
|
620
591
|
}
|
|
621
592
|
|
|
622
593
|
function applyHeaderAndDynamicTypingAndTransformation() {
|
|
623
|
-
if (
|
|
594
|
+
if (
|
|
595
|
+
!_results ||
|
|
596
|
+
!_results.data ||
|
|
597
|
+
(!_config.header && !_config.dynamicTyping && !_config.transform)
|
|
598
|
+
)
|
|
624
599
|
return _results;
|
|
625
600
|
|
|
626
601
|
function processRow(rowSource, i) {
|
|
@@ -802,7 +777,7 @@ function Parser(config) {
|
|
|
802
777
|
var cursor = 0;
|
|
803
778
|
var aborted = false;
|
|
804
779
|
|
|
805
|
-
this.parse = function(input, baseIndex, ignoreLastRow) {
|
|
780
|
+
this.parse = function (input, baseIndex, ignoreLastRow) {
|
|
806
781
|
// For some reason, in Chrome, this speeds things up (!?)
|
|
807
782
|
if (typeof input !== 'string') throw new Error('Input must be a string');
|
|
808
783
|
|
|
@@ -1075,12 +1050,12 @@ function Parser(config) {
|
|
|
1075
1050
|
};
|
|
1076
1051
|
|
|
1077
1052
|
/** Sets the abort flag */
|
|
1078
|
-
this.abort = function() {
|
|
1053
|
+
this.abort = function () {
|
|
1079
1054
|
aborted = true;
|
|
1080
1055
|
};
|
|
1081
1056
|
|
|
1082
1057
|
/** Gets the cursor position */
|
|
1083
|
-
this.getCharIndex = function() {
|
|
1058
|
+
this.getCharIndex = function () {
|
|
1084
1059
|
return cursor;
|
|
1085
1060
|
};
|
|
1086
1061
|
}
|
package/dist/bundle.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/bundle.ts"],"names":["moduleExports","require","globalThis","loaders","module","exports","Object","assign"],"mappings":"AACA,MAAMA,aAAa,GAAGC,OAAO,CAAC,SAAD,CAA7B;;AACAC,UAAU,CAACC,OAAX,GAAqBD,UAAU,CAACC,OAAX,IAAsB,EAA3C;AACAC,MAAM,CAACC,OAAP,GAAiBC,MAAM,CAACC,MAAP,CAAcL,UAAU,CAACC,OAAzB,EAAkCH,aAAlC,CAAjB","sourcesContent":["// @ts-nocheck\nconst moduleExports = require('./index');\nglobalThis.loaders = globalThis.loaders || {};\nmodule.exports = Object.assign(globalThis.loaders, moduleExports);\n"],"file":"bundle.js"}
|
package/dist/csv-loader.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/csv-loader.ts"],"names":["AsyncQueue","TableBatchBuilder","convertToArrayRow","convertToObjectRow","Papa","AsyncIteratorStreamer","VERSION","DEFAULT_CSV_LOADER_OPTIONS","csv","shape","optimizeMemoryUsage","header","columnPrefix","quoteChar","escapeChar","dynamicTyping","comments","skipEmptyLines","delimitersToGuess","CSVLoader","id","module","name","version","extensions","mimeTypes","category","parse","arrayBuffer","options","parseCSV","TextDecoder","decode","parseText","text","parseInBatches","parseCSVInBatches","csvText","csvOptions","firstRow","readFirstRow","isHeaderRow","Boolean","parseWithHeader","papaparseConfig","download","transformHeader","duplicateColumnTransformer","undefined","error","e","Error","result","data","rows","headerRow","meta","fields","generateHeader","length","map","row","Array","isArray","asyncIterator","batchSize","asyncQueue","isFirstRow","tableBatchBuilder","schema","config","chunkSize","step","results","collapsedRow","flat","join","trim","bytesUsed","cursor","deduceSchema","JSON","stringify","addRow","batch","getFullBatch","enqueue","complete","getFinalBatch","close","every","value","preview","observedColumns","Set","col","colName","counter","has","add","count","headers","i","push","columnName","String","index","type","Float32Array","_typecheckCSVLoader"],"mappings":"AAIA,SACEA,UADF,EAEEC,iBAFF,EAGEC,iBAHF,EAIEC,kBAJF,QAKO,oBALP;AAMA,OAAOC,IAAP,MAAiB,kBAAjB;AACA,OAAOC,qBAAP,MAAkC,+BAAlC;AAIA,MAAMC,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AA0BA,MAAMC,0BAA0B,GAAG;AACjCC,EAAAA,GAAG,EAAE;AACHC,IAAAA,KAAK,EAAE,kBADJ;AAEHC,IAAAA,mBAAmB,EAAE,KAFlB;AAIHC,IAAAA,MAAM,EAAE,MAJL;AAKHC,IAAAA,YAAY,EAAE,QALX;AAQHC,IAAAA,SAAS,EAAE,GARR;AASHC,IAAAA,UAAU,EAAE,GATT;AAUHC,IAAAA,aAAa,EAAE,IAVZ;AAWHC,IAAAA,QAAQ,EAAE,KAXP;AAYHC,IAAAA,cAAc,EAAE,IAZb;AAcHC,IAAAA,iBAAiB,EAAE,CAAC,GAAD,EAAM,IAAN,EAAY,GAAZ,EAAiB,GAAjB;AAdhB;AAD4B,CAAnC;AAoBA,OAAO,MAAMC,SAAS,GAAG;AACvBC,EAAAA,EAAE,EAAE,KADmB;AAEvBC,EAAAA,MAAM,EAAE,KAFe;AAGvBC,EAAAA,IAAI,E
AAE,KAHiB;AAIvBC,EAAAA,OAAO,EAAEjB,OAJc;AAKvBkB,EAAAA,UAAU,EAAE,CAAC,KAAD,EAAQ,KAAR,EAAe,KAAf,CALW;AAMvBC,EAAAA,SAAS,EAAE,CAAC,UAAD,EAAa,2BAAb,EAA0C,UAA1C,CANY;AAOvBC,EAAAA,QAAQ,EAAE,OAPa;AAQvBC,EAAAA,KAAK,EAAE,OAAOC,WAAP,EAAiCC,OAAjC,KACLC,QAAQ,CAAC,IAAIC,WAAJ,GAAkBC,MAAlB,CAAyBJ,WAAzB,CAAD,EAAwCC,OAAxC,CATa;AAUvBI,EAAAA,SAAS,EAAE,CAACC,IAAD,EAAeL,OAAf,KAA8CC,QAAQ,CAACI,IAAD,EAAOL,OAAP,CAV1C;AAWvBM,EAAAA,cAAc,EAAEC,iBAXO;AAcvBP,EAAAA,OAAO,EAAEtB;AAdc,CAAlB;;AAiBP,eAAeuB,QAAf,CAAwBO,OAAxB,EAAyCR,OAAzC,EAAqE;AAEnE,QAAMS,UAAU,GAAG,EAAC,GAAG/B,0BAA0B,CAACC,GAA/B;AAAoC,QAAGqB,OAAH,aAAGA,OAAH,uBAAGA,OAAO,CAAErB,GAAZ;AAApC,GAAnB;AAEA,QAAM+B,QAAQ,GAAGC,YAAY,CAACH,OAAD,CAA7B;AACA,QAAM1B,MAAe,GACnB2B,UAAU,CAAC3B,MAAX,KAAsB,MAAtB,GAA+B8B,WAAW,CAACF,QAAD,CAA1C,GAAuDG,OAAO,CAACJ,UAAU,CAAC3B,MAAZ,CADhE;AAGA,QAAMgC,eAAe,GAAGhC,MAAxB;AAEA,QAAMiC,eAAe,GAAG,EAEtB,GAAGN,UAFmB;AAGtB3B,IAAAA,MAAM,EAAEgC,eAHc;AAItBE,IAAAA,QAAQ,EAAE,KAJY;AAKtBC,IAAAA,eAAe,EAAEH,eAAe,GAAGI,0BAA0B,EAA7B,GAAkCC,SAL5C;AAMtBC,IAAAA,KAAK,EAAGC,CAAD,IAAO;AACZ,YAAM,IAAIC,KAAJ,CAAUD,CAAV,CAAN;AACD;AARqB,GAAxB;AAWA,QAAME,MAAM,GAAGhD,IAAI,CAACuB,KAAL,CAAWU,OAAX,EAAoBO,eAApB,CAAf;AACA,MAAI;AAACS,IAAAA,IAAI,EAAEC;AAAP,MAAeF,MAAnB;AAEA,QAAMG,SAAS,GAAGH,MAAM,CAACI,IAAP,CAAYC,MAAZ,IAAsBC,cAAc,CAACpB,UAAU,CAAC1B,YAAZ,EAA0B2B,QAAQ,CAACoB,MAAnC,CAAtD;;AAEA,UAAQrB,UAAU,CAAC7B,KAAnB;AACE,SAAK,kBAAL;AACE6C,MAAAA,IAAI,GAAGA,IAAI,CAACM,GAAL,CAAUC,GAAD,IAAUC,KAAK,CAACC,OAAN,CAAcF,GAAd,IAAqB1D,kBAAkB,CAAC0D,GAAD,EAAMN,SAAN,CAAvC,GAA0DM,GAA7E,CAAP;AACA;;AACF,SAAK,iBAAL;AACEP,MAAAA,IAAI,GAAGA,IAAI,CAACM,GAAL,CAAUC,GAAD,IAAUC,KAAK,CAACC,OAAN,CAAcF,GAAd,IAAqBA,GAArB,GAA2B3D,iBAAiB,CAAC2D,GAAD,EAAMN,SAAN,CAA/D,CAAP;AACA;;AACF;AAPF;;AAsBA,SAAOD,IAAP;AACD;;AAGD,SAASlB,iBAAT,CACE4B,aADF,EAEEnC,OAFF,EAGwB;AAAA;;AAGtBA,EAAAA,OAAO,GAAG,EAAC,GAAGA;AAAJ,GAAV;;AACA,MAAIA,OAAO,CAACoC,SAAR,KAAsB,MAA1B,EAAkC;AAChCpC,IAAAA,OAAO,CAACoC,SAAR,GAAoB,IAApB;AACD;;AAGD,QAAM3B,UAAU,GAAG,EAAC,GAAG/B,0BAA0B,CAACC,GAA/B;AAAoC,oBAAGqB,OAAH,6CAAG,SAASrB,GAAZ;A
AApC,GAAnB;AAEA,QAAM0D,UAAU,GAAG,IAAIlE,UAAJ,EAAnB;AAEA,MAAImE,UAAmB,GAAG,IAA1B;AACA,MAAIZ,SAA0B,GAAG,IAAjC;AACA,MAAIa,iBAA2C,GAAG,IAAlD;AACA,MAAIC,MAAqB,GAAG,IAA5B;AAEA,QAAMC,MAAM,GAAG,EAEb,GAAGhC,UAFU;AAGb3B,IAAAA,MAAM,EAAE,KAHK;AAIbkC,IAAAA,QAAQ,EAAE,KAJG;AAQb0B,IAAAA,SAAS,EAAE,OAAO,IAAP,GAAc,CARZ;AAcbtD,IAAAA,cAAc,EAAE,KAdH;;AAkBbuD,IAAAA,IAAI,CAACC,OAAD,EAAU;AACZ,UAAIZ,GAAG,GAAGY,OAAO,CAACpB,IAAlB;;AAEA,UAAIf,UAAU,CAACrB,cAAf,EAA+B;AAE7B,cAAMyD,YAAY,GAAGb,GAAG,CAACc,IAAJ,GAAWC,IAAX,CAAgB,EAAhB,EAAoBC,IAApB,EAArB;;AACA,YAAIH,YAAY,KAAK,EAArB,EAAyB;AACvB;AACD;AACF;;AACD,YAAMI,SAAS,GAAGL,OAAO,CAACjB,IAAR,CAAauB,MAA/B;;AAGA,UAAIZ,UAAU,IAAI,CAACZ,SAAnB,EAA8B;AAE5B,cAAM5C,MAAM,GAAG2B,UAAU,CAAC3B,MAAX,KAAsB,MAAtB,GAA+B8B,WAAW,CAACoB,GAAD,CAA1C,GAAkDnB,OAAO,CAACJ,UAAU,CAAC3B,MAAZ,CAAxE;;AACA,YAAIA,MAAJ,EAAY;AACV4C,UAAAA,SAAS,GAAGM,GAAG,CAACD,GAAJ,CAAQb,0BAA0B,EAAlC,CAAZ;AACA;AACD;AACF;;AAGD,UAAIoB,UAAJ,EAAgB;AACdA,QAAAA,UAAU,GAAG,KAAb;;AACA,YAAI,CAACZ,SAAL,EAAgB;AACdA,UAAAA,SAAS,GAAGG,cAAc,CAACpB,UAAU,CAAC1B,YAAZ,EAA0BiD,GAAG,CAACF,MAA9B,CAA1B;AACD;;AACDU,QAAAA,MAAM,GAAGW,YAAY,CAACnB,GAAD,EAAMN,SAAN,CAArB;AACD;;AAED,UAAIjB,UAAU,CAAC5B,mBAAf,EAAoC;AAGlCmD,QAAAA,GAAG,GAAGoB,IAAI,CAACtD,KAAL,CAAWsD,IAAI,CAACC,SAAL,CAAerB,GAAf,CAAX,CAAN;AACD;;AAGDO,MAAAA,iBAAiB,GACfA,iBAAiB,IACjB,IAAInE,iBAAJ,CAAsBoE,MAAtB,EAA8B;AAE5B5D,QAAAA,KAAK,EAAE6B,UAAU,CAAC7B,KAAX,IAAoB,iBAFC;AAG5B,WAAGoB;AAHyB,OAA9B,CAFF;;AAQA,UAAI;AACFuC,QAAAA,iBAAiB,CAACe,MAAlB,CAAyBtB,GAAzB;AAEA,cAAMuB,KAAK,GAAGhB,iBAAiB,IAAIA,iBAAiB,CAACiB,YAAlB,CAA+B;AAACP,UAAAA;AAAD,SAA/B,CAAnC;;AACA,YAAIM,KAAJ,EAAW;AACTlB,UAAAA,UAAU,CAACoB,OAAX,CAAmBF,KAAnB;AACD;AACF,OAPD,CAOE,OAAOnC,KAAP,EAAc;AACdiB,QAAAA,UAAU,CAACoB,OAAX,CAAmBrC,KAAnB;AACD;AACF,KA1EY;;AA6EbsC,IAAAA,QAAQ,CAACd,OAAD,EAAU;AAChB,UAAI;AACF,cAAMK,SAAS,GAAGL,OAAO,CAACjB,IAAR,CAAauB,MAA/B;AAEA,cAAMK,KAAK,GAAGhB,iBAAiB,IAAIA,iBAAiB,CAACoB,aAAlB,CAAgC;AAACV,UAAAA;AAAD,SAAhC,CAAnC;;AACA,YAAIM,KAAJ,EAAW;AACTlB,UAAAA,UAAU,CAACoB,OAAX,CAAmBF,KAAnB;AACD;AACF,OAPD,CAOE,OAA
OnC,KAAP,EAAc;AACdiB,QAAAA,UAAU,CAACoB,OAAX,CAAmBrC,KAAnB;AACD;;AAEDiB,MAAAA,UAAU,CAACuB,KAAX;AACD;;AA1FY,GAAf;AA6FArF,EAAAA,IAAI,CAACuB,KAAL,CAAWqC,aAAX,EAA0BM,MAA1B,EAAkCjE,qBAAlC;AAIA,SAAO6D,UAAP;AACD;;AAOD,SAASzB,WAAT,CAAqBoB,GAArB,EAA6C;AAC3C,SAAOA,GAAG,IAAIA,GAAG,CAAC6B,KAAJ,CAAWC,KAAD,IAAW,OAAOA,KAAP,KAAiB,QAAtC,CAAd;AACD;;AAOD,SAASnD,YAAT,CAAsBH,OAAtB,EAA8C;AAC5C,QAAMe,MAAM,GAAGhD,IAAI,CAACuB,KAAL,CAAWU,OAAX,EAAoB;AACjCQ,IAAAA,QAAQ,EAAE,KADuB;AAEjC9B,IAAAA,aAAa,EAAE,IAFkB;AAGjC6E,IAAAA,OAAO,EAAE;AAHwB,GAApB,CAAf;AAKA,SAAOxC,MAAM,CAACC,IAAP,CAAY,CAAZ,CAAP;AACD;;AAQD,SAASN,0BAAT,GAAsC;AACpC,QAAM8C,eAAe,GAAG,IAAIC,GAAJ,EAAxB;AACA,SAAQC,GAAD,IAAS;AACd,QAAIC,OAAO,GAAGD,GAAd;AACA,QAAIE,OAAO,GAAG,CAAd;;AACA,WAAOJ,eAAe,CAACK,GAAhB,CAAoBF,OAApB,CAAP,EAAqC;AACnCA,MAAAA,OAAO,aAAMD,GAAN,cAAaE,OAAb,CAAP;AACAA,MAAAA,OAAO;AACR;;AACDJ,IAAAA,eAAe,CAACM,GAAhB,CAAoBH,OAApB;AACA,WAAOA,OAAP;AACD,GATD;AAUD;;AAQD,SAAStC,cAAT,CAAwB9C,YAAxB,EAA8CwF,KAAa,GAAG,CAA9D,EAA2E;AACzE,QAAMC,OAAiB,GAAG,EAA1B;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGF,KAApB,EAA2BE,CAAC,EAA5B,EAAgC;AAC9BD,IAAAA,OAAO,CAACE,IAAR,WAAgB3F,YAAhB,SAA+B0F,CAAC,GAAG,CAAnC;AACD;;AACD,SAAOD,OAAP;AACD;;AAED,SAASrB,YAAT,CAAsBnB,GAAtB,EAA2BN,SAA3B,EAAsC;AACpC,QAAMc,MAAM,GAAGd,SAAS,GAAG,EAAH,GAAQ,EAAhC;;AACA,OAAK,IAAI+C,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGzC,GAAG,CAACF,MAAxB,EAAgC2C,CAAC,EAAjC,EAAqC;AACnC,UAAME,UAAU,GAAIjD,SAAS,IAAIA,SAAS,CAAC+C,CAAD,CAAvB,IAA+BA,CAAlD;AACA,UAAMX,KAAK,GAAG9B,GAAG,CAACyC,CAAD,CAAjB;;AACA,YAAQ,OAAOX,KAAf;AACE,WAAK,QAAL;AACA,WAAK,SAAL;AAEEtB,QAAAA,MAAM,CAACmC,UAAD,CAAN,GAAqB;AAAClF,UAAAA,IAAI,EAAEmF,MAAM,CAACD,UAAD,CAAb;AAA2BE,UAAAA,KAAK,EAAEJ,CAAlC;AAAqCK,UAAAA,IAAI,EAAEC;AAA3C,SAArB;AACA;;AACF,WAAK,QAAL;AACA;AACEvC,QAAAA,MAAM,CAACmC,UAAD,CAAN,GAAqB;AAAClF,UAAAA,IAAI,EAAEmF,MAAM,CAACD,UAAD,CAAb;AAA2BE,UAAAA,KAAK,EAAEJ,CAAlC;AAAqCK,UAAAA,IAAI,EAAE7C;AAA3C,SAArB;AARJ;AAYD;;AACD,SAAOO,MAAP;AACD;;AAED,OAAO,MAAMwC,mBAAqC,GAAG1F,SAA9C","sourcesContent":["import type {LoaderWithParser, 
LoaderOptions} from '@loaders.gl/loader-utils';\nimport type {Batch} from '@loaders.gl/schema';\ntype Schema = any;\n\nimport {\n AsyncQueue,\n TableBatchBuilder,\n convertToArrayRow,\n convertToObjectRow\n} from '@loaders.gl/schema';\nimport Papa from './libs/papaparse';\nimport AsyncIteratorStreamer from './lib/async-iterator-streamer';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type CSVLoaderOptions = LoaderOptions & {\n csv?: {\n // loaders.gl options\n shape?: 'array-row-table' | 'object-row-table' | 'columnar-table';\n /** optimizes memory usage but increases parsing time. */\n optimizeMemoryUsage?: boolean;\n columnPrefix?: string;\n header?: 'auto';\n\n // CSV options (papaparse)\n // delimiter: auto\n // newline: auto\n quoteChar?: string;\n escapeChar?: string;\n // Convert numbers and boolean values in rows from strings\n dynamicTyping?: boolean;\n comments?: boolean;\n skipEmptyLines?: boolean | 'greedy';\n // transform: null?\n delimitersToGuess?: string[];\n // fastMode: auto\n };\n};\n\nconst DEFAULT_CSV_LOADER_OPTIONS = {\n csv: {\n shape: 'object-row-table',\n optimizeMemoryUsage: false,\n // CSV options\n header: 'auto',\n columnPrefix: 'column',\n // delimiter: auto\n // newline: auto\n quoteChar: '\"',\n escapeChar: '\"',\n dynamicTyping: true,\n comments: false,\n skipEmptyLines: true,\n // transform: null?\n delimitersToGuess: [',', '\\t', '|', ';']\n // fastMode: auto\n }\n};\n\nexport const CSVLoader = {\n id: 'csv',\n module: 'csv',\n name: 'CSV',\n version: VERSION,\n extensions: ['csv', 'tsv', 'dsv'],\n mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],\n category: 'table',\n parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>\n parseCSV(new TextDecoder().decode(arrayBuffer), options),\n parseText: (text: string, options?: CSVLoaderOptions) => 
parseCSV(text, options),\n parseInBatches: parseCSVInBatches,\n // @ts-ignore\n // testText: null,\n options: DEFAULT_CSV_LOADER_OPTIONS as CSVLoaderOptions\n};\n\nasync function parseCSV(csvText: string, options?: CSVLoaderOptions) {\n // Apps can call the parse method directly, we so apply default options here\n const csvOptions = {...DEFAULT_CSV_LOADER_OPTIONS.csv, ...options?.csv};\n\n const firstRow = readFirstRow(csvText);\n const header: boolean =\n csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);\n\n const parseWithHeader = header;\n\n const papaparseConfig = {\n // dynamicTyping: true,\n ...csvOptions,\n header: parseWithHeader,\n download: false, // We handle loading, no need for papaparse to do it for us\n transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,\n error: (e) => {\n throw new Error(e);\n }\n };\n\n const result = Papa.parse(csvText, papaparseConfig);\n let {data: rows} = result;\n\n const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);\n\n switch (csvOptions.shape) {\n case 'object-row-table':\n rows = rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row));\n break;\n case 'array-row-table':\n rows = rows.map((row) => (Array.isArray(row) ? 
row : convertToArrayRow(row, headerRow)));\n break;\n default:\n }\n\n /*\n if (!header && shape === 'object-row-table') {\n // If the dataset has no header, transform the array result into an object shape with an\n // autogenerated header\n return result.data.map((row) =>\n row.reduce((acc, value, i) => {\n acc[headerRow[i]] = value;\n return acc;\n }, {})\n );\n }\n */\n return rows;\n}\n\n// TODO - support batch size 0 = no batching/single batch?\nfunction parseCSVInBatches(\n asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,\n options?: CSVLoaderOptions\n): AsyncIterable<Batch> {\n // Papaparse does not support standard batch size handling\n // TODO - investigate papaparse chunks mode\n options = {...options};\n if (options.batchSize === 'auto') {\n options.batchSize = 4000;\n }\n\n // Apps can call the parse method directly, we so apply default options here\n const csvOptions = {...DEFAULT_CSV_LOADER_OPTIONS.csv, ...options?.csv};\n\n const asyncQueue = new AsyncQueue<Batch>();\n\n let isFirstRow: boolean = true;\n let headerRow: string[] | null = null;\n let tableBatchBuilder: TableBatchBuilder | null = null;\n let schema: Schema | null = null;\n\n const config = {\n // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,\n ...csvOptions,\n header: false, // Unfortunately, header detection is not automatic and does not infer shapes\n download: false, // We handle loading, no need for papaparse to do it for us\n // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the\n // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.\n // See https://github.com/mholt/PapaParse/issues/465\n chunkSize: 1024 * 1024 * 5,\n // skipEmptyLines is set to a boolean value if supplied. 
Greedy is set to true\n // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if\n // both of the skipEmptyLines and step callback options are provided:\n // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465\n // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825\n skipEmptyLines: false,\n\n // step is called on every row\n // eslint-disable-next-line complexity\n step(results) {\n let row = results.data;\n\n if (csvOptions.skipEmptyLines) {\n // Manually reject lines that are empty\n const collapsedRow = row.flat().join('').trim();\n if (collapsedRow === '') {\n return;\n }\n }\n const bytesUsed = results.meta.cursor;\n\n // Check if we need to save a header row\n if (isFirstRow && !headerRow) {\n // Auto detects or can be forced with csvOptions.header\n const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);\n if (header) {\n headerRow = row.map(duplicateColumnTransformer());\n return;\n }\n }\n\n // If first data row, we can deduce the schema\n if (isFirstRow) {\n isFirstRow = false;\n if (!headerRow) {\n headerRow = generateHeader(csvOptions.columnPrefix, row.length);\n }\n schema = deduceSchema(row, headerRow);\n }\n\n if (csvOptions.optimizeMemoryUsage) {\n // A workaround to allocate new strings and don't retain pointers to original strings.\n // https://bugs.chromium.org/p/v8/issues/detail?id=2869\n row = JSON.parse(JSON.stringify(row));\n }\n\n // Add the row\n tableBatchBuilder =\n tableBatchBuilder ||\n new TableBatchBuilder(schema, {\n // @ts-expect-error\n shape: csvOptions.shape || 'array-row-table',\n ...options\n });\n\n try {\n tableBatchBuilder.addRow(row);\n // If a batch has been completed, emit it\n const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({bytesUsed});\n if (batch) {\n asyncQueue.enqueue(batch);\n }\n } catch (error) {\n asyncQueue.enqueue(error as Error);\n }\n },\n\n // complete is 
called when all rows have been read\n complete(results) {\n try {\n const bytesUsed = results.meta.cursor;\n // Ensure any final (partial) batch gets emitted\n const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({bytesUsed});\n if (batch) {\n asyncQueue.enqueue(batch);\n }\n } catch (error) {\n asyncQueue.enqueue(error as Error);\n }\n\n asyncQueue.close();\n }\n };\n\n Papa.parse(asyncIterator, config, AsyncIteratorStreamer);\n\n // TODO - Does it matter if we return asyncIterable or asyncIterator\n // return asyncQueue[Symbol.asyncIterator]();\n return asyncQueue;\n}\n\n/**\n * Checks if a certain row is a header row\n * @param row the row to check\n * @returns true if the row looks like a header\n */\nfunction isHeaderRow(row: string[]): boolean {\n return row && row.every((value) => typeof value === 'string');\n}\n\n/**\n * Reads, parses, and returns the first row of a CSV text\n * @param csvText the csv text to parse\n * @returns the first row\n */\nfunction readFirstRow(csvText: string): any[] {\n const result = Papa.parse(csvText, {\n download: false,\n dynamicTyping: true,\n preview: 1\n });\n return result.data[0];\n}\n\n/**\n * Creates a transformer that renames duplicate columns. 
This is needed as Papaparse doesn't handle\n * duplicate header columns and would use the latest occurrence by default.\n * See the header option in https://www.papaparse.com/docs#config\n * @returns a transform function that returns sanitized names for duplicate fields\n */\nfunction duplicateColumnTransformer() {\n const observedColumns = new Set();\n return (col) => {\n let colName = col;\n let counter = 1;\n while (observedColumns.has(colName)) {\n colName = `${col}.${counter}`;\n counter++;\n }\n observedColumns.add(colName);\n return colName;\n };\n}\n\n/**\n * Generates the header of a CSV given a prefix and a column count\n * @param columnPrefix the columnPrefix to use\n * @param count the count of column names to generate\n * @returns an array of column names\n */\nfunction generateHeader(columnPrefix: string, count: number = 0): string[] {\n const headers: string[] = [];\n for (let i = 0; i < count; i++) {\n headers.push(`${columnPrefix}${i + 1}`);\n }\n return headers;\n}\n\nfunction deduceSchema(row, headerRow) {\n const schema = headerRow ? {} : [];\n for (let i = 0; i < row.length; i++) {\n const columnName = (headerRow && headerRow[i]) || i;\n const value = row[i];\n switch (typeof value) {\n case 'number':\n case 'boolean':\n // TODO - booleans could be handled differently...\n schema[columnName] = {name: String(columnName), index: i, type: Float32Array};\n break;\n case 'string':\n default:\n schema[columnName] = {name: String(columnName), index: i, type: Array};\n // We currently only handle numeric rows\n // TODO we could offer a function to map strings to numbers?\n }\n }\n return schema;\n}\n\nexport const _typecheckCSVLoader: LoaderWithParser = CSVLoader;\n"],"file":"csv-loader.js"}
|
package/dist/csv-writer.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":[],"names":[],"mappings":"","sourcesContent":[],"file":"csv-writer.js"}
|
package/dist/index.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts"],"names":["CSVLoader"],"mappings":"AACA,SAAQA,SAAR,QAAwB,cAAxB","sourcesContent":["export type {CSVLoaderOptions} from './csv-loader';\nexport {CSVLoader} from './csv-loader';\n"],"file":"index.js"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/lib/async-iterator-streamer.ts"],"names":["Papa","ChunkStreamer","AsyncIteratorStreamer","config","call","textDecoder","TextDecoder","_config","encoding","stream","asyncIterator","_input","chunk","parseChunk","getStringChunk","_finished","error","_sendError","_nextChunk","nextChunk","decode","prototype","Object","create","constructor"],"mappings":"AAQA,OAAOA,IAAP,MAAiB,mBAAjB;AACA,MAAM;AAACC,EAAAA;AAAD,IAAkBD,IAAxB;AAEA,eAAe,SAASE,qBAAT,CAA+BC,MAA/B,EAAuC;AACpDA,EAAAA,MAAM,GAAGA,MAAM,IAAI,EAAnB;AAEAF,EAAAA,aAAa,CAACG,IAAd,CAAmB,IAAnB,EAAyBD,MAAzB;AAEA,OAAKE,WAAL,GAAmB,IAAIC,WAAJ,CAAgB,KAAKC,OAAL,CAAaC,QAA7B,CAAnB;;AAaA,OAAKC,MAAL,GAAc,gBAAgBC,aAAhB,EAA+B;AAC3C,SAAKC,MAAL,GAAcD,aAAd;;AAEA,QAAI;AAGF,iBAAW,MAAME,KAAjB,IAA0BF,aAA1B,EAAyC;AACvC,aAAKG,UAAL,CAAgB,KAAKC,cAAL,CAAoBF,KAApB,CAAhB;AACD;;AAcD,WAAKG,SAAL,GAAiB,IAAjB;AACA,WAAKF,UAAL,CAAgB,EAAhB;AACD,KArBD,CAqBE,OAAOG,KAAP,EAAc;AAEd,WAAKC,UAAL,CAAgBD,KAAhB;AACD;AACF,GA5BD;;AA8BA,OAAKE,UAAL,GAAkB,SAASC,SAAT,GAAqB,CAEtC,CAFD;;AAKA,OAAKL,cAAL,GAAsB,UAAUF,KAAV,EAAiB;AACrC,WAAO,OAAOA,KAAP,KAAiB,QAAjB,GAA4BA,KAA5B,GAAoC,KAAKP,WAAL,CAAiBe,MAAjB,CAAwBR,KAAxB,EAA+B;AAACH,MAAAA,MAAM,EAAE;AAAT,KAA/B,CAA3C;AACD,GAFD;AAGD;AAEDP,qBAAqB,CAACmB,SAAtB,GAAkCC,MAAM,CAACC,MAAP,CAActB,aAAa,CAACoB,SAA5B,CAAlC;AACAnB,qBAAqB,CAACmB,SAAtB,CAAgCG,WAAhC,GAA8CtB,qBAA9C","sourcesContent":["// A custom papaparse `Streamer` for async iterators\n// Ideally this can be contributed back to papaparse\n// Or papaparse can expose Streamer API so we can extend without forking.\n\n// @ts-nocheck\n/* eslint-disable no-invalid-this */\n\n// Note: papaparse is not an ES6 module\nimport Papa from '../libs/papaparse';\nconst {ChunkStreamer} = Papa;\n\nexport default function AsyncIteratorStreamer(config) {\n config = config || {};\n\n ChunkStreamer.call(this, config);\n\n this.textDecoder = new TextDecoder(this._config.encoding);\n\n // Implement ChunkStreamer base class methods\n\n // this.pause = function() {\n // 
ChunkStreamer.prototype.pause.apply(this, arguments);\n // };\n\n // this.resume = function() {\n // ChunkStreamer.prototype.resume.apply(this, arguments);\n // this._input.resume();\n // };\n\n this.stream = async function (asyncIterator) {\n this._input = asyncIterator;\n\n try {\n // ES2018 version\n // TODO - check for pause and abort flags?\n for await (const chunk of asyncIterator) {\n this.parseChunk(this.getStringChunk(chunk));\n }\n\n // ES5 VERSION\n // while (true) {\n // asyncIterator.next().then(function(value) {\n // if (value.done) {\n // // finalize iterator?\n // }\n // }\n // const = await ;\n // if (done) return total;\n // total += value.length;\n // }\n\n this._finished = true;\n this.parseChunk('');\n } catch (error) {\n // Inform ChunkStreamer base class of error\n this._sendError(error);\n }\n };\n\n this._nextChunk = function nextChunk() {\n // Left empty, as async iterator automatically pulls next chunk\n };\n\n // HELPER METHODS\n this.getStringChunk = function (chunk) {\n return typeof chunk === 'string' ? chunk : this.textDecoder.decode(chunk, {stream: true});\n };\n}\n\nAsyncIteratorStreamer.prototype = Object.create(ChunkStreamer.prototype);\nAsyncIteratorStreamer.prototype.constructor = AsyncIteratorStreamer;\n"],"file":"async-iterator-streamer.js"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/papaparse/async-iterator-streamer.js"],"names":["Papa","ChunkStreamer","AsyncIteratorStreamer","config","call","textDecoder","TextDecoder","_config","encoding","stream","asyncIterator","_input","chunk","parseChunk","getStringChunk","_finished","error","_sendError","_nextChunk","nextChunk","decode","prototype","Object","create","constructor"],"mappings":"AAQA,OAAOA,IAAP,MAAiB,mBAAjB;AACA,MAAM;AAACC,EAAAA;AAAD,IAAkBD,IAAxB;AAEA,eAAe,SAASE,qBAAT,CAA+BC,MAA/B,EAAuC;AACpDA,EAAAA,MAAM,GAAGA,MAAM,IAAI,EAAnB;AAEAF,EAAAA,aAAa,CAACG,IAAd,CAAmB,IAAnB,EAAyBD,MAAzB;AAEA,OAAKE,WAAL,GAAmB,IAAIC,WAAJ,CAAgB,KAAKC,OAAL,CAAaC,QAA7B,CAAnB;;AAaA,OAAKC,MAAL,GAAc,gBAAgBC,aAAhB,EAA+B;AAC3C,SAAKC,MAAL,GAAcD,aAAd;;AAEA,QAAI;AAGF,iBAAW,MAAME,KAAjB,IAA0BF,aAA1B,EAAyC;AACvC,aAAKG,UAAL,CAAgB,KAAKC,cAAL,CAAoBF,KAApB,CAAhB;AACD;;AAcD,WAAKG,SAAL,GAAiB,IAAjB;AACA,WAAKF,UAAL,CAAgB,EAAhB;AACD,KArBD,CAqBE,OAAOG,KAAP,EAAc;AAEd,WAAKC,UAAL,CAAgBD,KAAhB;AACD;AACF,GA5BD;;AA8BA,OAAKE,UAAL,GAAkB,SAASC,SAAT,GAAqB,CAEtC,CAFD;;AAKA,OAAKL,cAAL,GAAsB,UAAUF,KAAV,EAAiB;AACrC,WAAO,OAAOA,KAAP,KAAiB,QAAjB,GAA4BA,KAA5B,GAAoC,KAAKP,WAAL,CAAiBe,MAAjB,CAAwBR,KAAxB,EAA+B;AAACH,MAAAA,MAAM,EAAE;AAAT,KAA/B,CAA3C;AACD,GAFD;AAGD;AAEDP,qBAAqB,CAACmB,SAAtB,GAAkCC,MAAM,CAACC,MAAP,CAActB,aAAa,CAACoB,SAA5B,CAAlC;AACAnB,qBAAqB,CAACmB,SAAtB,CAAgCG,WAAhC,GAA8CtB,qBAA9C","sourcesContent":["// A custom papaparse `Streamer` for async iterators\n// Ideally this can be contributed back to papaparse\n// Or papaparse can expose Streamer API so we can extend without forking.\n\n// @ts-nocheck\n/* eslint-disable no-invalid-this */\n\n// Note: papaparse is not an ES6 module\nimport Papa from '../libs/papaparse';\nconst {ChunkStreamer} = Papa;\n\nexport default function AsyncIteratorStreamer(config) {\n config = config || {};\n\n ChunkStreamer.call(this, config);\n\n this.textDecoder = new TextDecoder(this._config.encoding);\n\n // Implement ChunkStreamer base class methods\n\n // this.pause = function() {\n // 
ChunkStreamer.prototype.pause.apply(this, arguments);\n // };\n\n // this.resume = function() {\n // ChunkStreamer.prototype.resume.apply(this, arguments);\n // this._input.resume();\n // };\n\n this.stream = async function (asyncIterator) {\n this._input = asyncIterator;\n\n try {\n // ES2018 version\n // TODO - check for pause and abort flags?\n for await (const chunk of asyncIterator) {\n this.parseChunk(this.getStringChunk(chunk));\n }\n\n // ES5 VERSION\n // while (true) {\n // asyncIterator.next().then(function(value) {\n // if (value.done) {\n // // finalize iterator?\n // }\n // }\n // const = await ;\n // if (done) return total;\n // total += value.length;\n // }\n\n this._finished = true;\n this.parseChunk('');\n } catch (error) {\n // Inform ChunkStreamer base class of error\n this._sendError(error);\n }\n };\n\n this._nextChunk = function nextChunk() {\n // Left empty, as async iterator automatically pulls next chunk\n };\n\n // HELPER METHODS\n this.getStringChunk = function (chunk) {\n return typeof chunk === 'string' ? chunk : this.textDecoder.decode(chunk, {stream: true});\n };\n}\n\nAsyncIteratorStreamer.prototype = Object.create(ChunkStreamer.prototype);\nAsyncIteratorStreamer.prototype.constructor = AsyncIteratorStreamer;\n"],"file":"async-iterator-streamer.js"}
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
// A custom papaparse `Streamer` for async iterators
|
|
2
|
-
// Ideally this can be contributed back to papaparse
|
|
3
|
-
// Or papaparse can expose Streamer API so we can extend without forking.
|
|
4
|
-
|
|
5
|
-
// @ts-nocheck
|
|
6
|
-
/* eslint-disable no-invalid-this */
|
|
7
|
-
|
|
8
|
-
// Note: papaparse is not an ES6 module
|
|
9
|
-
import Papa from '../libs/papaparse';
|
|
10
|
-
const {ChunkStreamer} = Papa;
|
|
11
|
-
|
|
12
|
-
export default function AsyncIteratorStreamer(config) {
|
|
13
|
-
config = config || {};
|
|
14
|
-
|
|
15
|
-
ChunkStreamer.call(this, config);
|
|
16
|
-
|
|
17
|
-
this.textDecoder = new TextDecoder(this._config.encoding);
|
|
18
|
-
|
|
19
|
-
// Implement ChunkStreamer base class methods
|
|
20
|
-
|
|
21
|
-
// this.pause = function() {
|
|
22
|
-
// ChunkStreamer.prototype.pause.apply(this, arguments);
|
|
23
|
-
// };
|
|
24
|
-
|
|
25
|
-
// this.resume = function() {
|
|
26
|
-
// ChunkStreamer.prototype.resume.apply(this, arguments);
|
|
27
|
-
// this._input.resume();
|
|
28
|
-
// };
|
|
29
|
-
|
|
30
|
-
this.stream = async function (asyncIterator) {
|
|
31
|
-
this._input = asyncIterator;
|
|
32
|
-
|
|
33
|
-
try {
|
|
34
|
-
// ES2018 version
|
|
35
|
-
// TODO - check for pause and abort flags?
|
|
36
|
-
for await (const chunk of asyncIterator) {
|
|
37
|
-
this.parseChunk(this.getStringChunk(chunk));
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// ES5 VERSION
|
|
41
|
-
// while (true) {
|
|
42
|
-
// asyncIterator.next().then(function(value) {
|
|
43
|
-
// if (value.done) {
|
|
44
|
-
// // finalize iterator?
|
|
45
|
-
// }
|
|
46
|
-
// }
|
|
47
|
-
// const = await ;
|
|
48
|
-
// if (done) return total;
|
|
49
|
-
// total += value.length;
|
|
50
|
-
// }
|
|
51
|
-
|
|
52
|
-
this._finished = true;
|
|
53
|
-
this.parseChunk('');
|
|
54
|
-
} catch (error) {
|
|
55
|
-
// Inform ChunkStreamer base class of error
|
|
56
|
-
this._sendError(error);
|
|
57
|
-
}
|
|
58
|
-
};
|
|
59
|
-
|
|
60
|
-
this._nextChunk = function nextChunk() {
|
|
61
|
-
// Left empty, as async iterator automatically pulls next chunk
|
|
62
|
-
};
|
|
63
|
-
|
|
64
|
-
// HELPER METHODS
|
|
65
|
-
this.getStringChunk = function (chunk) {
|
|
66
|
-
return typeof chunk === 'string' ? chunk : this.textDecoder.decode(chunk, {stream: true});
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
AsyncIteratorStreamer.prototype = Object.create(ChunkStreamer.prototype);
|
|
71
|
-
AsyncIteratorStreamer.prototype.constructor = AsyncIteratorStreamer;
|