@loaders.gl/csv 4.3.2 → 4.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/csv-arrow-loader.d.ts +37 -0
- package/dist/csv-arrow-loader.d.ts.map +1 -0
- package/dist/csv-arrow-loader.js +23 -0
- package/dist/csv-format.d.ts +10 -0
- package/dist/csv-format.d.ts.map +1 -0
- package/dist/csv-format.js +12 -0
- package/dist/csv-loader.d.ts +6 -6
- package/dist/csv-loader.d.ts.map +1 -1
- package/dist/csv-loader.js +53 -20
- package/dist/csv-writer.d.ts +6 -5
- package/dist/csv-writer.d.ts.map +1 -1
- package/dist/csv-writer.js +2 -5
- package/dist/dist.dev.js +13318 -449
- package/dist/dist.min.js +23 -20
- package/dist/index.cjs +317 -262
- package/dist/index.cjs.map +4 -4
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/lib/encoders/encode-csv.d.ts +1 -1
- package/dist/lib/encoders/encode-csv.d.ts.map +1 -1
- package/dist/lib/encoders/encode-csv.js +1 -1
- package/dist/papaparse/async-iterator-streamer.d.ts +1 -21
- package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
- package/dist/papaparse/async-iterator-streamer.js +6 -6
- package/dist/papaparse/papa-constants.d.ts +12 -0
- package/dist/papaparse/papa-constants.d.ts.map +1 -0
- package/dist/papaparse/papa-constants.js +19 -0
- package/dist/papaparse/papa-parser.d.ts +110 -0
- package/dist/papaparse/papa-parser.d.ts.map +1 -0
- package/dist/papaparse/papa-parser.js +733 -0
- package/dist/papaparse/papa-writer.d.ts +22 -0
- package/dist/papaparse/papa-writer.d.ts.map +1 -0
- package/dist/papaparse/papa-writer.js +166 -0
- package/dist/papaparse/papaparse.d.ts +9 -113
- package/dist/papaparse/papaparse.d.ts.map +1 -1
- package/dist/papaparse/papaparse.js +13 -882
- package/package.json +5 -5
- package/src/csv-arrow-loader.ts +41 -0
- package/src/csv-format.ts +15 -0
- package/src/csv-loader.ts +58 -25
- package/src/csv-writer.ts +2 -5
- package/src/index.ts +3 -0
- package/src/lib/encoders/encode-csv.ts +2 -1
- package/src/papaparse/async-iterator-streamer.ts +6 -6
- package/src/papaparse/papa-constants.ts +23 -0
- package/src/papaparse/papa-parser.ts +872 -0
- package/src/papaparse/papa-writer.ts +219 -0
- package/src/papaparse/papaparse.ts +17 -1048
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/csv",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.4.0-alpha.1",
|
|
4
4
|
"description": "Framework-independent loader for CSV and DSV table formats",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -44,12 +44,12 @@
|
|
|
44
44
|
"build-bundle-dev": "ocular-bundle ./bundle.ts --env=dev --output=dist/dist.dev.js"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@loaders.gl/loader-utils": "4.
|
|
48
|
-
"@loaders.gl/schema": "4.
|
|
47
|
+
"@loaders.gl/loader-utils": "4.4.0-alpha.1",
|
|
48
|
+
"@loaders.gl/schema": "4.4.0-alpha.1",
|
|
49
49
|
"d3-dsv": "^1.2.0"
|
|
50
50
|
},
|
|
51
51
|
"peerDependencies": {
|
|
52
|
-
"@loaders.gl/core": "
|
|
52
|
+
"@loaders.gl/core": "4.4.0-alpha.0"
|
|
53
53
|
},
|
|
54
|
-
"gitHead": "
|
|
54
|
+
"gitHead": "f1732de45907bd500bf4eedb4803beca8bf4bfb0"
|
|
55
55
|
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
6
|
+
import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
|
|
7
|
+
import {convertTable, convertBatches} from '@loaders.gl/schema-utils';
|
|
8
|
+
|
|
9
|
+
import type {CSVLoaderOptions} from './csv-loader';
|
|
10
|
+
import {CSVLoader} from './csv-loader';
|
|
11
|
+
|
|
12
|
+
export type CSVArrowLoaderOptions = LoaderOptions & {
|
|
13
|
+
csv?: Omit<CSVLoaderOptions['csv'], 'shape'>;
|
|
14
|
+
};
|
|
15
|
+
|
|
16
|
+
export const CSVArrowLoader = {
|
|
17
|
+
...CSVLoader,
|
|
18
|
+
|
|
19
|
+
dataType: null as unknown as ArrowTable,
|
|
20
|
+
batchType: null as unknown as ArrowTableBatch,
|
|
21
|
+
|
|
22
|
+
parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
|
|
23
|
+
parseCSVToArrow(new TextDecoder().decode(arrayBuffer), options),
|
|
24
|
+
parseText: (text: string, options?: CSVLoaderOptions) => parseCSVToArrow(text, options),
|
|
25
|
+
parseInBatches: parseCSVToArrowBatches
|
|
26
|
+
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, CSVArrowLoaderOptions>;
|
|
27
|
+
|
|
28
|
+
async function parseCSVToArrow(csvText: string, options?: CSVLoaderOptions): Promise<ArrowTable> {
|
|
29
|
+
// Apps can call the parse method directly, so we apply default options here
|
|
30
|
+
// const csvOptions = {...CSVArrowLoader.options.csv, ...options?.csv};
|
|
31
|
+
const table = await CSVLoader.parseText(csvText, options);
|
|
32
|
+
return convertTable(table, 'arrow-table');
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
function parseCSVToArrowBatches(
|
|
36
|
+
asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
|
|
37
|
+
options?: CSVArrowLoaderOptions
|
|
38
|
+
): AsyncIterable<ArrowTableBatch> {
|
|
39
|
+
const tableIterator = CSVLoader.parseInBatches(asyncIterator, options);
|
|
40
|
+
return convertBatches(tableIterator, 'arrow-table');
|
|
41
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
|
|
5
|
+
import type {Format} from '@loaders.gl/loader-utils';
|
|
6
|
+
|
|
7
|
+
/** Comma-Separated Values */
|
|
8
|
+
export const CSVFormat = {
|
|
9
|
+
id: 'csv',
|
|
10
|
+
module: 'csv',
|
|
11
|
+
name: 'CSV',
|
|
12
|
+
extensions: ['csv', 'tsv', 'dsv'],
|
|
13
|
+
mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
|
|
14
|
+
category: 'table'
|
|
15
|
+
} as const satisfies Format;
|
package/src/csv-loader.ts
CHANGED
|
@@ -3,19 +3,19 @@
|
|
|
3
3
|
// Copyright (c) vis.gl contributors
|
|
4
4
|
|
|
5
5
|
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
6
|
-
import type {ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
|
|
6
|
+
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
|
|
7
7
|
|
|
8
|
+
import {log} from '@loaders.gl/loader-utils';
|
|
8
9
|
import {
|
|
9
10
|
AsyncQueue,
|
|
11
|
+
deduceTableSchema,
|
|
10
12
|
TableBatchBuilder,
|
|
11
13
|
convertToArrayRow,
|
|
12
14
|
convertToObjectRow
|
|
13
|
-
} from '@loaders.gl/schema';
|
|
15
|
+
} from '@loaders.gl/schema-utils';
|
|
14
16
|
import Papa from './papaparse/papaparse';
|
|
15
17
|
import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
|
|
16
|
-
|
|
17
|
-
type ObjectField = {name: string; index: number; type: any};
|
|
18
|
-
type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
|
|
18
|
+
import {CSVFormat} from './csv-format';
|
|
19
19
|
|
|
20
20
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
21
21
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
@@ -48,16 +48,11 @@ export type CSVLoaderOptions = LoaderOptions & {
|
|
|
48
48
|
};
|
|
49
49
|
|
|
50
50
|
export const CSVLoader = {
|
|
51
|
+
...CSVFormat,
|
|
52
|
+
|
|
51
53
|
dataType: null as unknown as ObjectRowTable | ArrayRowTable,
|
|
52
54
|
batchType: null as unknown as TableBatch,
|
|
53
|
-
|
|
54
|
-
id: 'csv',
|
|
55
|
-
module: 'csv',
|
|
56
|
-
name: 'CSV',
|
|
57
55
|
version: VERSION,
|
|
58
|
-
extensions: ['csv', 'tsv', 'dsv'],
|
|
59
|
-
mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
|
|
60
|
-
category: 'table',
|
|
61
56
|
parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
|
|
62
57
|
parseCSV(new TextDecoder().decode(arrayBuffer), options),
|
|
63
58
|
parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
|
|
@@ -89,7 +84,7 @@ async function parseCSV(
|
|
|
89
84
|
csvText: string,
|
|
90
85
|
options?: CSVLoaderOptions
|
|
91
86
|
): Promise<ObjectRowTable | ArrayRowTable> {
|
|
92
|
-
// Apps can call the parse method directly, we
|
|
87
|
+
// Apps can call the parse method directly, so we apply default options here
|
|
93
88
|
const csvOptions = {...CSVLoader.options.csv, ...options?.csv};
|
|
94
89
|
|
|
95
90
|
const firstRow = readFirstRow(csvText);
|
|
@@ -115,20 +110,25 @@ async function parseCSV(
|
|
|
115
110
|
const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
|
|
116
111
|
|
|
117
112
|
const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
|
|
113
|
+
let table: ArrayRowTable | ObjectRowTable;
|
|
118
114
|
switch (shape) {
|
|
119
115
|
case 'object-row-table':
|
|
120
|
-
|
|
116
|
+
table = {
|
|
121
117
|
shape: 'object-row-table',
|
|
122
118
|
data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
|
|
123
119
|
};
|
|
120
|
+
break;
|
|
124
121
|
case 'array-row-table':
|
|
125
|
-
|
|
122
|
+
table = {
|
|
126
123
|
shape: 'array-row-table',
|
|
127
124
|
data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
|
|
128
125
|
};
|
|
126
|
+
break;
|
|
129
127
|
default:
|
|
130
128
|
throw new Error(shape);
|
|
131
129
|
}
|
|
130
|
+
table.schema = deduceTableSchema(table!);
|
|
131
|
+
return table;
|
|
132
132
|
}
|
|
133
133
|
|
|
134
134
|
// TODO - support batch size 0 = no batching/single batch?
|
|
@@ -151,7 +151,7 @@ function parseCSVInBatches(
|
|
|
151
151
|
let isFirstRow: boolean = true;
|
|
152
152
|
let headerRow: string[] | null = null;
|
|
153
153
|
let tableBatchBuilder: TableBatchBuilder | null = null;
|
|
154
|
-
let schema:
|
|
154
|
+
let schema: Schema | null = null;
|
|
155
155
|
|
|
156
156
|
const config = {
|
|
157
157
|
// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
|
|
@@ -199,7 +199,7 @@ function parseCSVInBatches(
|
|
|
199
199
|
if (!headerRow) {
|
|
200
200
|
headerRow = generateHeader(csvOptions.columnPrefix, row.length);
|
|
201
201
|
}
|
|
202
|
-
schema =
|
|
202
|
+
schema = deduceCSVSchema(row, headerRow);
|
|
203
203
|
}
|
|
204
204
|
|
|
205
205
|
if (csvOptions.optimizeMemoryUsage) {
|
|
@@ -314,23 +314,56 @@ function generateHeader(columnPrefix: string, count: number = 0): string[] {
|
|
|
314
314
|
return headers;
|
|
315
315
|
}
|
|
316
316
|
|
|
317
|
-
function
|
|
318
|
-
const
|
|
317
|
+
function deduceCSVSchema(row, headerRow): Schema {
|
|
318
|
+
const fields: Schema['fields'] = [];
|
|
319
319
|
for (let i = 0; i < row.length; i++) {
|
|
320
320
|
const columnName = (headerRow && headerRow[i]) || i;
|
|
321
321
|
const value = row[i];
|
|
322
322
|
switch (typeof value) {
|
|
323
323
|
case 'number':
|
|
324
|
+
fields.push({name: String(columnName), type: 'float64', nullable: true});
|
|
325
|
+
break;
|
|
324
326
|
case 'boolean':
|
|
325
|
-
|
|
326
|
-
schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
|
|
327
|
+
fields.push({name: String(columnName), type: 'bool', nullable: true});
|
|
327
328
|
break;
|
|
328
329
|
case 'string':
|
|
330
|
+
fields.push({name: String(columnName), type: 'utf8', nullable: true});
|
|
331
|
+
break;
|
|
329
332
|
default:
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
// TODO we could offer a function to map strings to numbers?
|
|
333
|
+
log.warn(`CSV: Unknown column type: ${typeof value}`)();
|
|
334
|
+
fields.push({name: String(columnName), type: 'utf8', nullable: true});
|
|
333
335
|
}
|
|
334
336
|
}
|
|
335
|
-
return
|
|
337
|
+
return {
|
|
338
|
+
fields,
|
|
339
|
+
metadata: {
|
|
340
|
+
'loaders.gl#format': 'csv',
|
|
341
|
+
'loaders.gl#loader': 'CSVLoader'
|
|
342
|
+
}
|
|
343
|
+
};
|
|
336
344
|
}
|
|
345
|
+
|
|
346
|
+
// TODO - remove
|
|
347
|
+
// type ObjectField = {name: string; index: number; type: any};
|
|
348
|
+
// type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
|
|
349
|
+
|
|
350
|
+
// function deduceObjectSchema(row, headerRow): ObjectSchema {
|
|
351
|
+
// const schema: ObjectSchema = headerRow ? {} : [];
|
|
352
|
+
// for (let i = 0; i < row.length; i++) {
|
|
353
|
+
// const columnName = (headerRow && headerRow[i]) || i;
|
|
354
|
+
// const value = row[i];
|
|
355
|
+
// switch (typeof value) {
|
|
356
|
+
// case 'number':
|
|
357
|
+
// case 'boolean':
|
|
358
|
+
// // TODO - booleans could be handled differently...
|
|
359
|
+
// schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
|
|
360
|
+
// break;
|
|
361
|
+
// case 'string':
|
|
362
|
+
// default:
|
|
363
|
+
// schema[columnName] = {name: String(columnName), index: i, type: Array};
|
|
364
|
+
// // We currently only handle numeric rows
|
|
365
|
+
// // TODO we could offer a function to map strings to numbers?
|
|
366
|
+
// }
|
|
367
|
+
// }
|
|
368
|
+
// return schema;
|
|
369
|
+
// }
|
package/src/csv-writer.ts
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
|
|
7
7
|
import type {Table, TableBatch} from '@loaders.gl/schema';
|
|
8
8
|
import {encodeTableAsCSV} from './lib/encoders/encode-csv';
|
|
9
|
+
import {CSVFormat} from './csv-format';
|
|
9
10
|
|
|
10
11
|
export type CSVWriterOptions = WriterOptions & {
|
|
11
12
|
csv?: {
|
|
@@ -14,12 +15,8 @@ export type CSVWriterOptions = WriterOptions & {
|
|
|
14
15
|
};
|
|
15
16
|
|
|
16
17
|
export const CSVWriter = {
|
|
17
|
-
|
|
18
|
+
...CSVFormat,
|
|
18
19
|
version: 'latest',
|
|
19
|
-
module: 'csv',
|
|
20
|
-
name: 'CSV',
|
|
21
|
-
extensions: ['csv'],
|
|
22
|
-
mimeTypes: ['text/csv'],
|
|
23
20
|
options: {
|
|
24
21
|
csv: {
|
|
25
22
|
useDisplayNames: false
|
package/src/index.ts
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
// Copyright (c) vis.gl contributors
|
|
4
4
|
// Copyright 2022 Foursquare Labs, Inc.
|
|
5
5
|
|
|
6
|
-
import {Table
|
|
6
|
+
import type {Table} from '@loaders.gl/schema';
|
|
7
|
+
import {makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema-utils';
|
|
7
8
|
import {csvFormatRows} from 'd3-dsv';
|
|
8
9
|
import type {CSVWriterOptions} from '../../csv-writer';
|
|
9
10
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
//
|
|
2
|
-
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2015 Matthew Holt
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
// This is a fork of papaparse v5.0.0-beta.0 under MIT license
|
|
7
|
+
// https://github.com/mholt/PapaParse
|
|
7
8
|
|
|
8
|
-
// Note: papaparse is not an ES6 module
|
|
9
9
|
import Papa from './papaparse';
|
|
10
10
|
const {ChunkStreamer} = Papa;
|
|
11
11
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// loaders.gl
|
|
2
|
+
// SPDX-License-Identifier: MIT
|
|
3
|
+
// Copyright (c) vis.gl contributors
|
|
4
|
+
// Copyright (c) 2015 Matthew Holt
|
|
5
|
+
|
|
6
|
+
// This is a fork of papaparse v5.0.0-beta.0 under MIT license
|
|
7
|
+
// https://github.com/mholt/PapaParse
|
|
8
|
+
|
|
9
|
+
const BYTE_ORDER_MARK = '\ufeff';
|
|
10
|
+
|
|
11
|
+
export const Papa = {
|
|
12
|
+
RECORD_SEP: String.fromCharCode(30),
|
|
13
|
+
UNIT_SEP: String.fromCharCode(31),
|
|
14
|
+
BYTE_ORDER_MARK,
|
|
15
|
+
BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
|
|
16
|
+
WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
|
|
17
|
+
NODE_STREAM_INPUT: 1,
|
|
18
|
+
|
|
19
|
+
// Configurable chunk sizes for local and remote files, respectively
|
|
20
|
+
LocalChunkSize: 1024 * 1024 * 10, // 10 MB
|
|
21
|
+
RemoteChunkSize: 1024 * 1024 * 5, // 5 MB
|
|
22
|
+
DefaultDelimiter: ',' // Used if not specified and detection fails
|
|
23
|
+
};
|