@loaders.gl/csv 4.0.0-alpha.5 → 4.0.0-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/csv-loader.d.ts +4 -18
- package/dist/csv-loader.d.ts.map +1 -1
- package/dist/csv-loader.js +248 -220
- package/dist/csv-writer.d.ts +5 -0
- package/dist/csv-writer.d.ts.map +1 -1
- package/dist/csv-writer.js +23 -2
- package/dist/dist.min.js +351 -27
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/csv-loader.js +246 -0
- package/dist/es5/csv-loader.js.map +1 -0
- package/dist/es5/csv-writer.js +48 -0
- package/dist/es5/csv-writer.js.map +1 -0
- package/dist/es5/index.js +20 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/encoders/encode-csv.js +60 -0
- package/dist/es5/lib/encoders/encode-csv.js.map +1 -0
- package/dist/es5/papaparse/async-iterator-streamer.js +100 -0
- package/dist/es5/papaparse/async-iterator-streamer.js.map +1 -0
- package/dist/es5/papaparse/papaparse.js +703 -0
- package/dist/es5/papaparse/papaparse.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/csv-loader.js +205 -0
- package/dist/esm/csv-loader.js.map +1 -0
- package/dist/esm/csv-writer.js +20 -0
- package/dist/esm/csv-writer.js.map +1 -0
- package/dist/esm/index.js +3 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/encoders/encode-csv.js +40 -0
- package/dist/esm/lib/encoders/encode-csv.js.map +1 -0
- package/dist/esm/papaparse/async-iterator-streamer.js +30 -0
- package/dist/esm/papaparse/async-iterator-streamer.js.map +1 -0
- package/dist/esm/papaparse/papaparse.js +694 -0
- package/dist/esm/papaparse/papaparse.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -2
- package/dist/lib/encoders/encode-csv.d.ts +13 -0
- package/dist/lib/encoders/encode-csv.d.ts.map +1 -0
- package/dist/lib/encoders/encode-csv.js +50 -0
- package/dist/papaparse/async-iterator-streamer.js +60 -32
- package/dist/papaparse/papaparse.js +870 -795
- package/package.json +6 -6
- package/src/csv-loader.ts +19 -13
- package/src/csv-writer.ts +29 -5
- package/src/index.ts +5 -0
- package/src/lib/encoders/encode-csv.ts +66 -0
- package/dist/bundle.js.map +0 -1
- package/dist/csv-loader.js.map +0 -1
- package/dist/csv-writer.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/papaparse/async-iterator-streamer.js.map +0 -1
- package/dist/papaparse/papaparse.js.map +0 -1
package/dist/bundle.js
CHANGED
package/dist/csv-loader.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { LoaderWithParser, LoaderOptions } from '@loaders.gl/loader-utils';
|
|
2
|
-
import type {
|
|
3
|
-
|
|
2
|
+
import type { TableBatch } from '@loaders.gl/schema';
|
|
3
|
+
import { Table } from 'modules/schema/src/lib/table/arrow-api';
|
|
4
|
+
export type CSVLoaderOptions = LoaderOptions & {
|
|
4
5
|
csv?: {
|
|
5
6
|
shape?: 'array-row-table' | 'object-row-table' | 'columnar-table';
|
|
6
7
|
/** optimizes memory usage but increases parsing time. */
|
|
@@ -15,20 +16,5 @@ export declare type CSVLoaderOptions = LoaderOptions & {
|
|
|
15
16
|
delimitersToGuess?: string[];
|
|
16
17
|
};
|
|
17
18
|
};
|
|
18
|
-
export declare const CSVLoader:
|
|
19
|
-
id: string;
|
|
20
|
-
module: string;
|
|
21
|
-
name: string;
|
|
22
|
-
version: any;
|
|
23
|
-
extensions: string[];
|
|
24
|
-
mimeTypes: string[];
|
|
25
|
-
category: string;
|
|
26
|
-
parse: (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions | undefined) => Promise<any>;
|
|
27
|
-
parseText: (text: string, options?: CSVLoaderOptions | undefined) => Promise<any>;
|
|
28
|
-
parseInBatches: typeof parseCSVInBatches;
|
|
29
|
-
options: CSVLoaderOptions;
|
|
30
|
-
};
|
|
31
|
-
declare function parseCSVInBatches(asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>, options?: CSVLoaderOptions): AsyncIterable<Batch>;
|
|
32
|
-
export declare const _typecheckCSVLoader: LoaderWithParser;
|
|
33
|
-
export {};
|
|
19
|
+
export declare const CSVLoader: LoaderWithParser<Table, TableBatch, CSVLoaderOptions>;
|
|
34
20
|
//# sourceMappingURL=csv-loader.d.ts.map
|
package/dist/csv-loader.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"csv-loader.d.ts","sourceRoot":"","sources":["../src/csv-loader.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"csv-loader.d.ts","sourceRoot":"","sources":["../src/csv-loader.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,gBAAgB,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAC9E,OAAO,KAAK,EAAQ,UAAU,EAAC,MAAM,oBAAoB,CAAC;AAU1D,OAAO,EAAC,KAAK,EAAC,MAAM,wCAAwC,CAAC;AAS7D,MAAM,MAAM,gBAAgB,GAAG,aAAa,GAAG;IAC7C,GAAG,CAAC,EAAE;QAEJ,KAAK,CAAC,EAAE,iBAAiB,GAAG,kBAAkB,GAAG,gBAAgB,CAAC;QAClE,yDAAyD;QACzD,mBAAmB,CAAC,EAAE,OAAO,CAAC;QAC9B,YAAY,CAAC,EAAE,MAAM,CAAC;QACtB,MAAM,CAAC,EAAE,MAAM,CAAC;QAKhB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QAEpB,aAAa,CAAC,EAAE,OAAO,CAAC;QACxB,QAAQ,CAAC,EAAE,OAAO,CAAC;QACnB,cAAc,CAAC,EAAE,OAAO,GAAG,QAAQ,CAAC;QAEpC,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;KAE9B,CAAC;CACH,CAAC;AAsBF,eAAO,MAAM,SAAS,EAAE,gBAAgB,CAAC,KAAK,EAAE,UAAU,EAAE,gBAAgB,CAe3E,CAAC"}
|
package/dist/csv-loader.js
CHANGED
|
@@ -1,240 +1,268 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
1
|
+
"use strict";
|
|
2
|
+
// loaders.gl, MIT license
|
|
3
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
|
+
};
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.CSVLoader = void 0;
|
|
8
|
+
const schema_1 = require("@loaders.gl/schema");
|
|
9
|
+
const papaparse_1 = __importDefault(require("./papaparse/papaparse"));
|
|
10
|
+
const async_iterator_streamer_1 = __importDefault(require("./papaparse/async-iterator-streamer"));
|
|
11
|
+
// __VERSION__ is injected by babel-plugin-version-inline
|
|
12
|
+
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
13
|
+
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
5
14
|
const DEFAULT_CSV_LOADER_OPTIONS = {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
15
|
+
csv: {
|
|
16
|
+
shape: 'object-row-table',
|
|
17
|
+
optimizeMemoryUsage: false,
|
|
18
|
+
// CSV options
|
|
19
|
+
header: 'auto',
|
|
20
|
+
columnPrefix: 'column',
|
|
21
|
+
// delimiter: auto
|
|
22
|
+
// newline: auto
|
|
23
|
+
quoteChar: '"',
|
|
24
|
+
escapeChar: '"',
|
|
25
|
+
dynamicTyping: true,
|
|
26
|
+
comments: false,
|
|
27
|
+
skipEmptyLines: true,
|
|
28
|
+
// transform: null?
|
|
29
|
+
delimitersToGuess: [',', '\t', '|', ';']
|
|
30
|
+
// fastMode: auto
|
|
31
|
+
}
|
|
18
32
|
};
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
33
|
+
exports.CSVLoader = {
|
|
34
|
+
id: 'csv',
|
|
35
|
+
module: 'csv',
|
|
36
|
+
name: 'CSV',
|
|
37
|
+
version: VERSION,
|
|
38
|
+
extensions: ['csv', 'tsv', 'dsv'],
|
|
39
|
+
mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
|
|
40
|
+
category: 'table',
|
|
41
|
+
parse: async (arrayBuffer, options) => parseCSV(new TextDecoder().decode(arrayBuffer), options),
|
|
42
|
+
parseText: (text, options) => parseCSV(text, options),
|
|
43
|
+
parseInBatches: parseCSVInBatches,
|
|
44
|
+
// @ts-ignore
|
|
45
|
+
// testText: null,
|
|
46
|
+
options: DEFAULT_CSV_LOADER_OPTIONS
|
|
31
47
|
};
|
|
32
|
-
|
|
33
48
|
async function parseCSV(csvText, options) {
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
49
|
+
// Apps can call the parse method directly, so we apply default options here
|
|
50
|
+
const csvOptions = { ...DEFAULT_CSV_LOADER_OPTIONS.csv, ...options?.csv };
|
|
51
|
+
const firstRow = readFirstRow(csvText);
|
|
52
|
+
const header = csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
|
|
53
|
+
const parseWithHeader = header;
|
|
54
|
+
const papaparseConfig = {
|
|
55
|
+
// dynamicTyping: true,
|
|
56
|
+
...csvOptions,
|
|
57
|
+
header: parseWithHeader,
|
|
58
|
+
download: false,
|
|
59
|
+
transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
|
|
60
|
+
error: (e) => {
|
|
61
|
+
throw new Error(e);
|
|
62
|
+
}
|
|
63
|
+
};
|
|
64
|
+
const result = papaparse_1.default.parse(csvText, papaparseConfig);
|
|
65
|
+
let { data: rows } = result;
|
|
66
|
+
const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
|
|
67
|
+
switch (csvOptions.shape) {
|
|
68
|
+
case 'object-row-table':
|
|
69
|
+
rows = rows.map((row) => (Array.isArray(row) ? (0, schema_1.convertToObjectRow)(row, headerRow) : row));
|
|
70
|
+
break;
|
|
71
|
+
case 'array-row-table':
|
|
72
|
+
rows = rows.map((row) => (Array.isArray(row) ? row : (0, schema_1.convertToArrayRow)(row, headerRow)));
|
|
73
|
+
break;
|
|
74
|
+
default:
|
|
75
|
+
}
|
|
76
|
+
/*
|
|
77
|
+
if (!header && shape === 'object-row-table') {
|
|
78
|
+
// If the dataset has no header, transform the array result into an object shape with an
|
|
79
|
+
// autogenerated header
|
|
80
|
+
return result.data.map((row) =>
|
|
81
|
+
row.reduce((acc, value, i) => {
|
|
82
|
+
acc[headerRow[i]] = value;
|
|
83
|
+
return acc;
|
|
84
|
+
}, {})
|
|
85
|
+
);
|
|
46
86
|
}
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
let {
|
|
50
|
-
data: rows
|
|
51
|
-
} = result;
|
|
52
|
-
const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
|
|
53
|
-
|
|
54
|
-
switch (csvOptions.shape) {
|
|
55
|
-
case 'object-row-table':
|
|
56
|
-
rows = rows.map(row => Array.isArray(row) ? convertToObjectRow(row, headerRow) : row);
|
|
57
|
-
break;
|
|
58
|
-
|
|
59
|
-
case 'array-row-table':
|
|
60
|
-
rows = rows.map(row => Array.isArray(row) ? row : convertToArrayRow(row, headerRow));
|
|
61
|
-
break;
|
|
62
|
-
|
|
63
|
-
default:
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
return rows;
|
|
87
|
+
*/
|
|
88
|
+
return rows;
|
|
67
89
|
}
|
|
68
|
-
|
|
90
|
+
// TODO - support batch size 0 = no batching/single batch?
|
|
69
91
|
function parseCSVInBatches(asyncIterator, options) {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
if (options.batchSize === 'auto') {
|
|
76
|
-
options.batchSize = 4000;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
const csvOptions = { ...DEFAULT_CSV_LOADER_OPTIONS.csv,
|
|
80
|
-
...((_options = options) === null || _options === void 0 ? void 0 : _options.csv)
|
|
81
|
-
};
|
|
82
|
-
const asyncQueue = new AsyncQueue();
|
|
83
|
-
let isFirstRow = true;
|
|
84
|
-
let headerRow = null;
|
|
85
|
-
let tableBatchBuilder = null;
|
|
86
|
-
let schema = null;
|
|
87
|
-
const config = { ...csvOptions,
|
|
88
|
-
header: false,
|
|
89
|
-
download: false,
|
|
90
|
-
chunkSize: 1024 * 1024 * 5,
|
|
91
|
-
skipEmptyLines: false,
|
|
92
|
-
|
|
93
|
-
step(results) {
|
|
94
|
-
let row = results.data;
|
|
95
|
-
|
|
96
|
-
if (csvOptions.skipEmptyLines) {
|
|
97
|
-
const collapsedRow = row.flat().join('').trim();
|
|
98
|
-
|
|
99
|
-
if (collapsedRow === '') {
|
|
100
|
-
return;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
const bytesUsed = results.meta.cursor;
|
|
105
|
-
|
|
106
|
-
if (isFirstRow && !headerRow) {
|
|
107
|
-
const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
|
|
108
|
-
|
|
109
|
-
if (header) {
|
|
110
|
-
headerRow = row.map(duplicateColumnTransformer());
|
|
111
|
-
return;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
if (isFirstRow) {
|
|
116
|
-
isFirstRow = false;
|
|
117
|
-
|
|
118
|
-
if (!headerRow) {
|
|
119
|
-
headerRow = generateHeader(csvOptions.columnPrefix, row.length);
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
schema = deduceSchema(row, headerRow);
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
if (csvOptions.optimizeMemoryUsage) {
|
|
126
|
-
row = JSON.parse(JSON.stringify(row));
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
tableBatchBuilder = tableBatchBuilder || new TableBatchBuilder(schema, {
|
|
130
|
-
shape: csvOptions.shape || 'array-row-table',
|
|
131
|
-
...options
|
|
132
|
-
});
|
|
133
|
-
|
|
134
|
-
try {
|
|
135
|
-
tableBatchBuilder.addRow(row);
|
|
136
|
-
const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({
|
|
137
|
-
bytesUsed
|
|
138
|
-
});
|
|
139
|
-
|
|
140
|
-
if (batch) {
|
|
141
|
-
asyncQueue.enqueue(batch);
|
|
142
|
-
}
|
|
143
|
-
} catch (error) {
|
|
144
|
-
asyncQueue.enqueue(error);
|
|
145
|
-
}
|
|
146
|
-
},
|
|
147
|
-
|
|
148
|
-
complete(results) {
|
|
149
|
-
try {
|
|
150
|
-
const bytesUsed = results.meta.cursor;
|
|
151
|
-
const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({
|
|
152
|
-
bytesUsed
|
|
153
|
-
});
|
|
154
|
-
|
|
155
|
-
if (batch) {
|
|
156
|
-
asyncQueue.enqueue(batch);
|
|
157
|
-
}
|
|
158
|
-
} catch (error) {
|
|
159
|
-
asyncQueue.enqueue(error);
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
asyncQueue.close();
|
|
92
|
+
// Papaparse does not support standard batch size handling
|
|
93
|
+
// TODO - investigate papaparse chunks mode
|
|
94
|
+
options = { ...options };
|
|
95
|
+
if (options.batchSize === 'auto') {
|
|
96
|
+
options.batchSize = 4000;
|
|
163
97
|
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
98
|
+
// Apps can call the parse method directly, so we apply default options here
|
|
99
|
+
const csvOptions = { ...DEFAULT_CSV_LOADER_OPTIONS.csv, ...options?.csv };
|
|
100
|
+
const asyncQueue = new schema_1.AsyncQueue();
|
|
101
|
+
let isFirstRow = true;
|
|
102
|
+
let headerRow = null;
|
|
103
|
+
let tableBatchBuilder = null;
|
|
104
|
+
let schema = null;
|
|
105
|
+
const config = {
|
|
106
|
+
// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
|
|
107
|
+
...csvOptions,
|
|
108
|
+
header: false,
|
|
109
|
+
download: false,
|
|
110
|
+
// chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
|
|
111
|
+
// streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
|
|
112
|
+
// See https://github.com/mholt/PapaParse/issues/465
|
|
113
|
+
chunkSize: 1024 * 1024 * 5,
|
|
114
|
+
// The user-supplied skipEmptyLines value is treated as a boolean (any truthy value, including 'greedy', counts as true).
|
|
115
|
+
// skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
|
|
116
|
+
// both of the skipEmptyLines and step callback options are provided:
|
|
117
|
+
// - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
|
|
118
|
+
// - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
|
|
119
|
+
skipEmptyLines: false,
|
|
120
|
+
// step is called on every row
|
|
121
|
+
// eslint-disable-next-line complexity
|
|
122
|
+
step(results) {
|
|
123
|
+
let row = results.data;
|
|
124
|
+
if (csvOptions.skipEmptyLines) {
|
|
125
|
+
// Manually reject lines that are empty
|
|
126
|
+
const collapsedRow = row.flat().join('').trim();
|
|
127
|
+
if (collapsedRow === '') {
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
const bytesUsed = results.meta.cursor;
|
|
132
|
+
// Check if we need to save a header row
|
|
133
|
+
if (isFirstRow && !headerRow) {
|
|
134
|
+
// Auto detects or can be forced with csvOptions.header
|
|
135
|
+
const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
|
|
136
|
+
if (header) {
|
|
137
|
+
headerRow = row.map(duplicateColumnTransformer());
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// If first data row, we can deduce the schema
|
|
142
|
+
if (isFirstRow) {
|
|
143
|
+
isFirstRow = false;
|
|
144
|
+
if (!headerRow) {
|
|
145
|
+
headerRow = generateHeader(csvOptions.columnPrefix, row.length);
|
|
146
|
+
}
|
|
147
|
+
schema = deduceSchema(row, headerRow);
|
|
148
|
+
}
|
|
149
|
+
if (csvOptions.optimizeMemoryUsage) {
|
|
150
|
+
// A workaround that allocates new strings so that pointers to the original strings are not retained.
|
|
151
|
+
// https://bugs.chromium.org/p/v8/issues/detail?id=2869
|
|
152
|
+
row = JSON.parse(JSON.stringify(row));
|
|
153
|
+
}
|
|
154
|
+
// Add the row
|
|
155
|
+
tableBatchBuilder =
|
|
156
|
+
tableBatchBuilder ||
|
|
157
|
+
new schema_1.TableBatchBuilder(
|
|
158
|
+
// @ts-expect-error TODO this is not a proper schema
|
|
159
|
+
schema, {
|
|
160
|
+
shape: csvOptions.shape || 'array-row-table',
|
|
161
|
+
...options
|
|
162
|
+
});
|
|
163
|
+
try {
|
|
164
|
+
tableBatchBuilder.addRow(row);
|
|
165
|
+
// If a batch has been completed, emit it
|
|
166
|
+
const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({ bytesUsed });
|
|
167
|
+
if (batch) {
|
|
168
|
+
asyncQueue.enqueue(batch);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
catch (error) {
|
|
172
|
+
asyncQueue.enqueue(error);
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
// complete is called when all rows have been read
|
|
176
|
+
complete(results) {
|
|
177
|
+
try {
|
|
178
|
+
const bytesUsed = results.meta.cursor;
|
|
179
|
+
// Ensure any final (partial) batch gets emitted
|
|
180
|
+
const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({ bytesUsed });
|
|
181
|
+
if (batch) {
|
|
182
|
+
asyncQueue.enqueue(batch);
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
catch (error) {
|
|
186
|
+
asyncQueue.enqueue(error);
|
|
187
|
+
}
|
|
188
|
+
asyncQueue.close();
|
|
189
|
+
}
|
|
190
|
+
};
|
|
191
|
+
papaparse_1.default.parse(asyncIterator, config, async_iterator_streamer_1.default);
|
|
192
|
+
// TODO - Does it matter if we return asyncIterable or asyncIterator
|
|
193
|
+
// return asyncQueue[Symbol.asyncIterator]();
|
|
194
|
+
return asyncQueue;
|
|
168
195
|
}
|
|
169
|
-
|
|
196
|
+
/**
|
|
197
|
+
* Checks if a certain row is a header row
|
|
198
|
+
* @param row the row to check
|
|
199
|
+
* @returns true if the row looks like a header
|
|
200
|
+
*/
|
|
170
201
|
function isHeaderRow(row) {
|
|
171
|
-
|
|
202
|
+
return row && row.every((value) => typeof value === 'string');
|
|
172
203
|
}
|
|
173
|
-
|
|
204
|
+
/**
|
|
205
|
+
* Reads, parses, and returns the first row of a CSV text
|
|
206
|
+
* @param csvText the csv text to parse
|
|
207
|
+
* @returns the first row
|
|
208
|
+
*/
|
|
174
209
|
function readFirstRow(csvText) {
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
210
|
+
const result = papaparse_1.default.parse(csvText, {
|
|
211
|
+
download: false,
|
|
212
|
+
dynamicTyping: true,
|
|
213
|
+
preview: 1
|
|
214
|
+
});
|
|
215
|
+
return result.data[0];
|
|
181
216
|
}
|
|
182
|
-
|
|
217
|
+
/**
|
|
218
|
+
* Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
|
|
219
|
+
* duplicate header columns and would use the latest occurrence by default.
|
|
220
|
+
* See the header option in https://www.papaparse.com/docs#config
|
|
221
|
+
* @returns a transform function that returns sanitized names for duplicate fields
|
|
222
|
+
*/
|
|
183
223
|
function duplicateColumnTransformer() {
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
return colName;
|
|
196
|
-
};
|
|
224
|
+
const observedColumns = new Set();
|
|
225
|
+
return (col) => {
|
|
226
|
+
let colName = col;
|
|
227
|
+
let counter = 1;
|
|
228
|
+
while (observedColumns.has(colName)) {
|
|
229
|
+
colName = `${col}.${counter}`;
|
|
230
|
+
counter++;
|
|
231
|
+
}
|
|
232
|
+
observedColumns.add(colName);
|
|
233
|
+
return colName;
|
|
234
|
+
};
|
|
197
235
|
}
|
|
198
|
-
|
|
236
|
+
/**
|
|
237
|
+
* Generates the header of a CSV given a prefix and a column count
|
|
238
|
+
* @param columnPrefix the columnPrefix to use
|
|
239
|
+
* @param count the count of column names to generate
|
|
240
|
+
* @returns an array of column names
|
|
241
|
+
*/
|
|
199
242
|
function generateHeader(columnPrefix, count = 0) {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
return headers;
|
|
243
|
+
const headers = [];
|
|
244
|
+
for (let i = 0; i < count; i++) {
|
|
245
|
+
headers.push(`${columnPrefix}${i + 1}`);
|
|
246
|
+
}
|
|
247
|
+
return headers;
|
|
207
248
|
}
|
|
208
|
-
|
|
209
249
|
function deduceSchema(row, headerRow) {
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
case 'string':
|
|
227
|
-
default:
|
|
228
|
-
schema[columnName] = {
|
|
229
|
-
name: String(columnName),
|
|
230
|
-
index: i,
|
|
231
|
-
type: Array
|
|
232
|
-
};
|
|
250
|
+
const schema = headerRow ? {} : [];
|
|
251
|
+
for (let i = 0; i < row.length; i++) {
|
|
252
|
+
const columnName = (headerRow && headerRow[i]) || i;
|
|
253
|
+
const value = row[i];
|
|
254
|
+
switch (typeof value) {
|
|
255
|
+
case 'number':
|
|
256
|
+
case 'boolean':
|
|
257
|
+
// TODO - booleans could be handled differently...
|
|
258
|
+
schema[columnName] = { name: String(columnName), index: i, type: Float32Array };
|
|
259
|
+
break;
|
|
260
|
+
case 'string':
|
|
261
|
+
default:
|
|
262
|
+
schema[columnName] = { name: String(columnName), index: i, type: Array };
|
|
263
|
+
// We currently only handle numeric rows
|
|
264
|
+
// TODO we could offer a function to map strings to numbers?
|
|
265
|
+
}
|
|
233
266
|
}
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
return schema;
|
|
267
|
+
return schema;
|
|
237
268
|
}
|
|
238
|
-
|
|
239
|
-
export const _typecheckCSVLoader = CSVLoader;
|
|
240
|
-
//# sourceMappingURL=csv-loader.js.map
|
package/dist/csv-writer.d.ts
CHANGED
|
@@ -1 +1,6 @@
|
|
|
1
|
+
import type { Writer } from '@loaders.gl/loader-utils';
|
|
2
|
+
import type { Table, TableBatch } from '@loaders.gl/schema';
|
|
3
|
+
import type { CSVWriterOptions } from './lib/encoders/encode-csv';
|
|
4
|
+
export type { CSVWriterOptions };
|
|
5
|
+
export declare const CSVWriter: Writer<Table, TableBatch, CSVWriterOptions>;
|
|
1
6
|
//# sourceMappingURL=csv-writer.d.ts.map
|
package/dist/csv-writer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"csv-writer.d.ts","sourceRoot":"","sources":["../src/csv-writer.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"csv-writer.d.ts","sourceRoot":"","sources":["../src/csv-writer.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AACrD,OAAO,KAAK,EAAC,KAAK,EAAE,UAAU,EAAC,MAAM,oBAAoB,CAAC;AAC1D,OAAO,KAAK,EAAC,gBAAgB,EAAC,MAAM,2BAA2B,CAAC;AAGhE,YAAY,EAAC,gBAAgB,EAAC,CAAC;AAS/B,eAAO,MAAM,SAAS,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,gBAAgB,CAYjE,CAAC"}
|
package/dist/csv-writer.js
CHANGED
|
@@ -1,2 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
"use strict";
|
|
2
|
+
// loaders.gl, MIT license
|
|
3
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
+
exports.CSVWriter = void 0;
|
|
5
|
+
const encode_csv_1 = require("./lib/encoders/encode-csv");
|
|
6
|
+
const DEFAULT_WRITER_OPTIONS = {
|
|
7
|
+
csv: {
|
|
8
|
+
useDisplayNames: false
|
|
9
|
+
},
|
|
10
|
+
useDisplayNames: false
|
|
11
|
+
};
|
|
12
|
+
exports.CSVWriter = {
|
|
13
|
+
id: 'csv',
|
|
14
|
+
version: 'latest',
|
|
15
|
+
module: 'csv',
|
|
16
|
+
name: 'CSV',
|
|
17
|
+
extensions: ['csv'],
|
|
18
|
+
mimeTypes: ['text/csv'],
|
|
19
|
+
options: DEFAULT_WRITER_OPTIONS,
|
|
20
|
+
text: true,
|
|
21
|
+
encode: async (table, options) => new TextEncoder().encode((0, encode_csv_1.encodeTableAsCSV)(table, options)).buffer,
|
|
22
|
+
encodeText: (table, options) => (0, encode_csv_1.encodeTableAsCSV)(table, options)
|
|
23
|
+
};
|