@loaders.gl/csv 4.0.0-alpha.23 → 4.0.0-alpha.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es5/csv-loader.js +1 -1
- package/dist/esm/csv-loader.js +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +4 -4
- package/dist/src/bundle.js +0 -5
- package/dist/src/csv-loader.js +0 -258
- package/dist/src/csv-writer.js +0 -23
- package/dist/src/index.js +0 -8
- package/dist/src/lib/encoders/encode-csv.js +0 -50
- package/dist/src/papaparse/async-iterator-streamer.js +0 -63
- package/dist/src/papaparse/papaparse.js +0 -935
package/dist/src/csv-loader.js
DELETED
|
@@ -1,258 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// loaders.gl, MIT license
|
|
3
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
4
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
5
|
-
};
|
|
6
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
-
exports.CSVLoader = void 0;
|
|
8
|
-
const schema_1 = require("@loaders.gl/schema");
|
|
9
|
-
const papaparse_1 = __importDefault(require("./papaparse/papaparse"));
|
|
10
|
-
const async_iterator_streamer_1 = __importDefault(require("./papaparse/async-iterator-streamer"));
|
|
11
|
-
// __VERSION__ is injected by babel-plugin-version-inline
|
|
12
|
-
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
13
|
-
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
14
|
-
exports.CSVLoader = {
|
|
15
|
-
id: 'csv',
|
|
16
|
-
module: 'csv',
|
|
17
|
-
name: 'CSV',
|
|
18
|
-
version: VERSION,
|
|
19
|
-
extensions: ['csv', 'tsv', 'dsv'],
|
|
20
|
-
mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
|
|
21
|
-
category: 'table',
|
|
22
|
-
parse: async (arrayBuffer, options) => parseCSV(new TextDecoder().decode(arrayBuffer), options),
|
|
23
|
-
parseText: (text, options) => parseCSV(text, options),
|
|
24
|
-
parseInBatches: parseCSVInBatches,
|
|
25
|
-
// @ts-ignore
|
|
26
|
-
// testText: null,
|
|
27
|
-
options: {
|
|
28
|
-
csv: {
|
|
29
|
-
shape: 'object-row-table',
|
|
30
|
-
optimizeMemoryUsage: false,
|
|
31
|
-
// CSV options
|
|
32
|
-
header: 'auto',
|
|
33
|
-
columnPrefix: 'column',
|
|
34
|
-
// delimiter: auto
|
|
35
|
-
// newline: auto
|
|
36
|
-
quoteChar: '"',
|
|
37
|
-
escapeChar: '"',
|
|
38
|
-
dynamicTyping: true,
|
|
39
|
-
comments: false,
|
|
40
|
-
skipEmptyLines: true,
|
|
41
|
-
// transform: null?
|
|
42
|
-
delimitersToGuess: [',', '\t', '|', ';']
|
|
43
|
-
// fastMode: auto
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
};
|
|
47
|
-
async function parseCSV(csvText, options) {
|
|
48
|
-
// Apps can call the parse method directly, we so apply default options here
|
|
49
|
-
const csvOptions = { ...exports.CSVLoader.options.csv, ...options?.csv };
|
|
50
|
-
const firstRow = readFirstRow(csvText);
|
|
51
|
-
const header = csvOptions.header === 'auto' ? isHeaderRow(firstRow) : Boolean(csvOptions.header);
|
|
52
|
-
const parseWithHeader = header;
|
|
53
|
-
const papaparseConfig = {
|
|
54
|
-
// dynamicTyping: true,
|
|
55
|
-
...csvOptions,
|
|
56
|
-
header: parseWithHeader,
|
|
57
|
-
download: false,
|
|
58
|
-
transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
|
|
59
|
-
error: (e) => {
|
|
60
|
-
throw new Error(e);
|
|
61
|
-
}
|
|
62
|
-
};
|
|
63
|
-
const result = papaparse_1.default.parse(csvText, papaparseConfig);
|
|
64
|
-
const rows = result.data;
|
|
65
|
-
const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
|
|
66
|
-
switch (csvOptions.shape || 'object-row-table') {
|
|
67
|
-
case 'object-row-table':
|
|
68
|
-
return {
|
|
69
|
-
shape: 'object-row-table',
|
|
70
|
-
data: rows.map((row) => (Array.isArray(row) ? (0, schema_1.convertToObjectRow)(row, headerRow) : row))
|
|
71
|
-
};
|
|
72
|
-
case 'array-row-table':
|
|
73
|
-
return {
|
|
74
|
-
shape: 'array-row-table',
|
|
75
|
-
data: rows.map((row) => (Array.isArray(row) ? row : (0, schema_1.convertToArrayRow)(row, headerRow)))
|
|
76
|
-
};
|
|
77
|
-
}
|
|
78
|
-
throw new Error(csvOptions.shape);
|
|
79
|
-
}
|
|
80
|
-
// TODO - support batch size 0 = no batching/single batch?
|
|
81
|
-
function parseCSVInBatches(asyncIterator, options) {
|
|
82
|
-
// Papaparse does not support standard batch size handling
|
|
83
|
-
// TODO - investigate papaparse chunks mode
|
|
84
|
-
options = { ...options };
|
|
85
|
-
if (options.batchSize === 'auto') {
|
|
86
|
-
options.batchSize = 4000;
|
|
87
|
-
}
|
|
88
|
-
// Apps can call the parse method directly, we so apply default options here
|
|
89
|
-
const csvOptions = { ...exports.CSVLoader.options.csv, ...options?.csv };
|
|
90
|
-
const asyncQueue = new schema_1.AsyncQueue();
|
|
91
|
-
let isFirstRow = true;
|
|
92
|
-
let headerRow = null;
|
|
93
|
-
let tableBatchBuilder = null;
|
|
94
|
-
let schema = null;
|
|
95
|
-
const config = {
|
|
96
|
-
// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
|
|
97
|
-
...csvOptions,
|
|
98
|
-
header: false,
|
|
99
|
-
download: false,
|
|
100
|
-
// chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
|
|
101
|
-
// streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
|
|
102
|
-
// See https://github.com/mholt/PapaParse/issues/465
|
|
103
|
-
chunkSize: 1024 * 1024 * 5,
|
|
104
|
-
// skipEmptyLines is set to a boolean value if supplied. Greedy is set to true
|
|
105
|
-
// skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
|
|
106
|
-
// both of the skipEmptyLines and step callback options are provided:
|
|
107
|
-
// - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
|
|
108
|
-
// - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
|
|
109
|
-
skipEmptyLines: false,
|
|
110
|
-
// step is called on every row
|
|
111
|
-
// eslint-disable-next-line complexity
|
|
112
|
-
step(results) {
|
|
113
|
-
let row = results.data;
|
|
114
|
-
if (csvOptions.skipEmptyLines) {
|
|
115
|
-
// Manually reject lines that are empty
|
|
116
|
-
const collapsedRow = row.flat().join('').trim();
|
|
117
|
-
if (collapsedRow === '') {
|
|
118
|
-
return;
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
const bytesUsed = results.meta.cursor;
|
|
122
|
-
// Check if we need to save a header row
|
|
123
|
-
if (isFirstRow && !headerRow) {
|
|
124
|
-
// Auto detects or can be forced with csvOptions.header
|
|
125
|
-
const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
|
|
126
|
-
if (header) {
|
|
127
|
-
headerRow = row.map(duplicateColumnTransformer());
|
|
128
|
-
return;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
// If first data row, we can deduce the schema
|
|
132
|
-
if (isFirstRow) {
|
|
133
|
-
isFirstRow = false;
|
|
134
|
-
if (!headerRow) {
|
|
135
|
-
headerRow = generateHeader(csvOptions.columnPrefix, row.length);
|
|
136
|
-
}
|
|
137
|
-
schema = deduceSchema(row, headerRow);
|
|
138
|
-
}
|
|
139
|
-
if (csvOptions.optimizeMemoryUsage) {
|
|
140
|
-
// A workaround to allocate new strings and don't retain pointers to original strings.
|
|
141
|
-
// https://bugs.chromium.org/p/v8/issues/detail?id=2869
|
|
142
|
-
row = JSON.parse(JSON.stringify(row));
|
|
143
|
-
}
|
|
144
|
-
// Add the row
|
|
145
|
-
tableBatchBuilder =
|
|
146
|
-
tableBatchBuilder ||
|
|
147
|
-
new schema_1.TableBatchBuilder(
|
|
148
|
-
// @ts-expect-error TODO this is not a proper schema
|
|
149
|
-
schema, {
|
|
150
|
-
shape: csvOptions.shape || 'array-row-table',
|
|
151
|
-
...options
|
|
152
|
-
});
|
|
153
|
-
try {
|
|
154
|
-
tableBatchBuilder.addRow(row);
|
|
155
|
-
// If a batch has been completed, emit it
|
|
156
|
-
const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({ bytesUsed });
|
|
157
|
-
if (batch) {
|
|
158
|
-
asyncQueue.enqueue(batch);
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
catch (error) {
|
|
162
|
-
asyncQueue.enqueue(error);
|
|
163
|
-
}
|
|
164
|
-
},
|
|
165
|
-
// complete is called when all rows have been read
|
|
166
|
-
complete(results) {
|
|
167
|
-
try {
|
|
168
|
-
const bytesUsed = results.meta.cursor;
|
|
169
|
-
// Ensure any final (partial) batch gets emitted
|
|
170
|
-
const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({ bytesUsed });
|
|
171
|
-
if (batch) {
|
|
172
|
-
asyncQueue.enqueue(batch);
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
catch (error) {
|
|
176
|
-
asyncQueue.enqueue(error);
|
|
177
|
-
}
|
|
178
|
-
asyncQueue.close();
|
|
179
|
-
}
|
|
180
|
-
};
|
|
181
|
-
papaparse_1.default.parse(asyncIterator, config, async_iterator_streamer_1.default);
|
|
182
|
-
// TODO - Does it matter if we return asyncIterable or asyncIterator
|
|
183
|
-
// return asyncQueue[Symbol.asyncIterator]();
|
|
184
|
-
return asyncQueue;
|
|
185
|
-
}
|
|
186
|
-
/**
|
|
187
|
-
* Checks if a certain row is a header row
|
|
188
|
-
* @param row the row to check
|
|
189
|
-
* @returns true if the row looks like a header
|
|
190
|
-
*/
|
|
191
|
-
function isHeaderRow(row) {
|
|
192
|
-
return row && row.every((value) => typeof value === 'string');
|
|
193
|
-
}
|
|
194
|
-
/**
|
|
195
|
-
* Reads, parses, and returns the first row of a CSV text
|
|
196
|
-
* @param csvText the csv text to parse
|
|
197
|
-
* @returns the first row
|
|
198
|
-
*/
|
|
199
|
-
function readFirstRow(csvText) {
|
|
200
|
-
const result = papaparse_1.default.parse(csvText, {
|
|
201
|
-
download: false,
|
|
202
|
-
dynamicTyping: true,
|
|
203
|
-
preview: 1
|
|
204
|
-
});
|
|
205
|
-
return result.data[0];
|
|
206
|
-
}
|
|
207
|
-
/**
|
|
208
|
-
* Creates a transformer that renames duplicate columns. This is needed as Papaparse doesn't handle
|
|
209
|
-
* duplicate header columns and would use the latest occurrence by default.
|
|
210
|
-
* See the header option in https://www.papaparse.com/docs#config
|
|
211
|
-
* @returns a transform function that returns sanitized names for duplicate fields
|
|
212
|
-
*/
|
|
213
|
-
function duplicateColumnTransformer() {
|
|
214
|
-
const observedColumns = new Set();
|
|
215
|
-
return (col) => {
|
|
216
|
-
let colName = col;
|
|
217
|
-
let counter = 1;
|
|
218
|
-
while (observedColumns.has(colName)) {
|
|
219
|
-
colName = `${col}.${counter}`;
|
|
220
|
-
counter++;
|
|
221
|
-
}
|
|
222
|
-
observedColumns.add(colName);
|
|
223
|
-
return colName;
|
|
224
|
-
};
|
|
225
|
-
}
|
|
226
|
-
/**
|
|
227
|
-
* Generates the header of a CSV given a prefix and a column count
|
|
228
|
-
* @param columnPrefix the columnPrefix to use
|
|
229
|
-
* @param count the count of column names to generate
|
|
230
|
-
* @returns an array of column names
|
|
231
|
-
*/
|
|
232
|
-
function generateHeader(columnPrefix, count = 0) {
|
|
233
|
-
const headers = [];
|
|
234
|
-
for (let i = 0; i < count; i++) {
|
|
235
|
-
headers.push(`${columnPrefix}${i + 1}`);
|
|
236
|
-
}
|
|
237
|
-
return headers;
|
|
238
|
-
}
|
|
239
|
-
function deduceSchema(row, headerRow) {
|
|
240
|
-
const schema = headerRow ? {} : [];
|
|
241
|
-
for (let i = 0; i < row.length; i++) {
|
|
242
|
-
const columnName = (headerRow && headerRow[i]) || i;
|
|
243
|
-
const value = row[i];
|
|
244
|
-
switch (typeof value) {
|
|
245
|
-
case 'number':
|
|
246
|
-
case 'boolean':
|
|
247
|
-
// TODO - booleans could be handled differently...
|
|
248
|
-
schema[columnName] = { name: String(columnName), index: i, type: Float32Array };
|
|
249
|
-
break;
|
|
250
|
-
case 'string':
|
|
251
|
-
default:
|
|
252
|
-
schema[columnName] = { name: String(columnName), index: i, type: Array };
|
|
253
|
-
// We currently only handle numeric rows
|
|
254
|
-
// TODO we could offer a function to map strings to numbers?
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
return schema;
|
|
258
|
-
}
|
package/dist/src/csv-writer.js
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// loaders.gl, MIT license
|
|
3
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
exports.CSVWriter = void 0;
|
|
5
|
-
const encode_csv_1 = require("./lib/encoders/encode-csv");
|
|
6
|
-
const DEFAULT_WRITER_OPTIONS = {
|
|
7
|
-
csv: {
|
|
8
|
-
useDisplayNames: false
|
|
9
|
-
},
|
|
10
|
-
useDisplayNames: false
|
|
11
|
-
};
|
|
12
|
-
exports.CSVWriter = {
|
|
13
|
-
id: 'csv',
|
|
14
|
-
version: 'latest',
|
|
15
|
-
module: 'csv',
|
|
16
|
-
name: 'CSV',
|
|
17
|
-
extensions: ['csv'],
|
|
18
|
-
mimeTypes: ['text/csv'],
|
|
19
|
-
options: DEFAULT_WRITER_OPTIONS,
|
|
20
|
-
text: true,
|
|
21
|
-
encode: async (table, options) => new TextEncoder().encode((0, encode_csv_1.encodeTableAsCSV)(table, options)).buffer,
|
|
22
|
-
encodeText: (table, options) => (0, encode_csv_1.encodeTableAsCSV)(table, options)
|
|
23
|
-
};
|
package/dist/src/index.js
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// loaders.gl, MIT license
|
|
3
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
4
|
-
exports.CSVWriter = exports.CSVLoader = void 0;
|
|
5
|
-
var csv_loader_1 = require("./csv-loader");
|
|
6
|
-
Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_loader_1.CSVLoader; } });
|
|
7
|
-
var csv_writer_1 = require("./csv-writer");
|
|
8
|
-
Object.defineProperty(exports, "CSVWriter", { enumerable: true, get: function () { return csv_writer_1.CSVWriter; } });
|
package/dist/src/lib/encoders/encode-csv.js
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// loaders.gl, MIT license
|
|
3
|
-
// Copyright 2022 Foursquare Labs, Inc.
|
|
4
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
5
|
-
exports.encodeTableAsCSV = void 0;
|
|
6
|
-
const schema_1 = require("@loaders.gl/schema");
|
|
7
|
-
const d3_dsv_1 = require("d3-dsv");
|
|
8
|
-
/**
|
|
9
|
-
* Encode a Table object as CSV
|
|
10
|
-
*/
|
|
11
|
-
function encodeTableAsCSV(table, options = { csv: { useDisplayNames: true } }) {
|
|
12
|
-
const useDisplayNames = options.useDisplayNames || options.csv?.useDisplayNames;
|
|
13
|
-
const fields = table.schema?.fields || [];
|
|
14
|
-
const columnNames = fields.map((f) => {
|
|
15
|
-
// This is a leaky abstraction, assuming Kepler metadata
|
|
16
|
-
const displayName = f.metadata?.displayName;
|
|
17
|
-
return useDisplayNames && typeof displayName === 'string' ? displayName : f.name;
|
|
18
|
-
});
|
|
19
|
-
const formattedData = [columnNames];
|
|
20
|
-
for (const row of (0, schema_1.makeArrayRowIterator)(table)) {
|
|
21
|
-
const formattedRow = [];
|
|
22
|
-
for (let columnIndex = 0; columnIndex < (0, schema_1.getTableNumCols)(table); ++columnIndex) {
|
|
23
|
-
const value = row[columnIndex];
|
|
24
|
-
formattedRow[columnIndex] = preformatFieldValue(value);
|
|
25
|
-
}
|
|
26
|
-
formattedData.push(formattedRow);
|
|
27
|
-
}
|
|
28
|
-
return (0, d3_dsv_1.csvFormatRows)(formattedData);
|
|
29
|
-
}
|
|
30
|
-
exports.encodeTableAsCSV = encodeTableAsCSV;
|
|
31
|
-
/**
|
|
32
|
-
* Stringifies a value
|
|
33
|
-
* @todo Why is it called parse?
|
|
34
|
-
*/
|
|
35
|
-
const preformatFieldValue = (value) => {
|
|
36
|
-
if (value === null || value === undefined) {
|
|
37
|
-
// TODO: It would be nice to distinguish between missing values and the empty string
|
|
38
|
-
// https://github.com/d3/d3-dsv/issues/84
|
|
39
|
-
return null;
|
|
40
|
-
}
|
|
41
|
-
if (value instanceof Date) {
|
|
42
|
-
// d3-dsv formats dates without timezones if they don't have time info;
|
|
43
|
-
// this forces them to always use fully-qualified ISO time strings
|
|
44
|
-
return value.toISOString();
|
|
45
|
-
}
|
|
46
|
-
if (typeof value === 'object') {
|
|
47
|
-
return JSON.stringify(value);
|
|
48
|
-
}
|
|
49
|
-
return String(value);
|
|
50
|
-
};
|
package/dist/src/papaparse/async-iterator-streamer.js
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
// @ts-nocheck
|
|
3
|
-
// A custom papaparse `Streamer` for async iterators
|
|
4
|
-
// Ideally this can be contributed back to papaparse
|
|
5
|
-
// Or papaparse can expose Streamer API so we can extend without forking.
|
|
6
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
7
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
8
|
-
};
|
|
9
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
10
|
-
/* eslint-disable no-invalid-this */
|
|
11
|
-
// Note: papaparse is not an ES6 module
|
|
12
|
-
const papaparse_1 = __importDefault(require("./papaparse"));
|
|
13
|
-
const { ChunkStreamer } = papaparse_1.default;
|
|
14
|
-
function AsyncIteratorStreamer(config) {
|
|
15
|
-
config = config || {};
|
|
16
|
-
ChunkStreamer.call(this, config);
|
|
17
|
-
this.textDecoder = new TextDecoder(this._config.encoding);
|
|
18
|
-
// Implement ChunkStreamer base class methods
|
|
19
|
-
// this.pause = function() {
|
|
20
|
-
// ChunkStreamer.prototype.pause.apply(this, arguments);
|
|
21
|
-
// };
|
|
22
|
-
// this.resume = function() {
|
|
23
|
-
// ChunkStreamer.prototype.resume.apply(this, arguments);
|
|
24
|
-
// this._input.resume();
|
|
25
|
-
// };
|
|
26
|
-
this.stream = async function (asyncIterator) {
|
|
27
|
-
this._input = asyncIterator;
|
|
28
|
-
try {
|
|
29
|
-
// ES2018 version
|
|
30
|
-
// TODO - check for pause and abort flags?
|
|
31
|
-
for await (const chunk of asyncIterator) {
|
|
32
|
-
this.parseChunk(this.getStringChunk(chunk));
|
|
33
|
-
}
|
|
34
|
-
// ES5 VERSION
|
|
35
|
-
// while (true) {
|
|
36
|
-
// asyncIterator.next().then(function(value) {
|
|
37
|
-
// if (value.done) {
|
|
38
|
-
// // finalize iterator?
|
|
39
|
-
// }
|
|
40
|
-
// }
|
|
41
|
-
// const = await ;
|
|
42
|
-
// if (done) return total;
|
|
43
|
-
// total += value.length;
|
|
44
|
-
// }
|
|
45
|
-
this._finished = true;
|
|
46
|
-
this.parseChunk('');
|
|
47
|
-
}
|
|
48
|
-
catch (error) {
|
|
49
|
-
// Inform ChunkStreamer base class of error
|
|
50
|
-
this._sendError(error);
|
|
51
|
-
}
|
|
52
|
-
};
|
|
53
|
-
this._nextChunk = function nextChunk() {
|
|
54
|
-
// Left empty, as async iterator automatically pulls next chunk
|
|
55
|
-
};
|
|
56
|
-
// HELPER METHODS
|
|
57
|
-
this.getStringChunk = function (chunk) {
|
|
58
|
-
return typeof chunk === 'string' ? chunk : this.textDecoder.decode(chunk, { stream: true });
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
exports.default = AsyncIteratorStreamer;
|
|
62
|
-
AsyncIteratorStreamer.prototype = Object.create(ChunkStreamer.prototype);
|
|
63
|
-
AsyncIteratorStreamer.prototype.constructor = AsyncIteratorStreamer;
|