@loaders.gl/csv 4.0.0-alpha.23 → 4.0.0-alpha.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,258 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- var __importDefault = (this && this.__importDefault) || function (mod) {
4
- return (mod && mod.__esModule) ? mod : { "default": mod };
5
- };
6
- Object.defineProperty(exports, "__esModule", { value: true });
7
- exports.CSVLoader = void 0;
8
- const schema_1 = require("@loaders.gl/schema");
9
- const papaparse_1 = __importDefault(require("./papaparse/papaparse"));
10
- const async_iterator_streamer_1 = __importDefault(require("./papaparse/async-iterator-streamer"));
11
- // __VERSION__ is injected by babel-plugin-version-inline
12
- // @ts-ignore TS2304: Cannot find name '__VERSION__'.
13
- const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
14
- exports.CSVLoader = {
15
- id: 'csv',
16
- module: 'csv',
17
- name: 'CSV',
18
- version: VERSION,
19
- extensions: ['csv', 'tsv', 'dsv'],
20
- mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
21
- category: 'table',
22
- parse: async (arrayBuffer, options) => parseCSV(new TextDecoder().decode(arrayBuffer), options),
23
- parseText: (text, options) => parseCSV(text, options),
24
- parseInBatches: parseCSVInBatches,
25
- // @ts-ignore
26
- // testText: null,
27
- options: {
28
- csv: {
29
- shape: 'object-row-table',
30
- optimizeMemoryUsage: false,
31
- // CSV options
32
- header: 'auto',
33
- columnPrefix: 'column',
34
- // delimiter: auto
35
- // newline: auto
36
- quoteChar: '"',
37
- escapeChar: '"',
38
- dynamicTyping: true,
39
- comments: false,
40
- skipEmptyLines: true,
41
- // transform: null?
42
- delimitersToGuess: [',', '\t', '|', ';']
43
- // fastMode: auto
44
- }
45
- }
46
- };
/**
 * Parses a complete CSV text in one pass and returns it as a row table.
 *
 * @param csvText the full CSV document as a string
 * @param options loader options; only `options.csv` is read, merged over `CSVLoader.options.csv`
 * @returns a promise resolving to `{shape, data}` where `shape` is
 *   'object-row-table' or 'array-row-table' and `data` is the list of rows
 * @throws Error (with the requested shape as its message) when `csv.shape` is not supported
 */
async function parseCSV(csvText, options) {
    // Apps can call the parse method directly, so we apply default options here
    const csvOptions = { ...exports.CSVLoader.options.csv, ...options?.csv };
    // `firstRow` is undefined when the text contains no rows at all
    const firstRow = readFirstRow(csvText);
    // Always a strict boolean, also when there is no first row to inspect
    const parseWithHeader = csvOptions.header === 'auto' ? Boolean(isHeaderRow(firstRow)) : Boolean(csvOptions.header);
    const papaparseConfig = {
        ...csvOptions,
        header: parseWithHeader,
        download: false,
        // Papaparse keeps only the last of several equally named columns, so rename duplicates
        transformHeader: parseWithHeader ? duplicateColumnTransformer() : undefined,
        error: (e) => {
            throw new Error(e);
        }
    };
    const result = papaparse_1.default.parse(csvText, papaparseConfig);
    const rows = result.data;
    // Without parsed field names, generate `<prefix>1..n` from the width of the first row.
    // Empty input has no first row: fall back to generateHeader's default count instead of
    // dereferencing undefined (which used to throw a TypeError).
    const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow?.length);
    switch (csvOptions.shape || 'object-row-table') {
        case 'object-row-table':
            return {
                shape: 'object-row-table',
                // Rows parsed without a header arrive as arrays and are converted to objects
                data: rows.map((row) => (Array.isArray(row) ? (0, schema_1.convertToObjectRow)(row, headerRow) : row))
            };
        case 'array-row-table':
            return {
                shape: 'array-row-table',
                // Rows parsed with a header arrive as objects and are converted to arrays
                data: rows.map((row) => (Array.isArray(row) ? row : (0, schema_1.convertToArrayRow)(row, headerRow)))
            };
    }
    throw new Error(csvOptions.shape);
}
/**
 * Parses CSV incrementally from an async iterator of chunks and publishes table
 * batches on an `AsyncQueue`, which is returned immediately.
 *
 * Rows are delivered one at a time by papaparse's `step` callback. The first
 * non-skipped row is either kept as the header (auto-detected or forced through
 * `csv.header`) or used, like the first data row after a header, to deduce the schema.
 *
 * Failures are delivered to the consumer by enqueueing the error object. This
 * includes failures reported through papaparse's `error` callback, after which
 * the queue is closed so the consumer does not wait forever.
 * NOTE(review): this assumes the bundled papaparse fork forwards `_sendError` to
 * `config.error`, as upstream PapaParse does - confirm against the fork.
 *
 * @param asyncIterator async iterator/iterable yielding strings or binary chunks
 * @param options loader options; `batchSize: 'auto'` becomes 4000 and `options.csv`
 *   is merged over `CSVLoader.options.csv`
 * @returns the queue that batches (and errors) are enqueued on
 */
// TODO - support batch size 0 = no batching/single batch?
function parseCSVInBatches(asyncIterator, options) {
    // Papaparse does not support standard batch size handling
    // TODO - investigate papaparse chunks mode
    options = { ...options };
    if (options.batchSize === 'auto') {
        options.batchSize = 4000;
    }
    // Apps can call the parse method directly, so we apply default options here
    const csvOptions = { ...exports.CSVLoader.options.csv, ...options?.csv };
    const asyncQueue = new schema_1.AsyncQueue();
    // Tracks whether this function already closed the queue, so that a late error
    // report does not try to enqueue on a closed queue
    let isQueueClosed = false;
    let isFirstRow = true;
    let headerRow = null;
    let tableBatchBuilder = null;
    let schema = null;
    const config = {
        // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
        ...csvOptions,
        header: false,
        download: false,
        // chunkSize is set to 5MB explicitly (same as Papaparse default) due to a bug where the
        // streaming parser gets stuck if skipEmptyLines and a step callback are both supplied.
        // See https://github.com/mholt/PapaParse/issues/465
        chunkSize: 1024 * 1024 * 5,
        // skipEmptyLines is handled manually given two bugs where the streaming parser gets stuck if
        // both of the skipEmptyLines and step callback options are provided:
        // - true doesn't work unless chunkSize is set: https://github.com/mholt/PapaParse/issues/465
        // - greedy doesn't work: https://github.com/mholt/PapaParse/issues/825
        skipEmptyLines: false,
        // step is called on every row
        // eslint-disable-next-line complexity
        step(results) {
            let row = results.data;
            if (csvOptions.skipEmptyLines) {
                // Manually reject lines that are empty
                const collapsedRow = row.flat().join('').trim();
                if (collapsedRow === '') {
                    return;
                }
            }
            const bytesUsed = results.meta.cursor;
            // Check if we need to save a header row
            if (isFirstRow && !headerRow) {
                // Auto detects or can be forced with csvOptions.header
                const header = csvOptions.header === 'auto' ? isHeaderRow(row) : Boolean(csvOptions.header);
                if (header) {
                    headerRow = row.map(duplicateColumnTransformer());
                    // isFirstRow stays true so the next row is used to deduce the schema
                    return;
                }
            }
            // If first data row, we can deduce the schema
            if (isFirstRow) {
                isFirstRow = false;
                if (!headerRow) {
                    headerRow = generateHeader(csvOptions.columnPrefix, row.length);
                }
                schema = deduceSchema(row, headerRow);
            }
            if (csvOptions.optimizeMemoryUsage) {
                // A workaround to allocate new strings and don't retain pointers to original strings.
                // https://bugs.chromium.org/p/v8/issues/detail?id=2869
                row = JSON.parse(JSON.stringify(row));
            }
            // Add the row; the builder is created lazily once the schema is known
            tableBatchBuilder =
                tableBatchBuilder ||
                    new schema_1.TableBatchBuilder(
                    // @ts-expect-error TODO this is not a proper schema
                    schema, {
                        shape: csvOptions.shape || 'array-row-table',
                        ...options
                    });
            try {
                tableBatchBuilder.addRow(row);
                // If a batch has been completed, emit it
                const batch = tableBatchBuilder && tableBatchBuilder.getFullBatch({ bytesUsed });
                if (batch) {
                    asyncQueue.enqueue(batch);
                }
            }
            catch (error) {
                asyncQueue.enqueue(error);
            }
        },
        // complete is called when all rows have been read
        complete(results) {
            try {
                const bytesUsed = results.meta.cursor;
                // Ensure any final (partial) batch gets emitted
                const batch = tableBatchBuilder && tableBatchBuilder.getFinalBatch({ bytesUsed });
                if (batch) {
                    asyncQueue.enqueue(batch);
                }
            }
            catch (error) {
                asyncQueue.enqueue(error);
            }
            asyncQueue.close();
            isQueueClosed = true;
        },
        // error is called when the streamer reports a failure (e.g. the input iterator
        // rejects). Previously no handler was installed, so the queue was never closed.
        error(error, ...rest) {
            if (!isQueueClosed) {
                asyncQueue.enqueue(error instanceof Error ? error : new Error(String(error)));
                asyncQueue.close();
                isQueueClosed = true;
            }
            // Keep honoring an application supplied csv.error callback
            if (typeof csvOptions.error === 'function') {
                csvOptions.error(error, ...rest);
            }
        }
    };
    papaparse_1.default.parse(asyncIterator, config, async_iterator_streamer_1.default);
    // TODO - Does it matter if we return asyncIterable or asyncIterator
    // return asyncQueue[Symbol.asyncIterator]();
    return asyncQueue;
}
/**
 * Checks if a certain row is a header row, i.e. every value in it is a string.
 * A missing row (undefined / null) is never a header. An empty row still counts
 * as one, because `every` is vacuously true.
 * @param row the row to check
 * @returns true if the row looks like a header, otherwise false (always a boolean)
 */
function isHeaderRow(row) {
    // Coerce so that a missing row yields `false` rather than leaking undefined/null
    return Boolean(row && row.every((value) => typeof value === 'string'));
}
/**
 * Parses only the beginning of a CSV text and hands back its first row.
 * Dynamic typing is enabled for this preview parse.
 * @param csvText the csv text to parse
 * @returns the first row, or undefined when the parse produced no rows
 */
function readFirstRow(csvText) {
    const previewConfig = {
        download: false,
        dynamicTyping: true,
        // Limit the parse to a single row
        preview: 1
    };
    const { data } = papaparse_1.default.parse(csvText, previewConfig);
    return data[0];
}
/**
 * Builds a stateful renaming function for header columns. Papaparse does not handle
 * duplicate header columns (the latest occurrence would win), so every repeated
 * name gets a `.1`, `.2`, ... suffix until it is unique among the names returned so far.
 * See the header option in https://www.papaparse.com/docs#config
 * @returns a transform function that returns sanitized names for duplicate fields
 */
function duplicateColumnTransformer() {
    // Names already handed out by this transformer instance
    const seenNames = new Set();
    return (col) => {
        let uniqueName = col;
        // Try col.1, col.2, ... until an unused name is found
        for (let suffix = 1; seenNames.has(uniqueName); suffix += 1) {
            uniqueName = `${col}.${suffix}`;
        }
        seenNames.add(uniqueName);
        return uniqueName;
    };
}
/**
 * Produces synthetic column names `<prefix>1`, `<prefix>2`, ... for tables without a header.
 * @param columnPrefix the text placed in front of each 1-based column number
 * @param count how many column names to produce (defaults to 0)
 * @returns an array of column names
 */
function generateHeader(columnPrefix, count = 0) {
    const names = [];
    let position = 0;
    while (position < count) {
        // Column numbers are 1-based
        position += 1;
        names.push(`${columnPrefix}${position}`);
    }
    return names;
}
/**
 * Derives a per-column schema from one sample row.
 *
 * Each entry is `{name, index, type}`: number and boolean values map to
 * `Float32Array`, everything else (strings included) maps to `Array`.
 * Entries are keyed by the header name, or by the column index when the header
 * name is missing or falsy.
 *
 * @param row the sample row whose value types are inspected
 * @param headerRow column names; when falsy the result is an array indexed by column
 * @returns an object (with header) or array (without header) of column descriptors
 */
function deduceSchema(row, headerRow) {
    const schema = headerRow ? {} : [];
    for (const [index, value] of row.entries()) {
        const key = (headerRow && headerRow[index]) || index;
        // TODO - booleans could be handled differently...
        // We currently only handle numeric rows
        // TODO we could offer a function to map strings to numbers?
        const isNumeric = typeof value === 'number' || typeof value === 'boolean';
        schema[key] = { name: String(key), index, type: isNumeric ? Float32Array : Array };
    }
    return schema;
}
@@ -1,23 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.CSVWriter = void 0;
5
- const encode_csv_1 = require("./lib/encoders/encode-csv");
6
- const DEFAULT_WRITER_OPTIONS = {
7
- csv: {
8
- useDisplayNames: false
9
- },
10
- useDisplayNames: false
11
- };
12
- exports.CSVWriter = {
13
- id: 'csv',
14
- version: 'latest',
15
- module: 'csv',
16
- name: 'CSV',
17
- extensions: ['csv'],
18
- mimeTypes: ['text/csv'],
19
- options: DEFAULT_WRITER_OPTIONS,
20
- text: true,
21
- encode: async (table, options) => new TextEncoder().encode((0, encode_csv_1.encodeTableAsCSV)(table, options)).buffer,
22
- encodeText: (table, options) => (0, encode_csv_1.encodeTableAsCSV)(table, options)
23
- };
package/dist/src/index.js DELETED
@@ -1,8 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.CSVWriter = exports.CSVLoader = void 0;
5
- var csv_loader_1 = require("./csv-loader");
6
- Object.defineProperty(exports, "CSVLoader", { enumerable: true, get: function () { return csv_loader_1.CSVLoader; } });
7
- var csv_writer_1 = require("./csv-writer");
8
- Object.defineProperty(exports, "CSVWriter", { enumerable: true, get: function () { return csv_writer_1.CSVWriter; } });
@@ -1,50 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- // Copyright 2022 Foursquare Labs, Inc.
4
- Object.defineProperty(exports, "__esModule", { value: true });
5
- exports.encodeTableAsCSV = void 0;
6
- const schema_1 = require("@loaders.gl/schema");
7
- const d3_dsv_1 = require("d3-dsv");
/**
 * Encode a Table object as CSV text.
 *
 * The first output row holds the column names taken from `table.schema.fields`
 * (no names when the table has no schema). Every table row follows, with each
 * value passed through `preformatFieldValue` before d3-dsv formats the rows.
 *
 * @param table the table to encode
 * @param options `useDisplayNames` (top level or under `csv`) switches the header to
 *   each field's `metadata.displayName` when that is a string
 * @returns the CSV text
 */
function encodeTableAsCSV(table, options = { csv: { useDisplayNames: true } }) {
    const useDisplayNames = options.useDisplayNames || options.csv?.useDisplayNames;
    const fields = table.schema?.fields || [];
    const columnNames = fields.map((f) => {
        // This is a leaky abstraction, assuming Kepler metadata
        const displayName = f.metadata?.displayName;
        return useDisplayNames && typeof displayName === 'string' ? displayName : f.name;
    });
    const formattedData = [columnNames];
    // The column count is the same for every row, so look it up once instead of on
    // every inner-loop check. It is read lazily on the first row so that, as before,
    // a table without rows never triggers the lookup.
    let numCols;
    for (const row of (0, schema_1.makeArrayRowIterator)(table)) {
        if (numCols === undefined) {
            numCols = (0, schema_1.getTableNumCols)(table);
        }
        const formattedRow = [];
        for (let columnIndex = 0; columnIndex < numCols; ++columnIndex) {
            const value = row[columnIndex];
            formattedRow[columnIndex] = preformatFieldValue(value);
        }
        formattedData.push(formattedRow);
    }
    return (0, d3_dsv_1.csvFormatRows)(formattedData);
}
exports.encodeTableAsCSV = encodeTableAsCSV;
/**
 * Converts a single cell value to what is handed to the CSV formatter.
 *
 * - null / undefined      -> null (missing value)
 * - valid Date            -> fully-qualified ISO 8601 string
 * - invalid Date          -> 'Invalid Date' (instead of throwing)
 * - other objects, arrays -> their JSON text
 * - everything else       -> String(value)
 *
 * @param value the cell value
 * @returns a string, or null for a missing value
 */
const preformatFieldValue = (value) => {
    if (value === null || value === undefined) {
        // TODO: It would be nice to distinguish between missing values and the empty string
        // https://github.com/d3/d3-dsv/issues/84
        return null;
    }
    if (value instanceof Date) {
        // toISOString() throws a RangeError on an invalid date, which would abort the
        // whole encode because of one bad cell; emit the usual 'Invalid Date' text instead.
        if (Number.isNaN(value.getTime())) {
            return String(value);
        }
        // d3-dsv formats dates without timezones if they don't have time info;
        // this forces them to always use fully-qualified ISO time strings
        return value.toISOString();
    }
    if (typeof value === 'object') {
        return JSON.stringify(value);
    }
    return String(value);
};
@@ -1,63 +0,0 @@
1
- "use strict";
2
- // @ts-nocheck
3
- // A custom papaparse `Streamer` for async iterators
4
- // Ideally this can be contributed back to papaparse
5
- // Or papaparse can expose Streamer API so we can extend without forking.
6
- var __importDefault = (this && this.__importDefault) || function (mod) {
7
- return (mod && mod.__esModule) ? mod : { "default": mod };
8
- };
9
- Object.defineProperty(exports, "__esModule", { value: true });
10
- /* eslint-disable no-invalid-this */
11
- // Note: papaparse is not an ES6 module
12
- const papaparse_1 = __importDefault(require("./papaparse"));
13
- const { ChunkStreamer } = papaparse_1.default;
/**
 * A papaparse `Streamer` that pulls its input from an async iterator.
 *
 * It is built on papaparse's `ChunkStreamer`: every chunk from the iterator is
 * converted to a string (binary chunks go through a streaming `TextDecoder`
 * created with `config.encoding`) and passed to `parseChunk`. When the iterator
 * is exhausted the streamer is marked finished and one last empty chunk is parsed.
 * Any error raised while iterating or parsing is reported through `_sendError`.
 *
 * @param config papaparse configuration (defaults to an empty object)
 */
function AsyncIteratorStreamer(config) {
    config = config || {};
    ChunkStreamer.call(this, config);
    this.textDecoder = new TextDecoder(this._config.encoding);
    // Implement ChunkStreamer base class methods
    // this.pause = function() {
    //   ChunkStreamer.prototype.pause.apply(this, arguments);
    // };
    // this.resume = function() {
    //   ChunkStreamer.prototype.resume.apply(this, arguments);
    //   this._input.resume();
    // };
    this.stream = async function (asyncIterator) {
        this._input = asyncIterator;
        try {
            // TODO - check for pause and abort flags?
            for await (const chunk of asyncIterator) {
                this.parseChunk(this.getStringChunk(chunk));
            }
            // Chunks are decoded in streaming mode, so bytes of an incomplete trailing
            // sequence are still buffered inside the decoder. Flush it so they are not
            // silently dropped (yields '' when nothing is pending).
            const remainder = this.textDecoder.decode();
            if (remainder) {
                this.parseChunk(remainder);
            }
            this._finished = true;
            this.parseChunk('');
        }
        catch (error) {
            // Inform ChunkStreamer base class of error
            this._sendError(error);
        }
    };
    this._nextChunk = function nextChunk() {
        // Left empty, as async iterator automatically pulls next chunk
    };
    // HELPER METHODS
    // Strings pass through unchanged; anything else is decoded in streaming mode so a
    // multi-byte character split across two chunks is decoded correctly
    this.getStringChunk = function (chunk) {
        return typeof chunk === 'string' ? chunk : this.textDecoder.decode(chunk, { stream: true });
    };
}
exports.default = AsyncIteratorStreamer;
AsyncIteratorStreamer.prototype = Object.create(ChunkStreamer.prototype);
AsyncIteratorStreamer.prototype.constructor = AsyncIteratorStreamer;