@loaders.gl/csv 4.3.4 → 4.4.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/csv-arrow-loader.d.ts +37 -0
  2. package/dist/csv-arrow-loader.d.ts.map +1 -0
  3. package/dist/csv-arrow-loader.js +24 -0
  4. package/dist/csv-arrow-loader.js.map +1 -0
  5. package/dist/csv-format.d.ts +10 -0
  6. package/dist/csv-format.d.ts.map +1 -0
  7. package/dist/csv-format.js +13 -0
  8. package/dist/csv-format.js.map +1 -0
  9. package/dist/csv-loader.d.ts +7 -7
  10. package/dist/csv-loader.d.ts.map +1 -1
  11. package/dist/csv-loader.js +59 -25
  12. package/dist/csv-loader.js.map +1 -0
  13. package/dist/csv-writer.d.ts +6 -5
  14. package/dist/csv-writer.d.ts.map +1 -1
  15. package/dist/csv-writer.js +3 -5
  16. package/dist/csv-writer.js.map +1 -0
  17. package/dist/dist.dev.js +13346 -454
  18. package/dist/dist.min.js +23 -20
  19. package/dist/index.cjs +323 -267
  20. package/dist/index.cjs.map +4 -4
  21. package/dist/index.d.ts +2 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +2 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/lib/encoders/encode-csv.d.ts +1 -1
  26. package/dist/lib/encoders/encode-csv.d.ts.map +1 -1
  27. package/dist/lib/encoders/encode-csv.js +2 -1
  28. package/dist/lib/encoders/encode-csv.js.map +1 -0
  29. package/dist/papaparse/async-iterator-streamer.d.ts +1 -21
  30. package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
  31. package/dist/papaparse/async-iterator-streamer.js +7 -6
  32. package/dist/papaparse/async-iterator-streamer.js.map +1 -0
  33. package/dist/papaparse/papa-constants.d.ts +12 -0
  34. package/dist/papaparse/papa-constants.d.ts.map +1 -0
  35. package/dist/papaparse/papa-constants.js +20 -0
  36. package/dist/papaparse/papa-constants.js.map +1 -0
  37. package/dist/papaparse/papa-parser.d.ts +110 -0
  38. package/dist/papaparse/papa-parser.d.ts.map +1 -0
  39. package/dist/papaparse/papa-parser.js +734 -0
  40. package/dist/papaparse/papa-parser.js.map +1 -0
  41. package/dist/papaparse/papa-writer.d.ts +22 -0
  42. package/dist/papaparse/papa-writer.d.ts.map +1 -0
  43. package/dist/papaparse/papa-writer.js +167 -0
  44. package/dist/papaparse/papa-writer.js.map +1 -0
  45. package/dist/papaparse/papaparse.d.ts +9 -113
  46. package/dist/papaparse/papaparse.d.ts.map +1 -1
  47. package/dist/papaparse/papaparse.js +14 -882
  48. package/dist/papaparse/papaparse.js.map +1 -0
  49. package/package.json +5 -5
  50. package/src/csv-arrow-loader.ts +43 -0
  51. package/src/csv-format.ts +15 -0
  52. package/src/csv-loader.ts +66 -31
  53. package/src/csv-writer.ts +2 -5
  54. package/src/index.ts +3 -0
  55. package/src/lib/encoders/encode-csv.ts +2 -1
  56. package/src/papaparse/async-iterator-streamer.ts +6 -6
  57. package/src/papaparse/papa-constants.ts +23 -0
  58. package/src/papaparse/papa-parser.ts +872 -0
  59. package/src/papaparse/papa-writer.ts +219 -0
  60. package/src/papaparse/papaparse.ts +17 -1048
@@ -0,0 +1 @@
1
+ {"version":3,"file":"papaparse.js","sourceRoot":"","sources":["../../src/papaparse/papaparse.ts"],"names":[],"mappings":"AAAA,aAAa;AACb,+BAA+B;AAC/B,oCAAoC;AACpC,kCAAkC;AAElC,8DAA8D;AAC9D,qCAAqC;AAErC,gBAAgB;AAChB,sBAAsB;AACtB,wCAAwC;AACxC,6EAA6E;AAC7E,yEAAyE;AACzE,wCAAwC;AAExC,OAAO,EAAC,SAAS,EAAE,MAAM,EAAE,YAAY,EAAE,aAAa,EAAC,yBAAsB;AAC7E,OAAO,EAAC,SAAS,EAAC,yBAAsB;AACxC,OAAO,EAAC,IAAI,EAAC,4BAAyB;AAKtC,eAAe;IACb,GAAG,IAAI;IAEP,KAAK,EAAE,SAAS;IAChB,OAAO,EAAE,SAAS;IAElB,aAAa;IAEb,2CAA2C;IAC3C,MAAM;IACN,YAAY;CACb,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/csv",
3
- "version": "4.3.4",
3
+ "version": "4.4.0-alpha.10",
4
4
  "description": "Framework-independent loader for CSV and DSV table formats",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -44,12 +44,12 @@
44
44
  "build-bundle-dev": "ocular-bundle ./bundle.ts --env=dev --output=dist/dist.dev.js"
45
45
  },
46
46
  "dependencies": {
47
- "@loaders.gl/loader-utils": "4.3.4",
48
- "@loaders.gl/schema": "4.3.4",
47
+ "@loaders.gl/loader-utils": "4.4.0-alpha.10",
48
+ "@loaders.gl/schema": "4.4.0-alpha.10",
49
49
  "d3-dsv": "^1.2.0"
50
50
  },
51
51
  "peerDependencies": {
52
- "@loaders.gl/core": "^4.3.0"
52
+ "@loaders.gl/core": "4.4.0-alpha.1"
53
53
  },
54
- "gitHead": "d18246f4ef6382f787a6ae2e9e21d8a7f40e5917"
54
+ "gitHead": "7b4dc3fdbaed20a2597c70c57efdcda5c404147f"
55
55
  }
@@ -0,0 +1,43 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
+ import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
7
+ import {convertTable, convertBatches} from '@loaders.gl/schema-utils';
8
+
9
+ import type {CSVLoaderOptions} from './csv-loader';
10
+ import {CSVLoader} from './csv-loader';
11
+
12
+ export type CSVArrowLoaderOptions = LoaderOptions & {
13
+ csv?: Omit<CSVLoaderOptions['csv'], 'shape'>;
14
+ };
15
+
16
+ export const CSVArrowLoader = {
17
+ ...CSVLoader,
18
+
19
+ dataType: null as unknown as ArrowTable,
20
+ batchType: null as unknown as ArrowTableBatch,
21
+
22
+ parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
23
+ parseCSVToArrow(new TextDecoder().decode(arrayBuffer), options),
24
+ parseText: (text: string, options?: CSVLoaderOptions) => parseCSVToArrow(text, options),
25
+ parseInBatches: parseCSVToArrowBatches
26
+ } as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, CSVArrowLoaderOptions>;
27
+
28
+ async function parseCSVToArrow(csvText: string, options?: CSVLoaderOptions): Promise<ArrowTable> {
29
+ // Apps can call the parse method directly, so we apply default options here
30
+ // const csvOptions = {...CSVArrowLoader.options.csv, ...options?.csv};
31
+ const table = await CSVLoader.parseText(csvText, options);
32
+ return convertTable(table, 'arrow-table');
33
+ }
34
+
35
+ function parseCSVToArrowBatches(
36
+ asyncIterator:
37
+ | AsyncIterable<ArrayBufferLike | ArrayBufferView>
38
+ | Iterable<ArrayBufferLike | ArrayBufferView>,
39
+ options?: CSVArrowLoaderOptions
40
+ ): AsyncIterable<ArrowTableBatch> {
41
+ const tableIterator = CSVLoader.parseInBatches(asyncIterator, options);
42
+ return convertBatches(tableIterator, 'arrow-table');
43
+ }
@@ -0,0 +1,15 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {Format} from '@loaders.gl/loader-utils';
6
+
7
+ /** Comma-Separated Values */
8
+ export const CSVFormat = {
9
+ id: 'csv',
10
+ module: 'csv',
11
+ name: 'CSV',
12
+ extensions: ['csv', 'tsv', 'dsv'],
13
+ mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
14
+ category: 'table'
15
+ } as const satisfies Format;
package/src/csv-loader.ts CHANGED
@@ -3,19 +3,19 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
5
  import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
- import type {ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
6
+ import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
7
7
 
8
+ import {log, toArrayBufferIterator} from '@loaders.gl/loader-utils';
8
9
  import {
9
10
  AsyncQueue,
11
+ deduceTableSchema,
10
12
  TableBatchBuilder,
11
13
  convertToArrayRow,
12
14
  convertToObjectRow
13
- } from '@loaders.gl/schema';
15
+ } from '@loaders.gl/schema-utils';
14
16
  import Papa from './papaparse/papaparse';
15
17
  import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
16
-
17
- type ObjectField = {name: string; index: number; type: any};
18
- type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
18
+ import {CSVFormat} from './csv-format';
19
19
 
20
20
  // __VERSION__ is injected by babel-plugin-version-inline
21
21
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
@@ -48,16 +48,11 @@ export type CSVLoaderOptions = LoaderOptions & {
48
48
  };
49
49
 
50
50
  export const CSVLoader = {
51
+ ...CSVFormat,
52
+
51
53
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
52
54
  batchType: null as unknown as TableBatch,
53
-
54
- id: 'csv',
55
- module: 'csv',
56
- name: 'CSV',
57
55
  version: VERSION,
58
- extensions: ['csv', 'tsv', 'dsv'],
59
- mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
60
- category: 'table',
61
56
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
62
57
  parseCSV(new TextDecoder().decode(arrayBuffer), options),
63
58
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
@@ -89,7 +84,7 @@ async function parseCSV(
89
84
  csvText: string,
90
85
  options?: CSVLoaderOptions
91
86
  ): Promise<ObjectRowTable | ArrayRowTable> {
92
- // Apps can call the parse method directly, we so apply default options here
87
+ // Apps can call the parse method directly, so we apply default options here
93
88
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};
94
89
 
95
90
  const firstRow = readFirstRow(csvText);
@@ -115,32 +110,39 @@ async function parseCSV(
115
110
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
116
111
 
117
112
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
113
+ let table: ArrayRowTable | ObjectRowTable;
118
114
  switch (shape) {
119
115
  case 'object-row-table':
120
- return {
116
+ table = {
121
117
  shape: 'object-row-table',
122
118
  data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
123
119
  };
120
+ break;
124
121
  case 'array-row-table':
125
- return {
122
+ table = {
126
123
  shape: 'array-row-table',
127
124
  data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
128
125
  };
126
+ break;
129
127
  default:
130
128
  throw new Error(shape);
131
129
  }
130
+ table.schema = deduceTableSchema(table!);
131
+ return table;
132
132
  }
133
133
 
134
134
  // TODO - support batch size 0 = no batching/single batch?
135
135
  function parseCSVInBatches(
136
- asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
136
+ asyncIterator:
137
+ | AsyncIterable<ArrayBufferLike | ArrayBufferView>
138
+ | Iterable<ArrayBufferLike | ArrayBufferView>,
137
139
  options?: CSVLoaderOptions
138
140
  ): AsyncIterable<TableBatch> {
139
141
  // Papaparse does not support standard batch size handling
140
142
  // TODO - investigate papaparse chunks mode
141
143
  options = {...options};
142
- if (options.batchSize === 'auto') {
143
- options.batchSize = 4000;
144
+ if (options?.core?.batchSize === 'auto') {
145
+ options.core.batchSize = 4000;
144
146
  }
145
147
 
146
148
  // Apps can call the parse method directly, so we apply default options here
@@ -151,7 +153,7 @@ function parseCSVInBatches(
151
153
  let isFirstRow: boolean = true;
152
154
  let headerRow: string[] | null = null;
153
155
  let tableBatchBuilder: TableBatchBuilder | null = null;
154
- let schema: ObjectSchema | null = null;
156
+ let schema: Schema | null = null;
155
157
 
156
158
  const config = {
157
159
  // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
@@ -199,7 +201,7 @@ function parseCSVInBatches(
199
201
  if (!headerRow) {
200
202
  headerRow = generateHeader(csvOptions.columnPrefix, row.length);
201
203
  }
202
- schema = deduceSchema(row, headerRow);
204
+ schema = deduceCSVSchema(row, headerRow);
203
205
  }
204
206
 
205
207
  if (csvOptions.optimizeMemoryUsage) {
@@ -208,7 +210,7 @@ function parseCSVInBatches(
208
210
  row = JSON.parse(JSON.stringify(row));
209
211
  }
210
212
 
211
- const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
213
+ const shape = (options as any)?.shape || csvOptions.shape || DEFAULT_CSV_SHAPE;
212
214
 
213
215
  // Add the row
214
216
  tableBatchBuilder =
@@ -218,7 +220,7 @@ function parseCSVInBatches(
218
220
  schema,
219
221
  {
220
222
  shape,
221
- ...options
223
+ ...(options?.core || {})
222
224
  }
223
225
  );
224
226
 
@@ -251,7 +253,7 @@ function parseCSVInBatches(
251
253
  }
252
254
  };
253
255
 
254
- Papa.parse(asyncIterator, config, AsyncIteratorStreamer);
256
+ Papa.parse(toArrayBufferIterator(asyncIterator), config, AsyncIteratorStreamer);
255
257
 
256
258
  // TODO - Does it matter if we return asyncIterable or asyncIterator
257
259
  // return asyncQueue[Symbol.asyncIterator]();
@@ -314,23 +316,56 @@ function generateHeader(columnPrefix: string, count: number = 0): string[] {
314
316
  return headers;
315
317
  }
316
318
 
317
- function deduceSchema(row, headerRow): ObjectSchema {
318
- const schema: ObjectSchema = headerRow ? {} : [];
319
+ function deduceCSVSchema(row, headerRow): Schema {
320
+ const fields: Schema['fields'] = [];
319
321
  for (let i = 0; i < row.length; i++) {
320
322
  const columnName = (headerRow && headerRow[i]) || i;
321
323
  const value = row[i];
322
324
  switch (typeof value) {
323
325
  case 'number':
326
+ fields.push({name: String(columnName), type: 'float64', nullable: true});
327
+ break;
324
328
  case 'boolean':
325
- // TODO - booleans could be handled differently...
326
- schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
329
+ fields.push({name: String(columnName), type: 'bool', nullable: true});
327
330
  break;
328
331
  case 'string':
332
+ fields.push({name: String(columnName), type: 'utf8', nullable: true});
333
+ break;
329
334
  default:
330
- schema[columnName] = {name: String(columnName), index: i, type: Array};
331
- // We currently only handle numeric rows
332
- // TODO we could offer a function to map strings to numbers?
335
+ log.warn(`CSV: Unknown column type: ${typeof value}`)();
336
+ fields.push({name: String(columnName), type: 'utf8', nullable: true});
333
337
  }
334
338
  }
335
- return schema;
339
+ return {
340
+ fields,
341
+ metadata: {
342
+ 'loaders.gl#format': 'csv',
343
+ 'loaders.gl#loader': 'CSVLoader'
344
+ }
345
+ };
336
346
  }
347
+
348
+ // TODO - remove
349
+ // type ObjectField = {name: string; index: number; type: any};
350
+ // type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
351
+
352
+ // function deduceObjectSchema(row, headerRow): ObjectSchema {
353
+ // const schema: ObjectSchema = headerRow ? {} : [];
354
+ // for (let i = 0; i < row.length; i++) {
355
+ // const columnName = (headerRow && headerRow[i]) || i;
356
+ // const value = row[i];
357
+ // switch (typeof value) {
358
+ // case 'number':
359
+ // case 'boolean':
360
+ // // TODO - booleans could be handled differently...
361
+ // schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
362
+ // break;
363
+ // case 'string':
364
+ // default:
365
+ // schema[columnName] = {name: String(columnName), index: i, type: Array};
366
+ // // We currently only handle numeric rows
367
+ // // TODO we could offer a function to map strings to numbers?
368
+ // }
369
+ // }
370
+ // return schema;
371
+ // }
package/src/csv-writer.ts CHANGED
@@ -6,6 +6,7 @@
6
6
  import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
7
7
  import type {Table, TableBatch} from '@loaders.gl/schema';
8
8
  import {encodeTableAsCSV} from './lib/encoders/encode-csv';
9
+ import {CSVFormat} from './csv-format';
9
10
 
10
11
  export type CSVWriterOptions = WriterOptions & {
11
12
  csv?: {
@@ -14,12 +15,8 @@ export type CSVWriterOptions = WriterOptions & {
14
15
  };
15
16
 
16
17
  export const CSVWriter = {
17
- id: 'csv',
18
+ ...CSVFormat,
18
19
  version: 'latest',
19
- module: 'csv',
20
- name: 'CSV',
21
- extensions: ['csv'],
22
- mimeTypes: ['text/csv'],
23
20
  options: {
24
21
  csv: {
25
22
  useDisplayNames: false
package/src/index.ts CHANGED
@@ -7,3 +7,6 @@ export {CSVLoader} from './csv-loader';
7
7
 
8
8
  export type {CSVWriterOptions} from './csv-writer';
9
9
  export {CSVWriter} from './csv-writer';
10
+
11
+ export type {CSVArrowLoaderOptions} from './csv-arrow-loader';
12
+ export {CSVArrowLoader} from './csv-arrow-loader';
@@ -3,7 +3,8 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
  // Copyright 2022 Foursquare Labs, Inc.
5
5
 
6
- import {Table, makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema';
6
+ import type {Table} from '@loaders.gl/schema';
7
+ import {makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema-utils';
7
8
  import {csvFormatRows} from 'd3-dsv';
8
9
  import type {CSVWriterOptions} from '../../csv-writer';
9
10
 
@@ -1,11 +1,11 @@
1
- // @ts-nocheck
2
- // A custom papaparse `Streamer` for async iterators
3
- // Ideally this can be contributed back to papaparse
4
- // Or papaparse can expose Streamer API so we can extend without forking.
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
5
 
6
- /* eslint-disable no-invalid-this */
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
7
+ // https://github.com/mholt/PapaParse
7
8
 
8
- // Note: papaparse is not an ES6 module
9
9
  import Papa from './papaparse';
10
10
  const {ChunkStreamer} = Papa;
11
11
 
@@ -0,0 +1,23 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
+
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
7
+ // https://github.com/mholt/PapaParse
8
+
9
+ const BYTE_ORDER_MARK = '\ufeff';
10
+
11
+ export const Papa = {
12
+ RECORD_SEP: String.fromCharCode(30),
13
+ UNIT_SEP: String.fromCharCode(31),
14
+ BYTE_ORDER_MARK,
15
+ BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
16
+ WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
17
+ NODE_STREAM_INPUT: 1,
18
+
19
+ // Configurable chunk sizes for local and remote files, respectively
20
+ LocalChunkSize: 1024 * 1024 * 10, // 10 M,
21
+ RemoteChunkSize: 1024 * 1024 * 5, // 5 M,
22
+ DefaultDelimiter: ',' // Used if not specified and detection fails
23
+ };