@loaders.gl/csv 4.3.1 → 4.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/dist/csv-arrow-loader.d.ts +37 -0
  2. package/dist/csv-arrow-loader.d.ts.map +1 -0
  3. package/dist/csv-arrow-loader.js +23 -0
  4. package/dist/csv-format.d.ts +10 -0
  5. package/dist/csv-format.d.ts.map +1 -0
  6. package/dist/csv-format.js +12 -0
  7. package/dist/csv-loader.d.ts +6 -6
  8. package/dist/csv-loader.d.ts.map +1 -1
  9. package/dist/csv-loader.js +53 -20
  10. package/dist/csv-writer.d.ts +6 -5
  11. package/dist/csv-writer.d.ts.map +1 -1
  12. package/dist/csv-writer.js +2 -5
  13. package/dist/dist.dev.js +13318 -449
  14. package/dist/dist.min.js +23 -20
  15. package/dist/index.cjs +317 -262
  16. package/dist/index.cjs.map +4 -4
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/index.js +1 -0
  20. package/dist/lib/encoders/encode-csv.d.ts +1 -1
  21. package/dist/lib/encoders/encode-csv.d.ts.map +1 -1
  22. package/dist/lib/encoders/encode-csv.js +1 -1
  23. package/dist/papaparse/async-iterator-streamer.d.ts +1 -21
  24. package/dist/papaparse/async-iterator-streamer.d.ts.map +1 -1
  25. package/dist/papaparse/async-iterator-streamer.js +6 -6
  26. package/dist/papaparse/papa-constants.d.ts +12 -0
  27. package/dist/papaparse/papa-constants.d.ts.map +1 -0
  28. package/dist/papaparse/papa-constants.js +19 -0
  29. package/dist/papaparse/papa-parser.d.ts +110 -0
  30. package/dist/papaparse/papa-parser.d.ts.map +1 -0
  31. package/dist/papaparse/papa-parser.js +733 -0
  32. package/dist/papaparse/papa-writer.d.ts +22 -0
  33. package/dist/papaparse/papa-writer.d.ts.map +1 -0
  34. package/dist/papaparse/papa-writer.js +166 -0
  35. package/dist/papaparse/papaparse.d.ts +9 -113
  36. package/dist/papaparse/papaparse.d.ts.map +1 -1
  37. package/dist/papaparse/papaparse.js +13 -882
  38. package/package.json +5 -5
  39. package/src/csv-arrow-loader.ts +41 -0
  40. package/src/csv-format.ts +15 -0
  41. package/src/csv-loader.ts +58 -25
  42. package/src/csv-writer.ts +2 -5
  43. package/src/index.ts +3 -0
  44. package/src/lib/encoders/encode-csv.ts +2 -1
  45. package/src/papaparse/async-iterator-streamer.ts +6 -6
  46. package/src/papaparse/papa-constants.ts +23 -0
  47. package/src/papaparse/papa-parser.ts +872 -0
  48. package/src/papaparse/papa-writer.ts +219 -0
  49. package/src/papaparse/papaparse.ts +17 -1048
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/csv",
3
- "version": "4.3.1",
3
+ "version": "4.4.0-alpha.1",
4
4
  "description": "Framework-independent loader for CSV and DSV table formats",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -44,12 +44,12 @@
44
44
  "build-bundle-dev": "ocular-bundle ./bundle.ts --env=dev --output=dist/dist.dev.js"
45
45
  },
46
46
  "dependencies": {
47
- "@loaders.gl/loader-utils": "4.3.1",
48
- "@loaders.gl/schema": "4.3.1",
47
+ "@loaders.gl/loader-utils": "4.4.0-alpha.1",
48
+ "@loaders.gl/schema": "4.4.0-alpha.1",
49
49
  "d3-dsv": "^1.2.0"
50
50
  },
51
51
  "peerDependencies": {
52
- "@loaders.gl/core": "^4.3.0"
52
+ "@loaders.gl/core": "4.4.0-alpha.0"
53
53
  },
54
- "gitHead": "70a883ab6bc84647c49963215dd6ff62d4d61de3"
54
+ "gitHead": "f1732de45907bd500bf4eedb4803beca8bf4bfb0"
55
55
  }
@@ -0,0 +1,41 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
+ import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
7
+ import {convertTable, convertBatches} from '@loaders.gl/schema-utils';
8
+
9
+ import type {CSVLoaderOptions} from './csv-loader';
10
+ import {CSVLoader} from './csv-loader';
11
+
12
+ export type CSVArrowLoaderOptions = LoaderOptions & {
13
+ csv?: Omit<CSVLoaderOptions['csv'], 'shape'>;
14
+ };
15
+
16
+ export const CSVArrowLoader = {
17
+ ...CSVLoader,
18
+
19
+ dataType: null as unknown as ArrowTable,
20
+ batchType: null as unknown as ArrowTableBatch,
21
+
22
+ parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
23
+ parseCSVToArrow(new TextDecoder().decode(arrayBuffer), options),
24
+ parseText: (text: string, options?: CSVLoaderOptions) => parseCSVToArrow(text, options),
25
+ parseInBatches: parseCSVToArrowBatches
26
+ } as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, CSVArrowLoaderOptions>;
27
+
28
+ async function parseCSVToArrow(csvText: string, options?: CSVLoaderOptions): Promise<ArrowTable> {
29
+ // Apps can call the parse method directly, so we apply default options here
30
+ // const csvOptions = {...CSVArrowLoader.options.csv, ...options?.csv};
31
+ const table = await CSVLoader.parseText(csvText, options);
32
+ return convertTable(table, 'arrow-table');
33
+ }
34
+
35
+ function parseCSVToArrowBatches(
36
+ asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
37
+ options?: CSVArrowLoaderOptions
38
+ ): AsyncIterable<ArrowTableBatch> {
39
+ const tableIterator = CSVLoader.parseInBatches(asyncIterator, options);
40
+ return convertBatches(tableIterator, 'arrow-table');
41
+ }
@@ -0,0 +1,15 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+
5
+ import type {Format} from '@loaders.gl/loader-utils';
6
+
7
+ /** Comma-Separated Values */
8
+ export const CSVFormat = {
9
+ id: 'csv',
10
+ module: 'csv',
11
+ name: 'CSV',
12
+ extensions: ['csv', 'tsv', 'dsv'],
13
+ mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
14
+ category: 'table'
15
+ } as const satisfies Format;
package/src/csv-loader.ts CHANGED
@@ -3,19 +3,19 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
5
  import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
- import type {ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
6
+ import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
7
7
 
8
+ import {log} from '@loaders.gl/loader-utils';
8
9
  import {
9
10
  AsyncQueue,
11
+ deduceTableSchema,
10
12
  TableBatchBuilder,
11
13
  convertToArrayRow,
12
14
  convertToObjectRow
13
- } from '@loaders.gl/schema';
15
+ } from '@loaders.gl/schema-utils';
14
16
  import Papa from './papaparse/papaparse';
15
17
  import AsyncIteratorStreamer from './papaparse/async-iterator-streamer';
16
-
17
- type ObjectField = {name: string; index: number; type: any};
18
- type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
18
+ import {CSVFormat} from './csv-format';
19
19
 
20
20
  // __VERSION__ is injected by babel-plugin-version-inline
21
21
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
@@ -48,16 +48,11 @@ export type CSVLoaderOptions = LoaderOptions & {
48
48
  };
49
49
 
50
50
  export const CSVLoader = {
51
+ ...CSVFormat,
52
+
51
53
  dataType: null as unknown as ObjectRowTable | ArrayRowTable,
52
54
  batchType: null as unknown as TableBatch,
53
-
54
- id: 'csv',
55
- module: 'csv',
56
- name: 'CSV',
57
55
  version: VERSION,
58
- extensions: ['csv', 'tsv', 'dsv'],
59
- mimeTypes: ['text/csv', 'text/tab-separated-values', 'text/dsv'],
60
- category: 'table',
61
56
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVLoaderOptions) =>
62
57
  parseCSV(new TextDecoder().decode(arrayBuffer), options),
63
58
  parseText: (text: string, options?: CSVLoaderOptions) => parseCSV(text, options),
@@ -89,7 +84,7 @@ async function parseCSV(
89
84
  csvText: string,
90
85
  options?: CSVLoaderOptions
91
86
  ): Promise<ObjectRowTable | ArrayRowTable> {
92
- // Apps can call the parse method directly, we so apply default options here
87
+ // Apps can call the parse method directly, so we apply default options here
93
88
  const csvOptions = {...CSVLoader.options.csv, ...options?.csv};
94
89
 
95
90
  const firstRow = readFirstRow(csvText);
@@ -115,20 +110,25 @@ async function parseCSV(
115
110
  const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);
116
111
 
117
112
  const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
113
+ let table: ArrayRowTable | ObjectRowTable;
118
114
  switch (shape) {
119
115
  case 'object-row-table':
120
- return {
116
+ table = {
121
117
  shape: 'object-row-table',
122
118
  data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
123
119
  };
120
+ break;
124
121
  case 'array-row-table':
125
- return {
122
+ table = {
126
123
  shape: 'array-row-table',
127
124
  data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
128
125
  };
126
+ break;
129
127
  default:
130
128
  throw new Error(shape);
131
129
  }
130
+ table.schema = deduceTableSchema(table!);
131
+ return table;
132
132
  }
133
133
 
134
134
  // TODO - support batch size 0 = no batching/single batch?
@@ -151,7 +151,7 @@ function parseCSVInBatches(
151
151
  let isFirstRow: boolean = true;
152
152
  let headerRow: string[] | null = null;
153
153
  let tableBatchBuilder: TableBatchBuilder | null = null;
154
- let schema: ObjectSchema | null = null;
154
+ let schema: Schema | null = null;
155
155
 
156
156
  const config = {
157
157
  // dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
@@ -199,7 +199,7 @@ function parseCSVInBatches(
199
199
  if (!headerRow) {
200
200
  headerRow = generateHeader(csvOptions.columnPrefix, row.length);
201
201
  }
202
- schema = deduceSchema(row, headerRow);
202
+ schema = deduceCSVSchema(row, headerRow);
203
203
  }
204
204
 
205
205
  if (csvOptions.optimizeMemoryUsage) {
@@ -314,23 +314,56 @@ function generateHeader(columnPrefix: string, count: number = 0): string[] {
314
314
  return headers;
315
315
  }
316
316
 
317
- function deduceSchema(row, headerRow): ObjectSchema {
318
- const schema: ObjectSchema = headerRow ? {} : [];
317
+ function deduceCSVSchema(row, headerRow): Schema {
318
+ const fields: Schema['fields'] = [];
319
319
  for (let i = 0; i < row.length; i++) {
320
320
  const columnName = (headerRow && headerRow[i]) || i;
321
321
  const value = row[i];
322
322
  switch (typeof value) {
323
323
  case 'number':
324
+ fields.push({name: String(columnName), type: 'float64', nullable: true});
325
+ break;
324
326
  case 'boolean':
325
- // TODO - booleans could be handled differently...
326
- schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
327
+ fields.push({name: String(columnName), type: 'bool', nullable: true});
327
328
  break;
328
329
  case 'string':
330
+ fields.push({name: String(columnName), type: 'utf8', nullable: true});
331
+ break;
329
332
  default:
330
- schema[columnName] = {name: String(columnName), index: i, type: Array};
331
- // We currently only handle numeric rows
332
- // TODO we could offer a function to map strings to numbers?
333
+ log.warn(`CSV: Unknown column type: ${typeof value}`)();
334
+ fields.push({name: String(columnName), type: 'utf8', nullable: true});
333
335
  }
334
336
  }
335
- return schema;
337
+ return {
338
+ fields,
339
+ metadata: {
340
+ 'loaders.gl#format': 'csv',
341
+ 'loaders.gl#loader': 'CSVLoader'
342
+ }
343
+ };
336
344
  }
345
+
346
+ // TODO - remove
347
+ // type ObjectField = {name: string; index: number; type: any};
348
+ // type ObjectSchema = {[key: string]: ObjectField} | ObjectField[];
349
+
350
+ // function deduceObjectSchema(row, headerRow): ObjectSchema {
351
+ // const schema: ObjectSchema = headerRow ? {} : [];
352
+ // for (let i = 0; i < row.length; i++) {
353
+ // const columnName = (headerRow && headerRow[i]) || i;
354
+ // const value = row[i];
355
+ // switch (typeof value) {
356
+ // case 'number':
357
+ // case 'boolean':
358
+ // // TODO - booleans could be handled differently...
359
+ // schema[columnName] = {name: String(columnName), index: i, type: Float32Array};
360
+ // break;
361
+ // case 'string':
362
+ // default:
363
+ // schema[columnName] = {name: String(columnName), index: i, type: Array};
364
+ // // We currently only handle numeric rows
365
+ // // TODO we could offer a function to map strings to numbers?
366
+ // }
367
+ // }
368
+ // return schema;
369
+ // }
package/src/csv-writer.ts CHANGED
@@ -6,6 +6,7 @@
6
6
  import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
7
7
  import type {Table, TableBatch} from '@loaders.gl/schema';
8
8
  import {encodeTableAsCSV} from './lib/encoders/encode-csv';
9
+ import {CSVFormat} from './csv-format';
9
10
 
10
11
  export type CSVWriterOptions = WriterOptions & {
11
12
  csv?: {
@@ -14,12 +15,8 @@ export type CSVWriterOptions = WriterOptions & {
14
15
  };
15
16
 
16
17
  export const CSVWriter = {
17
- id: 'csv',
18
+ ...CSVFormat,
18
19
  version: 'latest',
19
- module: 'csv',
20
- name: 'CSV',
21
- extensions: ['csv'],
22
- mimeTypes: ['text/csv'],
23
20
  options: {
24
21
  csv: {
25
22
  useDisplayNames: false
package/src/index.ts CHANGED
@@ -7,3 +7,6 @@ export {CSVLoader} from './csv-loader';
7
7
 
8
8
  export type {CSVWriterOptions} from './csv-writer';
9
9
  export {CSVWriter} from './csv-writer';
10
+
11
+ export type {CSVArrowLoaderOptions} from './csv-arrow-loader';
12
+ export {CSVArrowLoader} from './csv-arrow-loader';
@@ -3,7 +3,8 @@
3
3
  // Copyright (c) vis.gl contributors
4
4
  // Copyright 2022 Foursquare Labs, Inc.
5
5
 
6
- import {Table, makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema';
6
+ import type {Table} from '@loaders.gl/schema';
7
+ import {makeArrayRowIterator, getTableNumCols} from '@loaders.gl/schema-utils';
7
8
  import {csvFormatRows} from 'd3-dsv';
8
9
  import type {CSVWriterOptions} from '../../csv-writer';
9
10
 
@@ -1,11 +1,11 @@
1
- // @ts-nocheck
2
- // A custom papaparse `Streamer` for async iterators
3
- // Ideally this can be contributed back to papaparse
4
- // Or papaparse can expose Streamer API so we can extend without forking.
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
5
 
6
- /* eslint-disable no-invalid-this */
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
7
+ // https://github.com/mholt/PapaParse
7
8
 
8
- // Note: papaparse is not an ES6 module
9
9
  import Papa from './papaparse';
10
10
  const {ChunkStreamer} = Papa;
11
11
 
@@ -0,0 +1,23 @@
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
3
+ // Copyright (c) vis.gl contributors
4
+ // Copyright (c) 2015 Matthew Holt
5
+
6
+ // This is a fork of papaparse v5.0.0-beta.0 under MIT license
7
+ // https://github.com/mholt/PapaParse
8
+
9
+ const BYTE_ORDER_MARK = '\ufeff';
10
+
11
+ export const Papa = {
12
+ RECORD_SEP: String.fromCharCode(30),
13
+ UNIT_SEP: String.fromCharCode(31),
14
+ BYTE_ORDER_MARK,
15
+ BAD_DELIMITERS: ['\r', '\n', '"', BYTE_ORDER_MARK],
16
+ WORKERS_SUPPORTED: false, // !IS_WORKER && !!globalThis.Worker
17
+ NODE_STREAM_INPUT: 1,
18
+
19
+ // Configurable chunk sizes for local and remote files, respectively
20
+ LocalChunkSize: 1024 * 1024 * 10, // 10 MiB
21
+ RemoteChunkSize: 1024 * 1024 * 5, // 5 MiB
22
+ DefaultDelimiter: ',' // Used if not specified and detection fails
23
+ };