@loaders.gl/parquet 4.0.4 → 4.1.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/dist/index.cjs +138 -66
  2. package/dist/index.d.ts +1 -8
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +1 -21
  5. package/dist/index.js.map +1 -1
  6. package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
  7. package/dist/lib/parsers/get-parquet-schema.js.map +1 -1
  8. package/dist/lib/parsers/parse-geoparquet.d.ts +6 -0
  9. package/dist/lib/parsers/parse-geoparquet.d.ts.map +1 -0
  10. package/dist/lib/parsers/parse-geoparquet.js +56 -0
  11. package/dist/lib/parsers/parse-geoparquet.js.map +1 -0
  12. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +6 -0
  13. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  14. package/dist/lib/parsers/parse-parquet-to-columns.js.map +1 -1
  15. package/dist/lib/parsers/parse-parquet.d.ts +17 -0
  16. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -0
  17. package/dist/lib/parsers/{parse-parquet-to-rows.js → parse-parquet.js} +2 -8
  18. package/dist/lib/parsers/parse-parquet.js.map +1 -0
  19. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -1
  20. package/dist/lib/wasm/encode-parquet-wasm.js.map +1 -1
  21. package/dist/lib/wasm/load-wasm-browser.d.ts.map +1 -1
  22. package/dist/lib/wasm/load-wasm-browser.js.map +1 -1
  23. package/dist/lib/wasm/load-wasm-node.d.ts.map +1 -1
  24. package/dist/lib/wasm/load-wasm-node.js.map +1 -1
  25. package/dist/lib/wasm/load-wasm.d.ts.map +1 -1
  26. package/dist/lib/wasm/load-wasm.js.map +1 -1
  27. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
  28. package/dist/lib/wasm/parse-parquet-wasm.js.map +1 -1
  29. package/dist/parquet-loader.d.ts +14 -4
  30. package/dist/parquet-loader.d.ts.map +1 -1
  31. package/dist/parquet-loader.js +50 -4
  32. package/dist/parquet-loader.js.map +1 -1
  33. package/dist/parquet-wasm-loader.d.ts +4 -2
  34. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  35. package/dist/parquet-wasm-loader.js +6 -1
  36. package/dist/parquet-wasm-loader.js.map +1 -1
  37. package/dist/workers/parquet-worker.js +1 -1
  38. package/dist/workers/parquet-worker.js.map +1 -1
  39. package/package.json +15 -12
  40. package/src/index.ts +7 -60
  41. package/src/lib/parsers/get-parquet-schema.ts +3 -0
  42. package/src/lib/parsers/parse-geoparquet.ts +87 -0
  43. package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
  44. package/src/lib/parsers/{parse-parquet-to-rows.ts → parse-parquet.ts} +20 -21
  45. package/src/lib/wasm/encode-parquet-wasm.ts +3 -0
  46. package/src/lib/wasm/load-wasm-browser.ts +3 -0
  47. package/src/lib/wasm/load-wasm-node.ts +3 -0
  48. package/src/lib/wasm/load-wasm.ts +3 -0
  49. package/src/lib/wasm/parse-parquet-wasm.ts +3 -0
  50. package/src/parquet-loader.ts +89 -9
  51. package/src/parquet-wasm-loader.ts +10 -2
  52. package/src/workers/parquet-worker.ts +1 -1
  53. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +0 -6
  54. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +0 -1
  55. package/dist/lib/parsers/parse-parquet-to-rows.js.map +0 -1
@@ -1,13 +1,23 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
- import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
4
+ import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
5
5
  import type {
6
6
  ObjectRowTable,
7
7
  ObjectRowTableBatch,
8
+ GeoJSONTable,
9
+ GeoJSONTableBatch,
8
10
  ColumnarTable,
9
11
  ColumnarTableBatch
10
12
  } from '@loaders.gl/schema';
13
+ import {BlobFile} from '@loaders.gl/loader-utils';
14
+
15
+ import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet';
16
+ import {parseGeoParquetFile, parseGeoParquetFileInBatches} from './lib/parsers/parse-geoparquet';
17
+ import {
18
+ parseParquetFileInColumns,
19
+ parseParquetFileInColumnarBatches
20
+ } from './lib/parsers/parse-parquet-to-columns';
11
21
 
12
22
  export {Buffer} from './polyfills/buffer/install-buffer-polyfill';
13
23
 
@@ -32,8 +42,14 @@ export type ParquetLoaderOptions = LoaderOptions & {
32
42
  };
33
43
  };
34
44
 
35
- /** ParquetJS table loader */
36
- export const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions> = {
45
+ /**
46
+ * ParquetJS table loader
47
+ */
48
+ export const ParquetWorkerLoader: Loader<
49
+ ObjectRowTable,
50
+ ObjectRowTableBatch,
51
+ ParquetLoaderOptions
52
+ > = {
37
53
  name: 'Apache Parquet',
38
54
  id: 'parquet',
39
55
  module: 'parquet',
@@ -55,7 +71,63 @@ export const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetL
55
71
  }
56
72
  };
57
73
 
58
- export const ParquetColumnarLoader: Loader<
74
+ /** ParquetJS table loader */
75
+ export const ParquetLoader: LoaderWithParser<
76
+ ObjectRowTable | GeoJSONTable,
77
+ ObjectRowTableBatch | GeoJSONTableBatch,
78
+ ParquetLoaderOptions
79
+ > = {
80
+ ...ParquetWorkerLoader,
81
+ parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
82
+ parseParquetFile(new BlobFile(arrayBuffer), options),
83
+
84
+ parseFile: parseParquetFile,
85
+ parseFileInBatches: parseParquetFileInBatches
86
+ };
87
+
88
+ // Defeat tree shaking
89
+ // @ts-ignore
90
+ ParquetLoader.Buffer = Buffer;
91
+
92
+ export const GeoParquetWorkerLoader: Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions> =
93
+ {
94
+ name: 'Apache Parquet',
95
+ id: 'parquet',
96
+ module: 'parquet',
97
+ version: VERSION,
98
+ worker: true,
99
+ category: 'table',
100
+ extensions: ['parquet'],
101
+ mimeTypes: ['application/octet-stream'],
102
+ binary: true,
103
+ tests: ['PAR1', 'PARE'],
104
+ options: {
105
+ parquet: {
106
+ shape: 'geojson-table',
107
+ columnList: [],
108
+ geoparquet: true,
109
+ url: undefined,
110
+ preserveBinary: false
111
+ }
112
+ }
113
+ };
114
+
115
+ /** ParquetJS table loader */
116
+ export const GeoParquetLoader: LoaderWithParser<
117
+ ObjectRowTable | GeoJSONTable,
118
+ ObjectRowTableBatch | GeoJSONTableBatch,
119
+ ParquetLoaderOptions
120
+ > = {
121
+ ...GeoParquetWorkerLoader,
122
+ parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
123
+ return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
124
+ },
125
+ parseFile: parseGeoParquetFile,
126
+ parseFileInBatches: parseGeoParquetFileInBatches
127
+ };
128
+
129
+ /** @deprecated Test to see if we can improve perf of parquetjs loader */
130
+ export const ParquetColumnarWorkerLoader: Loader<
59
131
  ColumnarTable,
60
132
  ColumnarTableBatch,
61
133
  ParquetLoaderOptions
@@ -73,8 +145,16 @@ export const ParquetColumnarLoader: Loader<
73
145
  options: ParquetLoader.options
74
146
  };
75
147
 
76
- // Defeat tree shaking
77
- // @ts-ignore
78
- ParquetLoader.Buffer = Buffer;
79
- // @ts-ignore
80
- ParquetColumnarLoader.Buffer = Buffer;
148
+ /** @deprecated Test to see if we can improve perf of parquetjs loader */
149
+ export const ParquetColumnarLoader: LoaderWithParser<
150
+ ColumnarTable,
151
+ ColumnarTableBatch,
152
+ ParquetLoaderOptions
153
+ > = {
154
+ ...ParquetColumnarWorkerLoader,
155
+ parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
156
+ return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
157
+ },
158
+ parseFile: parseParquetFileInColumns,
159
+ parseFileInBatches: parseParquetFileInColumnarBatches
160
+ };
@@ -1,9 +1,11 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
- import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
4
+ import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
5
5
  import type {ArrowTable} from '@loaders.gl/arrow';
6
6
 
7
+ import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
8
+
7
9
  // __VERSION__ is injected by babel-plugin-version-inline
8
10
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
9
11
  const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
@@ -17,7 +19,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
17
19
  };
18
20
 
19
21
  /** Parquet WASM table loader */
20
- export const ParquetWasmLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
22
+ export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
21
23
  name: 'Apache Parquet',
22
24
  id: 'parquet-wasm',
23
25
  module: 'parquet',
@@ -35,3 +37,9 @@ export const ParquetWasmLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptio
35
37
  }
36
38
  }
37
39
  };
40
+
41
+ /** Parquet WASM table loader */
42
+ export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
43
+ ...ParquetWasmWorkerLoader,
44
+ parse: parseParquetWasm
45
+ };
@@ -2,6 +2,6 @@
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
4
  import {createLoaderWorker} from '@loaders.gl/loader-utils';
5
- import {ParquetLoader} from '../index';
5
+ import {ParquetLoader} from '../parquet-loader';
6
6
 
7
7
  createLoaderWorker(ParquetLoader);
@@ -1,6 +0,0 @@
1
- import type { ReadableFile } from '@loaders.gl/loader-utils';
2
- import type { GeoJSONTable, GeoJSONTableBatch, ObjectRowTable, ObjectRowTableBatch } from '@loaders.gl/schema';
3
- import type { ParquetLoaderOptions } from '../../parquet-loader';
4
- export declare function parseParquetFile(file: ReadableFile, options?: ParquetLoaderOptions): Promise<ObjectRowTable | GeoJSONTable>;
5
- export declare function parseParquetFileInBatches(file: ReadableFile, options?: ParquetLoaderOptions): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch>;
6
- //# sourceMappingURL=parse-parquet-to-rows.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAC3D,OAAO,KAAK,EACV,YAAY,EACZ,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACpB,MAAM,oBAAoB,CAAC;AAI5B,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAM/D,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,YAAY,EAClB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,cAAc,GAAG,YAAY,CAAC,CA0BxC;AAED,wBAAuB,yBAAyB,CAC9C,IAAI,EAAE,YAAY,EAClB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,mBAAmB,GAAG,iBAAiB,CAAC,CAuBxD"}
@@ -1 +0,0 @@
1
- {"version":3,"file":"parse-parquet-to-rows.js","names":["convertWKBTableToGeoJSON","WKTLoader","WKBLoader","ParquetReader","getSchemaFromParquetReader","installBufferPolyfill","parseParquetFile","file","options","_options$parquet","_options$parquet2","reader","preserveBinary","parquet","schema","rows","rowBatches","rowBatchIterator","rowBatch","row","push","objectRowTable","shape","data","convertTable","parseParquetFileInBatches","_options$parquet3","_options$parquet4","table","batchType","length","error","Error"],"sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n// import {ColumnarTableBatch} from '@loaders.gl/schema';\nimport type {ReadableFile} from '@loaders.gl/loader-utils';\nimport type {\n GeoJSONTable,\n GeoJSONTableBatch,\n ObjectRowTable,\n ObjectRowTableBatch\n} from '@loaders.gl/schema';\nimport {convertWKBTableToGeoJSON} from '@loaders.gl/gis';\nimport {WKTLoader, WKBLoader} from '@loaders.gl/wkt';\n\nimport type {ParquetLoaderOptions} from '../../parquet-loader';\nimport type {ParquetRow} from '../../parquetjs/schema/declare';\nimport {ParquetReader} from '../../parquetjs/parser/parquet-reader';\nimport {getSchemaFromParquetReader} from './get-parquet-schema';\nimport {installBufferPolyfill} from '../../polyfills/buffer';\n\nexport async function parseParquetFile(\n file: ReadableFile,\n options?: ParquetLoaderOptions\n): Promise<ObjectRowTable | GeoJSONTable> {\n installBufferPolyfill();\n\n const reader = new ParquetReader(file, {\n preserveBinary: options?.parquet?.preserveBinary\n });\n\n const schema = await getSchemaFromParquetReader(reader);\n\n const rows: ParquetRow[] = [];\n\n const rowBatches = reader.rowBatchIterator(options?.parquet);\n for await (const rowBatch of rowBatches) {\n // we have only one input batch so return\n for (const row of rowBatch) {\n rows.push(row);\n }\n }\n const objectRowTable: ObjectRowTable = {\n shape: 'object-row-table',\n schema,\n data: rows\n };\n\n const shape = options?.parquet?.shape;\n return convertTable(objectRowTable, shape);\n}\n\nexport async function* parseParquetFileInBatches(\n file: ReadableFile,\n options?: ParquetLoaderOptions\n): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch> {\n const reader = new ParquetReader(file, {\n preserveBinary: options?.parquet?.preserveBinary\n });\n\n const schema = await getSchemaFromParquetReader(reader);\n const rowBatches = reader.rowBatchIterator(options?.parquet);\n for await (const rows of rowBatches) {\n const objectRowTable: ObjectRowTable = {\n shape: 'object-row-table',\n schema,\n data: rows\n };\n const shape = options?.parquet?.shape;\n const table = convertTable(objectRowTable, shape);\n\n yield {\n batchType: 'data',\n schema,\n ...table,\n length: rows.length\n };\n }\n}\n\nfunction convertTable(\n objectRowTable: ObjectRowTable,\n shape?: 'object-row-table' | 'geojson-table'\n): ObjectRowTable | GeoJSONTable {\n switch (shape) {\n case 'object-row-table':\n return objectRowTable;\n\n case 'geojson-table':\n try {\n return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [\n WKTLoader,\n WKBLoader\n ]);\n } catch (error) {\n return objectRowTable;\n }\n\n default:\n throw new Error(shape);\n }\n}\n"],"mappings":"AASA,SAAQA,wBAAwB,QAAO,iBAAiB;AACxD,SAAQC,SAAS,EAAEC,SAAS,QAAO,iBAAiB;AAAC,SAI7CC,aAAa;AAAA,SACbC,0BAA0B;AAAA,SAC1BC,qBAAqB;AAE7B,OAAO,eAAeC,gBAAgBA,CACpCC,IAAkB,EAClBC,OAA8B,EACU;EAAA,IAAAC,gBAAA,EAAAC,iBAAA;EACxCL,qBAAqB,CAAC,CAAC;EAEvB,MAAMM,MAAM,GAAG,IAAIR,aAAa,CAACI,IAAI,EAAE;IACrCK,cAAc,EAAEJ,OAAO,aAAPA,OAAO,wBAAAC,gBAAA,GAAPD,OAAO,CAAEK,OAAO,cAAAJ,gBAAA,uBAAhBA,gBAAA,CAAkBG;EACpC,CAAC,CAAC;EAEF,MAAME,MAAM,GAAG,MAAMV,0BAA0B,CAACO,MAAM,CAAC;EAEvD,MAAMI,IAAkB,GAAG,EAAE;EAE7B,MAAMC,UAAU,GAAGL,MAAM,CAACM,gBAAgB,CAACT,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEK,OAAO,CAAC;EAC5D,WAAW,MAAMK,QAAQ,IAAIF,UAAU,EAAE;IAEvC,KAAK,MAAMG,GAAG,IAAID,QAAQ,EAAE;MAC1BH,IAAI,CAACK,IAAI,CAACD,GAAG,CAAC;IAChB;EACF;EACA,MAAME,cAA8B,GAAG;IACrCC,KAAK,EAAE,kBAAkB;IACzBR,MAAM;IACNS,IAAI,EAAER;EACR,CAAC;EAED,MAAMO,KAAK,GAAGd,OAAO,aAAPA,OAAO,wBAAAE,iBAAA,GAAPF,OAAO,CAAEK,OAAO,cAAAH,iBAAA,uBAAhBA,iBAAA,CAAkBY,KAAK;EACrC,OAAOE,YAAY,CAACH,cAAc,EAAEC,KAAK,CAAC;AAC5C;AAEA,OAAO,gBAAgBG,yBAAyBA,CAC9ClB,IAAkB,EAClBC,OAA8B,EAC0B;EAAA,IAAAkB,iBAAA;EACxD,MAAMf,MAAM,GAAG,IAAIR,aAAa,CAACI,IAAI,EAAE;IACrCK,cAAc,EAAEJ,OAAO,aAAPA,OAAO,wBAAAkB,iBAAA,GAAPlB,OAAO,CAAEK,OAAO,cAAAa,iBAAA,uBAAhBA,iBAAA,CAAkBd;EACpC,CAAC,CAAC;EAEF,MAAME,MAAM,GAAG,MAAMV,0BAA0B,CAACO,MAAM,CAAC;EACvD,MAAMK,UAAU,GAAGL,MAAM,CAACM,gBAAgB,CAACT,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEK,OAAO,CAAC;EAC5D,WAAW,MAAME,IAAI,IAAIC,UAAU,EAAE;IAAA,IAAAW,iBAAA;IACnC,MAAMN,cAA8B,GAAG;MACrCC,KAAK,EAAE,kBAAkB;MACzBR,MAAM;MACNS,IAAI,EAAER;IACR,CAAC;IACD,MAAMO,KAAK,GAAGd,OAAO,aAAPA,OAAO,wBAAAmB,iBAAA,GAAPnB,OAAO,CAAEK,OAAO,cAAAc,iBAAA,uBAAhBA,iBAAA,CAAkBL,KAAK;IACrC,MAAMM,KAAK,GAAGJ,YAAY,CAACH,cAAc,EAAEC,KAAK,CAAC;IAEjD,MAAM;MACJO,SAAS,EAAE,MAAM;MACjBf,MAAM;MACN,GAAGc,KAAK;MACRE,MAAM,EAAEf,IAAI,CAACe;IACf,CAAC;EACH;AACF;AAEA,SAASN,YAAYA,CACnBH,cAA8B,EAC9BC,KAA4C,EACb;EAC/B,QAAQA,KAAK;IACX,KAAK,kBAAkB;MACrB,OAAOD,cAAc;IAEvB,KAAK,eAAe;MAClB,IAAI;QACF,OAAOrB,wBAAwB,CAACqB,cAAc,EAAEA,cAAc,CAACP,MAAM,EAAG,CACtEb,SAAS,EACTC,SAAS,CACV,CAAC;MACJ,CAAC,CAAC,OAAO6B,KAAK,EAAE;QACd,OAAOV,cAAc;MACvB;IAEF;MACE,MAAM,IAAIW,KAAK,CAACV,KAAK,CAAC;EAC1B;AACF"}