@loaders.gl/parquet 3.2.0-alpha.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/dist.min.js +31 -15
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +25 -1
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/wasm/encode-parquet-wasm.js +56 -0
  6. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -0
  7. package/dist/es5/lib/wasm/load-wasm/index.js +14 -0
  8. package/dist/es5/lib/wasm/load-wasm/index.js.map +1 -0
  9. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +58 -0
  10. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  11. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +43 -0
  12. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  13. package/dist/es5/lib/wasm/parse-parquet-wasm.js +80 -0
  14. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +1 -1
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-wasm-loader.js +30 -0
  18. package/dist/es5/parquet-wasm-loader.js.map +1 -0
  19. package/dist/es5/parquet-wasm-writer.js +28 -0
  20. package/dist/es5/parquet-wasm-writer.js.map +1 -0
  21. package/dist/es5/parquet-writer.js +1 -1
  22. package/dist/es5/parquet-writer.js.map +1 -1
  23. package/dist/esm/index.js +7 -1
  24. package/dist/esm/index.js.map +1 -1
  25. package/dist/esm/lib/wasm/encode-parquet-wasm.js +16 -0
  26. package/dist/esm/lib/wasm/encode-parquet-wasm.js.map +1 -0
  27. package/dist/esm/lib/wasm/load-wasm/index.js +2 -0
  28. package/dist/esm/lib/wasm/load-wasm/index.js.map +1 -0
  29. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js +12 -0
  30. package/dist/esm/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -0
  31. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js +5 -0
  32. package/dist/esm/lib/wasm/load-wasm/load-wasm-node.js.map +1 -0
  33. package/dist/esm/lib/wasm/parse-parquet-wasm.js +25 -0
  34. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -0
  35. package/dist/esm/parquet-loader.js +1 -1
  36. package/dist/esm/parquet-loader.js.map +1 -1
  37. package/dist/esm/parquet-wasm-loader.js +22 -0
  38. package/dist/esm/parquet-wasm-loader.js.map +1 -0
  39. package/dist/esm/parquet-wasm-writer.js +19 -0
  40. package/dist/esm/parquet-wasm-writer.js.map +1 -0
  41. package/dist/esm/parquet-writer.js +1 -1
  42. package/dist/esm/parquet-writer.js.map +1 -1
  43. package/dist/index.d.ts +18 -1
  44. package/dist/index.d.ts.map +1 -1
  45. package/dist/index.js +10 -1
  46. package/dist/lib/wasm/encode-parquet-wasm.d.ts +21 -0
  47. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -0
  48. package/dist/lib/wasm/encode-parquet-wasm.js +30 -0
  49. package/dist/lib/wasm/load-wasm/index.d.ts +2 -0
  50. package/dist/lib/wasm/load-wasm/index.d.ts.map +1 -0
  51. package/dist/lib/wasm/load-wasm/index.js +5 -0
  52. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts +3 -0
  53. package/dist/lib/wasm/load-wasm/load-wasm-browser.d.ts.map +1 -0
  54. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +34 -0
  55. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts +3 -0
  56. package/dist/lib/wasm/load-wasm/load-wasm-node.d.ts.map +1 -0
  57. package/dist/lib/wasm/load-wasm/load-wasm-node.js +27 -0
  58. package/dist/lib/wasm/parse-parquet-wasm.d.ts +10 -0
  59. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -0
  60. package/dist/lib/wasm/parse-parquet-wasm.js +27 -0
  61. package/dist/parquet-wasm-loader.d.ts +23 -0
  62. package/dist/parquet-wasm-loader.d.ts.map +1 -0
  63. package/dist/parquet-wasm-loader.js +27 -0
  64. package/dist/parquet-wasm-writer.d.ts +3 -0
  65. package/dist/parquet-wasm-writer.d.ts.map +1 -0
  66. package/dist/parquet-wasm-writer.js +23 -0
  67. package/dist/parquet-worker.js +32 -16
  68. package/dist/parquet-worker.js.map +3 -3
  69. package/package.json +13 -7
  70. package/src/index.ts +9 -1
  71. package/src/lib/wasm/encode-parquet-wasm.ts +40 -0
  72. package/src/lib/wasm/load-wasm/index.ts +1 -0
  73. package/src/lib/wasm/load-wasm/load-wasm-browser.ts +15 -0
  74. package/src/lib/wasm/load-wasm/load-wasm-node.ts +5 -0
  75. package/src/lib/wasm/parse-parquet-wasm.ts +42 -0
  76. package/src/parquet-wasm-loader.ts +36 -0
  77. package/src/parquet-wasm-writer.ts +24 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "3.2.0-alpha.2",
3
+ "version": "3.2.0",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -36,12 +36,13 @@
36
36
  "child_process": false,
37
37
  "net": false,
38
38
  "tls": false,
39
- "lzo": false
39
+ "lzo": false,
40
+ "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
40
41
  },
41
42
  "dependencies": {
42
- "@loaders.gl/compression": "3.2.0-alpha.2",
43
- "@loaders.gl/loader-utils": "3.2.0-alpha.2",
44
- "@loaders.gl/schema": "3.2.0-alpha.2",
43
+ "@loaders.gl/compression": "3.2.0",
44
+ "@loaders.gl/loader-utils": "3.2.0",
45
+ "@loaders.gl/schema": "3.2.0",
45
46
  "async-mutex": "^0.2.2",
46
47
  "brotli": "^1.3.2",
47
48
  "bson": "^1.0.4",
@@ -50,17 +51,22 @@
50
51
  "lzo": "^0.4.11",
51
52
  "node-int64": "^0.4.0",
52
53
  "object-stream": "0.0.1",
54
+ "parquet-wasm": "^0.3.1",
53
55
  "snappyjs": "^0.6.0",
54
56
  "thrift": "^0.14.2",
55
57
  "varint": "^5.0.0",
56
58
  "zstd-codec": "^0.1"
57
59
  },
60
+ "peerDependencies": {
61
+ "apache-arrow": "*"
62
+ },
58
63
  "devDependencies": {
59
64
  "@types/bson": "^4.0.0",
60
65
  "@types/node": "^10.14.15",
61
66
  "@types/node-int64": "^0.4.29",
62
67
  "@types/thrift": "^0.10.8",
63
- "@types/varint": "^5.0.0"
68
+ "@types/varint": "^5.0.0",
69
+ "apache-arrow": "^4.0.0"
64
70
  },
65
- "gitHead": "52a602739cbfce60fc314f474efc984d199dff78"
71
+ "gitHead": "6660dc9291526752c40063b73a3f9dd97cc0ccc5"
66
72
  }
package/src/index.ts CHANGED
@@ -2,10 +2,12 @@ import type {LoaderWithParser} from '@loaders.gl/loader-utils';
2
2
 
3
3
  // ParquetLoader
4
4
 
5
+ import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
5
6
  import {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';
6
7
  import {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';
8
+ import {parseParquet as parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
7
9
 
8
- export {ParquetWorkerLoader};
10
+ export {ParquetWorkerLoader, ParquetWasmWorkerLoader};
9
11
 
10
12
  /** ParquetJS table loader */
11
13
  export const ParquetLoader = {
@@ -14,9 +16,15 @@ export const ParquetLoader = {
14
16
  parseFileInBatches: parseParquetFileInBatches
15
17
  };
16
18
 
19
+ export const ParquetWasmLoader = {
20
+ ...ParquetWasmWorkerLoader,
21
+ parse: parseParquetWasm
22
+ };
23
+
17
24
  // ParquetWriter
18
25
 
19
26
  export {ParquetWriter as _ParquetWriter} from './parquet-writer';
27
+ export {ParquetWasmWriter} from './parquet-wasm-writer';
20
28
 
21
29
  // EXPERIMENTAL - expose the internal parquetjs API
22
30
 
@@ -0,0 +1,40 @@
1
+ import type {Table} from 'apache-arrow';
2
+ import type {WriterOptions} from '@loaders.gl/loader-utils';
3
+
4
+ import {RecordBatchStreamWriter} from 'apache-arrow';
5
+ import {loadWasm} from './load-wasm';
6
+
7
+ export type ParquetWriterOptions = WriterOptions & {
8
+ parquet?: {
9
+ wasmUrl?: string;
10
+ };
11
+ };
12
+
13
+ /**
14
+ * Encode Arrow Table to Parquet buffer
15
+ */
16
+ export async function encode(table: Table, options?: ParquetWriterOptions): Promise<ArrayBuffer> {
17
+ const wasmUrl = options?.parquet?.wasmUrl;
18
+ const wasm = await loadWasm(wasmUrl);
19
+
20
+ const arrowIPCBytes = tableToIPC(table);
21
+ // TODO: provide options for how to write table.
22
+ const writerProperties = new wasm.WriterPropertiesBuilder().build();
23
+ const parquetBytes = wasm.writeParquet(arrowIPCBytes, writerProperties);
24
+ return parquetBytes.buffer.slice(
25
+ parquetBytes.byteOffset,
26
+ parquetBytes.byteLength + parquetBytes.byteOffset
27
+ );
28
+ }
29
+
30
+ /**
31
+ * Serialize a {@link Table} to the IPC format. This function is a convenience
32
+ * wrapper for {@link RecordBatchStreamWriter}.
33
+ * Opposite of {@link tableFromIPC}.
34
+ *
35
+ * @param table The Table to serialize.
36
+ * @returns The Table serialized as Arrow IPC stream bytes.
37
+ */
38
+ export function tableToIPC(table: Table): Uint8Array {
39
+ return RecordBatchStreamWriter.writeAll(table).toUint8Array(true);
40
+ }
@@ -0,0 +1 @@
1
+ export {loadWasm} from './load-wasm-node';
@@ -0,0 +1,15 @@
1
+ import * as wasmEsm from 'parquet-wasm/esm2/arrow1';
2
+
3
+ let cached: typeof wasmEsm | null = null;
4
+
5
+ export async function loadWasm(wasmUrl?: string) {
6
+ if (cached !== null) {
7
+ return cached;
8
+ }
9
+
10
+ // For ESM bundles, need to await the default export, which loads the WASM
11
+ await wasmEsm.default(wasmUrl);
12
+ cached = wasmEsm;
13
+
14
+ return wasmEsm;
15
+ }
@@ -0,0 +1,5 @@
1
+ import * as wasmNode from 'parquet-wasm/node/arrow1';
2
+
3
+ export async function loadWasm(wasmUrl?: string) {
4
+ return wasmNode;
5
+ }
@@ -0,0 +1,42 @@
1
+ // eslint-disable
2
+ import type {RecordBatch} from 'apache-arrow';
3
+ import type {LoaderOptions} from '@loaders.gl/loader-utils';
4
+ import {Table, RecordBatchStreamReader} from 'apache-arrow';
5
+ import {loadWasm} from './load-wasm/load-wasm-node';
6
+
7
+ export type ParquetLoaderOptions = LoaderOptions & {
8
+ parquet?: {
9
+ type?: 'arrow-table';
10
+ wasmUrl?: string;
11
+ };
12
+ };
13
+
14
+ export async function parseParquet(
15
+ arrayBuffer: ArrayBuffer,
16
+ options?: ParquetLoaderOptions
17
+ ): Promise<Table> {
18
+ const wasmUrl = options?.parquet?.wasmUrl;
19
+ const wasm = await loadWasm(wasmUrl);
20
+
21
+ const arr = new Uint8Array(arrayBuffer);
22
+ const arrowIPCUint8Arr = wasm.readParquet(arr);
23
+ const arrowIPCBuffer = arrowIPCUint8Arr.buffer.slice(
24
+ arrowIPCUint8Arr.byteOffset,
25
+ arrowIPCUint8Arr.byteLength + arrowIPCUint8Arr.byteOffset
26
+ );
27
+ const arrowTable = tableFromIPC(arrowIPCBuffer);
28
+ return arrowTable;
29
+ }
30
+
31
+ /**
32
+ * Deserialize the IPC format into a {@link Table}. This function is a
33
+ * convenience wrapper for {@link RecordBatchStreamReader}. Opposite of {@link tableToIPC}.
34
+ */
35
+ function tableFromIPC(input: ArrayBuffer): Table {
36
+ const reader = RecordBatchStreamReader.from(input);
37
+ const recordBatches: RecordBatch[] = [];
38
+ for (const recordBatch of reader) {
39
+ recordBatches.push(recordBatch);
40
+ }
41
+ return new Table(recordBatches);
42
+ }
@@ -0,0 +1,36 @@
1
+ import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
2
+
3
+ // __VERSION__ is injected by babel-plugin-version-inline
4
+ // @ts-ignore TS2304: Cannot find name '__VERSION__'.
5
+ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
6
+
7
+ export type ParquetLoaderOptions = LoaderOptions & {
8
+ parquet?: {
9
+ type?: 'arrow-table';
10
+ wasmUrl?: string;
11
+ };
12
+ };
13
+
14
+ const DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {
15
+ parquet: {
16
+ type: 'arrow-table',
17
+ wasmUrl: 'https://unpkg.com/parquet-wasm@0.3.1/esm2/arrow1_bg.wasm'
18
+ }
19
+ };
20
+
21
+ /** Parquet loader descriptor for the parquet-wasm backend (default output type: 'arrow-table') */
22
+ export const ParquetWasmLoader = {
23
+ name: 'Apache Parquet',
24
+ id: 'parquet-wasm',
25
+ module: 'parquet',
26
+ version: VERSION,
27
+ worker: false,
28
+ category: 'table',
29
+ extensions: ['parquet'],
30
+ mimeTypes: ['application/octet-stream'],
31
+ binary: true,
32
+ tests: ['PAR1', 'PARE'],
33
+ options: DEFAULT_PARQUET_LOADER_OPTIONS
34
+ };
35
+
36
+ export const _typecheckParquetLoader: Loader = ParquetWasmLoader;
@@ -0,0 +1,24 @@
1
+ import type {Writer} from '@loaders.gl/loader-utils';
2
+ import {encode, ParquetWriterOptions} from './lib/wasm/encode-parquet-wasm';
3
+
4
+ // __VERSION__ is injected by babel-plugin-version-inline
5
+ // @ts-ignore TS2304: Cannot find name '__VERSION__'.
6
+ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
7
+
8
+ const DEFAULT_PARQUET_WRITER_OPTIONS: ParquetWriterOptions = {
9
+ parquet: {
10
+ wasmUrl: 'https://unpkg.com/parquet-wasm@0.3.1/esm2/arrow1_bg.wasm'
11
+ }
12
+ };
13
+
14
+ export const ParquetWasmWriter: Writer = {
15
+ name: 'Apache Parquet',
16
+ id: 'parquet-wasm',
17
+ module: 'parquet',
18
+ version: VERSION,
19
+ extensions: ['parquet'],
20
+ mimeTypes: ['application/octet-stream'],
21
+ encode,
22
+ binary: true,
23
+ options: DEFAULT_PARQUET_WRITER_OPTIONS
24
+ };