@loaders.gl/parquet 4.3.0-alpha.1 → 4.3.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/dist/index.cjs +100 -24
  2. package/dist/index.cjs.map +4 -4
  3. package/dist/lib/constants.d.ts +1 -1
  4. package/dist/lib/constants.d.ts.map +1 -1
  5. package/dist/lib/constants.js +2 -2
  6. package/dist/lib/encoders/encode-parquet-wasm.d.ts.map +1 -0
  7. package/dist/lib/{wasm → encoders}/encode-parquet-wasm.js +1 -1
  8. package/dist/lib/parsers/parse-parquet-wasm.d.ts +10 -0
  9. package/dist/lib/parsers/parse-parquet-wasm.d.ts.map +1 -0
  10. package/dist/lib/parsers/parse-parquet-wasm.js +51 -0
  11. package/dist/lib/utils/load-wasm.d.ts +3 -0
  12. package/dist/lib/utils/load-wasm.d.ts.map +1 -0
  13. package/dist/lib/utils/make-stream-iterator.d.ts +11 -0
  14. package/dist/lib/utils/make-stream-iterator.d.ts.map +1 -0
  15. package/dist/lib/utils/make-stream-iterator.js +67 -0
  16. package/dist/parquet-loader.js +1 -1
  17. package/dist/parquet-wasm-loader.d.ts +17 -5
  18. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  19. package/dist/parquet-wasm-loader.js +19 -4
  20. package/dist/parquet-wasm-writer.js +1 -1
  21. package/dist/parquet-writer.js +1 -1
  22. package/dist/parquet_wasm_bg.wasm +0 -0
  23. package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
  24. package/dist/parquetjs/codecs/rle.js +1 -0
  25. package/package.json +12 -12
  26. package/src/lib/constants.ts +2 -1
  27. package/src/lib/{wasm → encoders}/encode-parquet-wasm.ts +1 -1
  28. package/src/lib/parsers/parse-parquet-wasm.ts +72 -0
  29. package/src/lib/utils/make-stream-iterator.ts +87 -0
  30. package/src/parquet-wasm-loader.ts +36 -9
  31. package/src/parquet-wasm-writer.ts +1 -1
  32. package/src/parquetjs/codecs/rle.ts +3 -1
  33. package/dist/arrow1_bg.wasm +0 -0
  34. package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +0 -1
  35. package/dist/lib/wasm/load-wasm.d.ts +0 -3
  36. package/dist/lib/wasm/load-wasm.d.ts.map +0 -1
  37. package/dist/lib/wasm/parse-parquet-wasm.d.ts +0 -4
  38. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +0 -1
  39. package/dist/lib/wasm/parse-parquet-wasm.js +0 -24
  40. package/src/lib/wasm/parse-parquet-wasm.ts +0 -33
  41. package/src/lib/wip/convert-schema-deep.java.disabled +0 -910
  42. package/src/lib/wip/convert-schema-deep.rs.disabled +0 -976
  43. /package/dist/lib/{wasm → encoders}/encode-parquet-wasm.d.ts +0 -0
  44. /package/dist/lib/{wasm → utils}/load-wasm.js +0 -0
  45. /package/src/lib/{wasm → utils}/load-wasm.ts +0 -0
@@ -2,16 +2,26 @@
2
2
  // SPDX-License-Identifier: MIT
3
3
  // Copyright (c) vis.gl contributors
4
4
 
5
+ import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema';
5
6
  import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
6
- import type {ArrowTable} from '@loaders.gl/arrow';
7
+ import {ReadableFile, BlobFile} from '@loaders.gl/loader-utils';
7
8
 
8
- import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
9
+ import {
10
+ parseParquetFileWasm,
11
+ parseParquetFileInBatchesWasm
12
+ } from './lib/parsers/parse-parquet-wasm';
9
13
  import {VERSION, PARQUET_WASM_URL} from './lib/constants';
10
14
 
11
15
  /** Parquet WASM loader options */
12
16
  export type ParquetWasmLoaderOptions = LoaderOptions & {
13
17
  parquet?: {
14
- type?: 'arrow-table';
18
+ shape?: 'arrow-table';
19
+ limit?: number; // Provide a limit to the number of rows to be read.
20
+ offset?: number; // Provide an offset to skip over the given number of rows.
21
+ batchSize?: number; // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
22
+ columns?: string[]; // The column names from the file to read.
23
+ rowGroups?: number[]; // Only read data from the provided row group indexes.
24
+ concurrency?: number; // The number of concurrent requests to make
15
25
  wasmUrl?: string;
16
26
  };
17
27
  };
@@ -19,7 +29,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
19
29
  /** Parquet WASM table loader */
20
30
  export const ParquetWasmWorkerLoader = {
21
31
  dataType: null as unknown as ArrowTable,
22
- batchType: null as never,
32
+ batchType: null as unknown as ArrowTableBatch,
23
33
 
24
34
  name: 'Apache Parquet',
25
35
  id: 'parquet-wasm',
@@ -33,17 +43,34 @@ export const ParquetWasmWorkerLoader = {
33
43
  tests: ['PAR1', 'PARE'],
34
44
  options: {
35
45
  parquet: {
36
- type: 'arrow-table',
46
+ shape: 'arrow-table',
47
+ limit: undefined, // Provide a limit to the number of rows to be read.
48
+ offset: 0, // Provide an offset to skip over the given number of rows.
49
+ batchSize: undefined, // The number of rows in each batch. If not provided, the upstream parquet default is 1024.
50
+ columns: undefined, // The column names from the file to read.
51
+ rowGroups: undefined, // Only read data from the provided row group indexes.
52
+ concurrency: undefined, // The number of concurrent requests to make
37
53
  wasmUrl: PARQUET_WASM_URL
38
54
  }
39
55
  }
40
- } as const satisfies Loader<ArrowTable, never, ParquetWasmLoaderOptions>;
56
+ } as const satisfies Loader<ArrowTable, ArrowTableBatch, ParquetWasmLoaderOptions>;
41
57
 
42
58
  /** Parquet WASM table loader */
43
59
  export const ParquetWasmLoader = {
44
60
  ...ParquetWasmWorkerLoader,
61
+
45
62
  parse(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions) {
46
- options = {parquet: {...ParquetWasmLoader.options.parquet, ...options?.parquet}, ...options};
47
- return parseParquetWasm(arrayBuffer, options);
63
+ const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
64
+ return parseParquetFileWasm(new BlobFile(arrayBuffer), wasmOptions);
65
+ },
66
+
67
+ parseFile(file: ReadableFile, options?: ParquetWasmLoaderOptions) {
68
+ const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
69
+ return parseParquetFileWasm(file, wasmOptions);
70
+ },
71
+
72
+ parseFileInBatches(file: ReadableFile, options?: ParquetWasmLoaderOptions) {
73
+ const wasmOptions = {...ParquetWasmLoader.options.parquet, ...options?.parquet};
74
+ return parseParquetFileInBatchesWasm(file, wasmOptions);
48
75
  }
49
- } as const satisfies LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions>;
76
+ } as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, ParquetWasmLoaderOptions>;
@@ -4,7 +4,7 @@
4
4
 
5
5
  import type {WriterWithEncoder} from '@loaders.gl/loader-utils';
6
6
  import type {ArrowTable} from '@loaders.gl/arrow';
7
- import {encode} from './lib/wasm/encode-parquet-wasm';
7
+ import {encode} from './lib/encoders/encode-parquet-wasm';
8
8
  import type {WriterOptions} from '@loaders.gl/loader-utils';
9
9
 
10
10
  import {VERSION, PARQUET_WASM_URL} from './lib/constants';
@@ -97,7 +97,9 @@ export function decodeValues(
97
97
  const count = header >> 1;
98
98
  decodedValues = decodeRunRepeated(cursor, count, opts);
99
99
  }
100
- for (const value of decodedValues) {
100
+
101
+ // strange failure in docusaurus / webpack if we don't cast the type here
102
+ for (const value of decodedValues as any[]) {
101
103
  values.push(value);
102
104
  }
103
105
  }
Binary file
@@ -1 +0,0 @@
1
- {"version":3,"file":"encode-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/encode-parquet-wasm.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAKlD,OAAO,KAAK,EAAC,oBAAoB,EAAC,qCAAkC;AAEpE;;GAEG;AACH,wBAAsB,MAAM,CAC1B,KAAK,EAAE,UAAU,EACjB,OAAO,EAAE,oBAAoB,GAC5B,OAAO,CAAC,WAAW,CAAC,CAsBtB"}
@@ -1,3 +0,0 @@
1
- import initWasm from 'parquet-wasm';
2
- export declare function loadWasm(wasmUrl?: string): Promise<typeof initWasm>;
3
- //# sourceMappingURL=load-wasm.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"load-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/load-wasm.ts"],"names":[],"mappings":"AAKA,OAAO,QAAQ,MAAM,cAAc,CAAC;AAMpC,wBAAsB,QAAQ,CAAC,OAAO,GAAE,MAAyB,4BAUhE"}
@@ -1,4 +0,0 @@
1
- import type { ArrowTable } from '@loaders.gl/arrow';
2
- import type { ParquetWasmLoaderOptions } from "../../parquet-wasm-loader.js";
3
- export declare function parseParquetWasm(arrayBuffer: ArrayBuffer, options: ParquetWasmLoaderOptions): Promise<ArrowTable>;
4
- //# sourceMappingURL=parse-parquet-wasm.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"parse-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/parse-parquet-wasm.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAElD,OAAO,KAAK,EAAC,wBAAwB,EAAC,qCAAkC;AAIxE,wBAAsB,gBAAgB,CACpC,WAAW,EAAE,WAAW,EACxB,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,UAAU,CAAC,CAkBrB"}
@@ -1,24 +0,0 @@
1
- // loaders.gl
2
- // SPDX-License-Identifier: MIT
3
- // Copyright (c) vis.gl contributors
4
- import { serializeArrowSchema } from '@loaders.gl/arrow';
5
- import { loadWasm } from "./load-wasm.js";
6
- import * as arrow from 'apache-arrow';
7
- export async function parseParquetWasm(arrayBuffer, options) {
8
- const arr = new Uint8Array(arrayBuffer);
9
- const wasmUrl = options?.parquet?.wasmUrl;
10
- const wasm = await loadWasm(wasmUrl);
11
- const wasmTable = wasm.readParquet(arr);
12
- try {
13
- const ipcStream = wasmTable.intoIPCStream();
14
- const arrowTable = arrow.tableFromIPC(ipcStream);
15
- return {
16
- shape: 'arrow-table',
17
- schema: serializeArrowSchema(arrowTable.schema),
18
- data: arrowTable
19
- };
20
- }
21
- finally {
22
- // wasmTable.free();
23
- }
24
- }
@@ -1,33 +0,0 @@
1
- // loaders.gl
2
- // SPDX-License-Identifier: MIT
3
- // Copyright (c) vis.gl contributors
4
-
5
- // eslint-disable
6
- import type {ArrowTable} from '@loaders.gl/arrow';
7
- import {serializeArrowSchema} from '@loaders.gl/arrow';
8
- import type {ParquetWasmLoaderOptions} from '../../parquet-wasm-loader';
9
- import {loadWasm} from './load-wasm';
10
- import * as arrow from 'apache-arrow';
11
-
12
- export async function parseParquetWasm(
13
- arrayBuffer: ArrayBuffer,
14
- options: ParquetWasmLoaderOptions
15
- ): Promise<ArrowTable> {
16
- const arr = new Uint8Array(arrayBuffer);
17
-
18
- const wasmUrl = options?.parquet?.wasmUrl;
19
- const wasm = await loadWasm(wasmUrl);
20
- const wasmTable = wasm.readParquet(arr);
21
- try {
22
- const ipcStream = wasmTable.intoIPCStream();
23
- const arrowTable = arrow.tableFromIPC(ipcStream);
24
-
25
- return {
26
- shape: 'arrow-table',
27
- schema: serializeArrowSchema(arrowTable.schema),
28
- data: arrowTable
29
- };
30
- } finally {
31
- // wasmTable.free();
32
- }
33
- }