@loaders.gl/parquet 4.0.3 → 4.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +138 -66
- package/dist/index.d.ts +1 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -21
- package/dist/index.js.map +1 -1
- package/dist/lib/parsers/get-parquet-schema.d.ts.map +1 -1
- package/dist/lib/parsers/get-parquet-schema.js.map +1 -1
- package/dist/lib/parsers/parse-geoparquet.d.ts +6 -0
- package/dist/lib/parsers/parse-geoparquet.d.ts.map +1 -0
- package/dist/lib/parsers/parse-geoparquet.js +56 -0
- package/dist/lib/parsers/parse-geoparquet.js.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +6 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/lib/parsers/parse-parquet.d.ts +17 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -0
- package/dist/lib/parsers/{parse-parquet-to-rows.js → parse-parquet.js} +2 -8
- package/dist/lib/parsers/parse-parquet.js.map +1 -0
- package/dist/lib/wasm/encode-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/lib/wasm/load-wasm-browser.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm-browser.js.map +1 -1
- package/dist/lib/wasm/load-wasm-node.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm-node.js.map +1 -1
- package/dist/lib/wasm/load-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/load-wasm.js.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/parquet-loader.d.ts +14 -4
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +50 -4
- package/dist/parquet-loader.js.map +1 -1
- package/dist/parquet-wasm-loader.d.ts +4 -2
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +6 -1
- package/dist/parquet-wasm-loader.js.map +1 -1
- package/dist/workers/parquet-worker.js +1 -1
- package/dist/workers/parquet-worker.js.map +1 -1
- package/package.json +15 -12
- package/src/index.ts +7 -60
- package/src/lib/parsers/get-parquet-schema.ts +3 -0
- package/src/lib/parsers/parse-geoparquet.ts +87 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
- package/src/lib/parsers/{parse-parquet-to-rows.ts → parse-parquet.ts} +20 -21
- package/src/lib/wasm/encode-parquet-wasm.ts +3 -0
- package/src/lib/wasm/load-wasm-browser.ts +3 -0
- package/src/lib/wasm/load-wasm-node.ts +3 -0
- package/src/lib/wasm/load-wasm.ts +3 -0
- package/src/lib/wasm/parse-parquet-wasm.ts +3 -0
- package/src/parquet-loader.ts +89 -9
- package/src/parquet-wasm-loader.ts +10 -2
- package/src/workers/parquet-worker.ts +1 -1
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +0 -6
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +0 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js.map +0 -1
package/src/parquet-loader.ts
CHANGED
|
@@ -1,13 +1,23 @@
|
|
|
1
1
|
// loaders.gl, MIT license
|
|
2
2
|
// Copyright (c) vis.gl contributors
|
|
3
3
|
|
|
4
|
-
import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
4
|
+
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
5
|
import type {
|
|
6
6
|
ObjectRowTable,
|
|
7
7
|
ObjectRowTableBatch,
|
|
8
|
+
GeoJSONTable,
|
|
9
|
+
GeoJSONTableBatch,
|
|
8
10
|
ColumnarTable,
|
|
9
11
|
ColumnarTableBatch
|
|
10
12
|
} from '@loaders.gl/schema';
|
|
13
|
+
import {BlobFile} from '@loaders.gl/loader-utils';
|
|
14
|
+
|
|
15
|
+
import {parseParquetFile, parseParquetFileInBatches} from './lib/parsers/parse-parquet';
|
|
16
|
+
import {parseGeoParquetFile, parseGeoParquetFileInBatches} from './lib/parsers/parse-geoparquet';
|
|
17
|
+
import {
|
|
18
|
+
parseParquetFileInColumns,
|
|
19
|
+
parseParquetFileInColumnarBatches
|
|
20
|
+
} from './lib/parsers/parse-parquet-to-columns';
|
|
11
21
|
|
|
12
22
|
export {Buffer} from './polyfills/buffer/install-buffer-polyfill';
|
|
13
23
|
|
|
@@ -32,8 +42,14 @@ export type ParquetLoaderOptions = LoaderOptions & {
|
|
|
32
42
|
};
|
|
33
43
|
};
|
|
34
44
|
|
|
35
|
-
/**
|
|
36
|
-
export const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions> = {
|
45
|
+
/**
|
|
46
|
+
* ParquetJS table loader
|
|
47
|
+
*/
|
|
48
|
+
export const ParquetWorkerLoader: Loader<
|
|
49
|
+
ObjectRowTable,
|
|
50
|
+
ObjectRowTableBatch,
|
|
51
|
+
ParquetLoaderOptions
|
|
52
|
+
> = {
|
|
37
53
|
name: 'Apache Parquet',
|
|
38
54
|
id: 'parquet',
|
|
39
55
|
module: 'parquet',
|
|
@@ -55,7 +71,63 @@ export const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetL
|
|
|
55
71
|
}
|
|
56
72
|
};
|
|
57
73
|
|
|
58
|
-
export const ParquetColumnarLoader: Loader<
|
74
|
+
/** ParquetJS table loader */
|
|
75
|
+
export const ParquetLoader: LoaderWithParser<
|
|
76
|
+
ObjectRowTable | GeoJSONTable,
|
|
77
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
78
|
+
ParquetLoaderOptions
|
|
79
|
+
> = {
|
|
80
|
+
...ParquetWorkerLoader,
|
|
81
|
+
parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
|
|
82
|
+
parseParquetFile(new BlobFile(arrayBuffer), options),
|
|
83
|
+
|
|
84
|
+
parseFile: parseParquetFile,
|
|
85
|
+
parseFileInBatches: parseParquetFileInBatches
|
|
86
|
+
};
|
|
87
|
+
|
|
88
|
+
// Defeat tree shaking
|
|
89
|
+
// @ts-ignore
|
|
90
|
+
ParquetLoader.Buffer = Buffer;
|
|
91
|
+
|
|
92
|
+
export const GeoParquetWorkerLoader: Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions> =
|
|
93
|
+
{
|
|
94
|
+
name: 'Apache Parquet',
|
|
95
|
+
id: 'parquet',
|
|
96
|
+
module: 'parquet',
|
|
97
|
+
version: VERSION,
|
|
98
|
+
worker: true,
|
|
99
|
+
category: 'table',
|
|
100
|
+
extensions: ['parquet'],
|
|
101
|
+
mimeTypes: ['application/octet-stream'],
|
|
102
|
+
binary: true,
|
|
103
|
+
tests: ['PAR1', 'PARE'],
|
|
104
|
+
options: {
|
|
105
|
+
parquet: {
|
|
106
|
+
shape: 'geojson-table',
|
|
107
|
+
columnList: [],
|
|
108
|
+
geoparquet: true,
|
|
109
|
+
url: undefined,
|
|
110
|
+
preserveBinary: false
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
/** ParquetJS table loader */
|
|
116
|
+
export const GeoParquetLoader: LoaderWithParser<
|
|
117
|
+
ObjectRowTable | GeoJSONTable,
|
|
118
|
+
ObjectRowTableBatch | GeoJSONTableBatch,
|
|
119
|
+
ParquetLoaderOptions
|
|
120
|
+
> = {
|
|
121
|
+
...GeoParquetWorkerLoader,
|
|
122
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
123
|
+
return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
|
|
124
|
+
},
|
|
125
|
+
parseFile: parseGeoParquetFile,
|
|
126
|
+
parseFileInBatches: parseGeoParquetFileInBatches
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
130
|
+
export const ParquetColumnarWorkerLoader: Loader<
|
|
59
131
|
ColumnarTable,
|
|
60
132
|
ColumnarTableBatch,
|
|
61
133
|
ParquetLoaderOptions
|
|
@@ -73,8 +145,16 @@ export const ParquetColumnarLoader: Loader<
|
|
|
73
145
|
options: ParquetLoader.options
|
|
74
146
|
};
|
|
75
147
|
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
148
|
+
/** @deprecated Test to see if we can improve perf of parquetjs loader */
|
|
149
|
+
export const ParquetColumnarLoader: LoaderWithParser<
|
|
150
|
+
ColumnarTable,
|
|
151
|
+
ColumnarTableBatch,
|
|
152
|
+
ParquetLoaderOptions
|
|
153
|
+
> = {
|
|
154
|
+
...ParquetColumnarWorkerLoader,
|
|
155
|
+
parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
|
|
156
|
+
return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
|
|
157
|
+
},
|
|
158
|
+
parseFile: parseParquetFileInColumns,
|
|
159
|
+
parseFileInBatches: parseParquetFileInColumnarBatches
|
|
160
|
+
};
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
// loaders.gl, MIT license
|
|
2
2
|
// Copyright (c) vis.gl contributors
|
|
3
3
|
|
|
4
|
-
import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
4
|
+
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
|
|
5
5
|
import type {ArrowTable} from '@loaders.gl/arrow';
|
|
6
6
|
|
|
7
|
+
import {parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
|
|
8
|
+
|
|
7
9
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
8
10
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
9
11
|
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
@@ -17,7 +19,7 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
|
|
|
17
19
|
};
|
|
18
20
|
|
|
19
21
|
/** Parquet WASM table loader */
|
|
20
|
-
export const ParquetWasmLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
22
|
+
export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
21
23
|
name: 'Apache Parquet',
|
|
22
24
|
id: 'parquet-wasm',
|
|
23
25
|
module: 'parquet',
|
|
@@ -35,3 +37,9 @@ export const ParquetWasmLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptio
|
|
|
35
37
|
}
|
|
36
38
|
}
|
|
37
39
|
};
|
|
40
|
+
|
|
41
|
+
/** Parquet WASM table loader */
|
|
42
|
+
export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
|
|
43
|
+
...ParquetWasmWorkerLoader,
|
|
44
|
+
parse: parseParquetWasm
|
|
45
|
+
};
|
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
import type { ReadableFile } from '@loaders.gl/loader-utils';
|
|
2
|
-
import type { GeoJSONTable, GeoJSONTableBatch, ObjectRowTable, ObjectRowTableBatch } from '@loaders.gl/schema';
|
|
3
|
-
import type { ParquetLoaderOptions } from '../../parquet-loader';
|
|
4
|
-
export declare function parseParquetFile(file: ReadableFile, options?: ParquetLoaderOptions): Promise<ObjectRowTable | GeoJSONTable>;
|
|
5
|
-
export declare function parseParquetFileInBatches(file: ReadableFile, options?: ParquetLoaderOptions): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch>;
|
|
6
|
-
//# sourceMappingURL=parse-parquet-to-rows.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAC3D,OAAO,KAAK,EACV,YAAY,EACZ,iBAAiB,EACjB,cAAc,EACd,mBAAmB,EACpB,MAAM,oBAAoB,CAAC;AAI5B,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAM/D,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,YAAY,EAClB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,cAAc,GAAG,YAAY,CAAC,CA0BxC;AAED,wBAAuB,yBAAyB,CAC9C,IAAI,EAAE,YAAY,EAClB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,mBAAmB,GAAG,iBAAiB,CAAC,CAuBxD"}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"parse-parquet-to-rows.js","names":["convertWKBTableToGeoJSON","WKTLoader","WKBLoader","ParquetReader","getSchemaFromParquetReader","installBufferPolyfill","parseParquetFile","file","options","_options$parquet","_options$parquet2","reader","preserveBinary","parquet","schema","rows","rowBatches","rowBatchIterator","rowBatch","row","push","objectRowTable","shape","data","convertTable","parseParquetFileInBatches","_options$parquet3","_options$parquet4","table","batchType","length","error","Error"],"sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n// import {ColumnarTableBatch} from '@loaders.gl/schema';\nimport type {ReadableFile} from '@loaders.gl/loader-utils';\nimport type {\n GeoJSONTable,\n GeoJSONTableBatch,\n ObjectRowTable,\n ObjectRowTableBatch\n} from '@loaders.gl/schema';\nimport {convertWKBTableToGeoJSON} from '@loaders.gl/gis';\nimport {WKTLoader, WKBLoader} from '@loaders.gl/wkt';\n\nimport type {ParquetLoaderOptions} from '../../parquet-loader';\nimport type {ParquetRow} from '../../parquetjs/schema/declare';\nimport {ParquetReader} from '../../parquetjs/parser/parquet-reader';\nimport {getSchemaFromParquetReader} from './get-parquet-schema';\nimport {installBufferPolyfill} from '../../polyfills/buffer';\n\nexport async function parseParquetFile(\n file: ReadableFile,\n options?: ParquetLoaderOptions\n): Promise<ObjectRowTable | GeoJSONTable> {\n installBufferPolyfill();\n\n const reader = new ParquetReader(file, {\n preserveBinary: options?.parquet?.preserveBinary\n });\n\n const schema = await getSchemaFromParquetReader(reader);\n\n const rows: ParquetRow[] = [];\n\n const rowBatches = reader.rowBatchIterator(options?.parquet);\n for await (const rowBatch of rowBatches) {\n // we have only one input batch so return\n for (const row of rowBatch) {\n rows.push(row);\n }\n }\n const objectRowTable: ObjectRowTable 
= {\n shape: 'object-row-table',\n schema,\n data: rows\n };\n\n const shape = options?.parquet?.shape;\n return convertTable(objectRowTable, shape);\n}\n\nexport async function* parseParquetFileInBatches(\n file: ReadableFile,\n options?: ParquetLoaderOptions\n): AsyncIterable<ObjectRowTableBatch | GeoJSONTableBatch> {\n const reader = new ParquetReader(file, {\n preserveBinary: options?.parquet?.preserveBinary\n });\n\n const schema = await getSchemaFromParquetReader(reader);\n const rowBatches = reader.rowBatchIterator(options?.parquet);\n for await (const rows of rowBatches) {\n const objectRowTable: ObjectRowTable = {\n shape: 'object-row-table',\n schema,\n data: rows\n };\n const shape = options?.parquet?.shape;\n const table = convertTable(objectRowTable, shape);\n\n yield {\n batchType: 'data',\n schema,\n ...table,\n length: rows.length\n };\n }\n}\n\nfunction convertTable(\n objectRowTable: ObjectRowTable,\n shape?: 'object-row-table' | 'geojson-table'\n): ObjectRowTable | GeoJSONTable {\n switch (shape) {\n case 'object-row-table':\n return objectRowTable;\n\n case 'geojson-table':\n try {\n return convertWKBTableToGeoJSON(objectRowTable, objectRowTable.schema!, [\n WKTLoader,\n WKBLoader\n ]);\n } catch (error) {\n return objectRowTable;\n }\n\n default:\n throw new Error(shape);\n 
}\n}\n"],"mappings":"AASA,SAAQA,wBAAwB,QAAO,iBAAiB;AACxD,SAAQC,SAAS,EAAEC,SAAS,QAAO,iBAAiB;AAAC,SAI7CC,aAAa;AAAA,SACbC,0BAA0B;AAAA,SAC1BC,qBAAqB;AAE7B,OAAO,eAAeC,gBAAgBA,CACpCC,IAAkB,EAClBC,OAA8B,EACU;EAAA,IAAAC,gBAAA,EAAAC,iBAAA;EACxCL,qBAAqB,CAAC,CAAC;EAEvB,MAAMM,MAAM,GAAG,IAAIR,aAAa,CAACI,IAAI,EAAE;IACrCK,cAAc,EAAEJ,OAAO,aAAPA,OAAO,wBAAAC,gBAAA,GAAPD,OAAO,CAAEK,OAAO,cAAAJ,gBAAA,uBAAhBA,gBAAA,CAAkBG;EACpC,CAAC,CAAC;EAEF,MAAME,MAAM,GAAG,MAAMV,0BAA0B,CAACO,MAAM,CAAC;EAEvD,MAAMI,IAAkB,GAAG,EAAE;EAE7B,MAAMC,UAAU,GAAGL,MAAM,CAACM,gBAAgB,CAACT,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEK,OAAO,CAAC;EAC5D,WAAW,MAAMK,QAAQ,IAAIF,UAAU,EAAE;IAEvC,KAAK,MAAMG,GAAG,IAAID,QAAQ,EAAE;MAC1BH,IAAI,CAACK,IAAI,CAACD,GAAG,CAAC;IAChB;EACF;EACA,MAAME,cAA8B,GAAG;IACrCC,KAAK,EAAE,kBAAkB;IACzBR,MAAM;IACNS,IAAI,EAAER;EACR,CAAC;EAED,MAAMO,KAAK,GAAGd,OAAO,aAAPA,OAAO,wBAAAE,iBAAA,GAAPF,OAAO,CAAEK,OAAO,cAAAH,iBAAA,uBAAhBA,iBAAA,CAAkBY,KAAK;EACrC,OAAOE,YAAY,CAACH,cAAc,EAAEC,KAAK,CAAC;AAC5C;AAEA,OAAO,gBAAgBG,yBAAyBA,CAC9ClB,IAAkB,EAClBC,OAA8B,EAC0B;EAAA,IAAAkB,iBAAA;EACxD,MAAMf,MAAM,GAAG,IAAIR,aAAa,CAACI,IAAI,EAAE;IACrCK,cAAc,EAAEJ,OAAO,aAAPA,OAAO,wBAAAkB,iBAAA,GAAPlB,OAAO,CAAEK,OAAO,cAAAa,iBAAA,uBAAhBA,iBAAA,CAAkBd;EACpC,CAAC,CAAC;EAEF,MAAME,MAAM,GAAG,MAAMV,0BAA0B,CAACO,MAAM,CAAC;EACvD,MAAMK,UAAU,GAAGL,MAAM,CAACM,gBAAgB,CAACT,OAAO,aAAPA,OAAO,uBAAPA,OAAO,CAAEK,OAAO,CAAC;EAC5D,WAAW,MAAME,IAAI,IAAIC,UAAU,EAAE;IAAA,IAAAW,iBAAA;IACnC,MAAMN,cAA8B,GAAG;MACrCC,KAAK,EAAE,kBAAkB;MACzBR,MAAM;MACNS,IAAI,EAAER;IACR,CAAC;IACD,MAAMO,KAAK,GAAGd,OAAO,aAAPA,OAAO,wBAAAmB,iBAAA,GAAPnB,OAAO,CAAEK,OAAO,cAAAc,iBAAA,uBAAhBA,iBAAA,CAAkBL,KAAK;IACrC,MAAMM,KAAK,GAAGJ,YAAY,CAACH,cAAc,EAAEC,KAAK,CAAC;IAEjD,MAAM;MACJO,SAAS,EAAE,MAAM;MACjBf,MAAM;MACN,GAAGc,KAAK;MACRE,MAAM,EAAEf,IAAI,CAACe;IACf,CAAC;EACH;AACF;AAEA,SAASN,YAAYA,CACnBH,cAA8B,EAC9BC,KAA4C,EACb;EAC/B,QAAQA,KAAK;IACX,KAAK,kBAAkB;MACrB,OAAOD,cAAc;IAEvB,KAAK,eAAe;MAClB,IAAI;QACF,OAAOrB,wBAAwB,CAACqB,cAAc,EAAEA,cAAc,CAACP,MAAM,EAAG,CACtEb,SAAS,EACTC,SAAS,CACV,CAAC;MACJ,CAAC,CAAC,OA
AO6B,KAAK,EAAE;QACd,OAAOV,cAAc;MACvB;IAEF;MACE,MAAM,IAAIW,KAAK,CAACV,KAAK,CAAC;EAC1B;AACF"}
|