@loaders.gl/parquet 4.2.0-alpha.6 → 4.2.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +26 -8
- package/dist/index.cjs.map +3 -3
- package/dist/lib/constants.js +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
- package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet.js +4 -0
- package/dist/parquet-loader.d.ts +151 -7
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +9 -1
- package/dist/parquet-wasm-loader.d.ts +22 -3
- package/dist/parquet-wasm-loader.d.ts.map +1 -1
- package/dist/parquet-wasm-loader.js +2 -0
- package/dist/parquet-wasm-writer.d.ts +1 -3
- package/dist/parquet-wasm-writer.d.ts.map +1 -1
- package/dist/parquet-writer.d.ts +15 -3
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -1
- package/dist/parquetjs/compression.d.ts +1 -1
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +3 -1
- package/dist/parquetjs/schema/types.js +1 -0
- package/package.json +9 -9
- package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
- package/src/lib/parsers/parse-parquet.ts +5 -0
- package/src/parquet-loader.ts +52 -51
- package/src/parquet-wasm-loader.ts +7 -4
- package/src/parquet-wasm-writer.ts +2 -2
- package/src/parquet-writer.ts +2 -2
- package/src/parquetjs/compression.ts +6 -2
- package/src/parquetjs/schema/types.ts +2 -1
package/src/parquet-loader.ts
CHANGED
@@ -46,11 +46,10 @@ export type ParquetLoaderOptions = LoaderOptions & {
 /**
  * ParquetJS table loader
  */
-export const ParquetWorkerLoader: Loader<
-  ObjectRowTable,
-  ObjectRowTableBatch,
-  ParquetLoaderOptions
-> = {
+export const ParquetWorkerLoader = {
+  dataType: null as unknown as ObjectRowTable,
+  batchType: null as unknown as ObjectRowTableBatch,
+
   name: 'Apache Parquet',
   id: 'parquet',
   module: 'parquet',
@@ -70,69 +69,75 @@ export const ParquetWorkerLoader: Loader<
       preserveBinary: false
     }
   }
-}
+} as const satisfies Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>;
 
 /** ParquetJS table loader */
-export const ParquetLoader: LoaderWithParser<
-  ObjectRowTable | GeoJSONTable,
-  ObjectRowTableBatch | GeoJSONTableBatch,
-  ParquetLoaderOptions
-> = {
+export const ParquetLoader = {
   ...ParquetWorkerLoader,
+
+  dataType: null as unknown as ObjectRowTable | GeoJSONTable,
+  batchType: null as unknown as ObjectRowTableBatch | GeoJSONTableBatch,
+
   parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
     parseParquetFile(new BlobFile(arrayBuffer), options),
 
   parseFile: parseParquetFile,
   parseFileInBatches: parseParquetFileInBatches
-}
+} as const satisfies LoaderWithParser<
+  ObjectRowTable | GeoJSONTable,
+  ObjectRowTableBatch | GeoJSONTableBatch,
+  ParquetLoaderOptions
+>;
 
 // Defeat tree shaking
 // @ts-ignore
 ParquetLoader.Buffer = Buffer;
 
-export const GeoParquetWorkerLoader
-  … (the remaining removed lines of this declaration were not captured in this extract)
+export const GeoParquetWorkerLoader = {
+  dataType: null as unknown as GeoJSONTable,
+  batchType: null as unknown as GeoJSONTableBatch,
+
+  name: 'Apache Parquet',
+  id: 'parquet',
+  module: 'parquet',
+  version: VERSION,
+  worker: true,
+  category: 'table',
+  extensions: ['parquet'],
+  mimeTypes: ['application/octet-stream'],
+  binary: true,
+  tests: ['PAR1', 'PARE'],
+  options: {
+    parquet: {
+      shape: 'geojson-table',
+      columnList: [],
+      geoparquet: true,
+      url: undefined,
+      preserveBinary: false
     }
-}
+  }
+} as const satisfies Loader<GeoJSONTable, GeoJSONTableBatch, ParquetLoaderOptions>;
 
 /** ParquetJS table loader */
-export const GeoParquetLoader: LoaderWithParser<
-  ObjectRowTable | GeoJSONTable,
-  ObjectRowTableBatch | GeoJSONTableBatch,
-  ParquetLoaderOptions
-> = {
+export const GeoParquetLoader = {
   ...GeoParquetWorkerLoader,
+
   parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
     return parseGeoParquetFile(new BlobFile(arrayBuffer), options);
   },
   parseFile: parseGeoParquetFile,
   parseFileInBatches: parseGeoParquetFileInBatches
-}
+} as const satisfies LoaderWithParser<
+  ObjectRowTable | GeoJSONTable,
+  ObjectRowTableBatch | GeoJSONTableBatch,
+  ParquetLoaderOptions
+>;
 
 /** @deprecated Test to see if we can improve perf of parquetjs loader */
-export const ParquetColumnarWorkerLoader: Loader<
-  ColumnarTable,
-  ColumnarTableBatch,
-  ParquetLoaderOptions
-> = {
+export const ParquetColumnarWorkerLoader = {
+  dataType: null as any as ColumnarTable,
+  batchType: null as any as ColumnarTableBatch,
+
   name: 'Apache Parquet',
   id: 'parquet',
   module: 'parquet',
@@ -144,18 +149,14 @@ export const ParquetColumnarWorkerLoader: Loader<
   binary: true,
   tests: ['PAR1', 'PARE'],
   options: ParquetLoader.options
-}
+} as const satisfies Loader<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
 
 /** @deprecated Test to see if we can improve perf of parquetjs loader */
-export const ParquetColumnarLoader: LoaderWithParser<
-  ColumnarTable,
-  ColumnarTableBatch,
-  ParquetLoaderOptions
-> = {
+export const ParquetColumnarLoader = {
   ...ParquetColumnarWorkerLoader,
   parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
     return parseParquetFileInColumns(new BlobFile(arrayBuffer), options);
  },
   parseFile: parseParquetFileInColumns,
   parseFileInBatches: parseParquetFileInColumnarBatches
-}
+} as const satisfies LoaderWithParser<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
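Every loader in this file follows the same migration: the explicit Loader<...> or LoaderWithParser<...> annotation moves from the declaration to an "as const satisfies" clause, and phantom dataType/batchType fields are added so element types can be inferred from the loader value itself. A minimal sketch of what this buys, using a simplified stand-in for the loaders.gl types (the Loader shape and load signature below are illustrative assumptions, not the library's actual definitions):

// Simplified stand-in for the real Loader type (assumption for illustration).
type Loader<DataT, BatchT, OptionsT> = {
  dataType: DataT;
  batchType: BatchT;
  name: string;
  extensions: readonly string[];
  options: OptionsT;
};

type Row = {[column: string]: unknown};
type RowBatch = {rows: Row[]};
type DemoLoaderOptions = {demo?: {columnList?: readonly string[]}};

// Before: an explicit ": Loader<Row, RowBatch, DemoLoaderOptions>" annotation
// widens every property to its declared type, discarding the literal values.
// After: "as const" preserves the narrow literal types while "satisfies"
// still checks the object against the Loader contract, and the phantom
// dataType/batchType fields carry the element types at the value level.
export const DemoLoader = {
  dataType: null as unknown as Row,
  batchType: null as unknown as RowBatch,
  name: 'Demo',
  extensions: ['demo'],
  options: {demo: {columnList: []}}
} as const satisfies Loader<Row, RowBatch, DemoLoaderOptions>;

// Generic helpers can now recover the parsed type from the loader object:
declare function load<DataT>(
  url: string,
  loader: Loader<DataT, unknown, unknown>
): Promise<DataT>;

// const table = await load('example.demo', DemoLoader); // table: Row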
package/src/parquet-wasm-loader.ts
CHANGED

@@ -17,7 +17,10 @@ export type ParquetWasmLoaderOptions = LoaderOptions & {
 };
 
 /** Parquet WASM table loader */
-export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
+export const ParquetWasmWorkerLoader = {
+  dataType: null as unknown as ArrowTable,
+  batchType: null as never,
+
   name: 'Apache Parquet',
   id: 'parquet-wasm',
   module: 'parquet',
@@ -34,13 +37,13 @@ export const ParquetWasmWorkerLoader: Loader<ArrowTable, never, ParquetWasmLoaderOptions> = {
       wasmUrl: PARQUET_WASM_URL
     }
   }
-}
+} as const satisfies Loader<ArrowTable, never, ParquetWasmLoaderOptions>;
 
 /** Parquet WASM table loader */
-export const ParquetWasmLoader: LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions> = {
+export const ParquetWasmLoader = {
   ...ParquetWasmWorkerLoader,
   parse(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions) {
     options = {parquet: {...ParquetWasmLoader.options.parquet, ...options?.parquet}, ...options};
     return parseParquetWasm(arrayBuffer, options);
   }
-}
+} as const satisfies LoaderWithParser<ArrowTable, never, ParquetWasmLoaderOptions>;
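For context, a typical call site for this loader could look like the following sketch; load comes from @loaders.gl/core, and both URLs are placeholders rather than values from this diff:

import {load} from '@loaders.gl/core';
import {ParquetWasmLoader} from '@loaders.gl/parquet';

// parse() merges these options over the loader defaults, so wasmUrl
// overrides the bundled PARQUET_WASM_URL default. Placeholder URLs only.
const table = await load('https://example.com/data.parquet', ParquetWasmLoader, {
  parquet: {wasmUrl: 'https://example.com/parquet-wasm.wasm'}
});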
package/src/parquet-wasm-writer.ts
CHANGED

@@ -16,7 +16,7 @@ export type ParquetWriterOptions = WriterOptions & {
 };
 
 /** Parquet WASM writer */
-export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWriterOptions> = {
+export const ParquetWasmWriter = {
   name: 'Apache Parquet',
   id: 'parquet-wasm',
   module: 'parquet',
@@ -33,4 +33,4 @@ export const ParquetWasmWriter: WriterWithEncoder<ArrowTable, never, ParquetWriterOptions> = {
     options = {parquet: {...ParquetWasmWriter.options.parquet, ...options?.parquet}, ...options};
     return encode(arrowTable, options);
   }
-}
+} as const satisfies WriterWithEncoder<ArrowTable, never, ParquetWriterOptions>;
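A corresponding writer call might look like this sketch; encode is the @loaders.gl/core entry point, and the ArrowTable import path is an assumption not shown in this diff:

import {encode} from '@loaders.gl/core';
import {ParquetWasmWriter} from '@loaders.gl/parquet';
import type {ArrowTable} from '@loaders.gl/schema';

// Writer options merge with ParquetWasmWriter.options inside encode().
async function writeParquet(table: ArrowTable): Promise<ArrayBuffer> {
  return await encode(table, ParquetWasmWriter);
}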
package/src/parquet-writer.ts
CHANGED
@@ -11,7 +11,7 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
 
 export type ParquetWriterOptions = {};
 
-export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOptions> = {
+export const ParquetWriter = {
   name: 'Apache Parquet',
   id: 'parquet',
   module: 'parquet',
@@ -22,7 +22,7 @@ export const ParquetWriter: WriterWithEncoder<Table, TableBatch, ParquetWriterOptions> = {
   options: {},
   encode: async (data, options) => encodeSync(data, options),
   encodeSync
-}
+} as const satisfies WriterWithEncoder<Table, TableBatch, ParquetWriterOptions>;
 
 function encodeSync(data, options?: ParquetWriterOptions) {
   return new ArrayBuffer(0);
package/src/parquetjs/compression.ts
CHANGED

@@ -12,6 +12,7 @@ import {
   LZ4Compression,
   ZstdCompression
 } from '@loaders.gl/compression';
+import {registerJSModules} from '@loaders.gl/loader-utils';
 
 import {ParquetCompression} from './schema/declare';
 
@@ -72,9 +73,12 @@ export const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression
  * Register compressions that have big external libraries
  * @param options.modules External library dependencies
  */
-export async function preloadCompressions(options?: {modules
+export async function preloadCompressions(options?: {modules?: {[key: string]: any}}) {
+  registerJSModules(options?.modules);
   const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
-  return await Promise.all(
+  return await Promise.all(
+    compressions.map((compression) => compression.preload(options?.modules))
+  );
 }
 
 /**
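A sketch of the resulting call pattern, assuming preloadCompressions is re-exported from the package root and that the module keys use npm package names (neither is shown in this diff):

import lz4js from 'lz4js';
import {ZstdCodec} from 'zstd-codec';
import {preloadCompressions} from '@loaders.gl/parquet';

// Register the heavyweight codec libraries once, before parsing files that
// use LZ4 or ZSTD column compression. The keys are assumed to be the npm
// package names that registerJSModules expects.
await preloadCompressions({modules: {lz4js, 'zstd-codec': ZstdCodec}});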
package/src/parquetjs/schema/types.ts
CHANGED

@@ -340,7 +340,8 @@ function fromPrimitive_JSON(value: any): unknown {
 }
 
 function toPrimitive_BSON(value: any): Buffer {
-  … (removed line not captured in this extract)
+  // @ts-ignore
+  const arrayBuffer: ArrayBuffer = BSONWriter.encodeSync?.(value);
   return Buffer.from(arrayBuffer);
 }
 