@loaders.gl/parquet 4.4.0-alpha.2 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +17 -10
- package/dist/index.cjs.map +3 -3
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/lib/arrow/convert-schema-from-parquet.js +1 -0
- package/dist/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/lib/constants.js +2 -1
- package/dist/lib/constants.js.map +1 -0
- package/dist/lib/encoders/encode-arrow-to-parquet.d.ts.map +1 -1
- package/dist/lib/encoders/encode-arrow-to-parquet.js +3 -1
- package/dist/lib/encoders/encode-arrow-to-parquet.js.map +1 -0
- package/dist/lib/parsers/get-parquet-schema.js +1 -0
- package/dist/lib/parsers/get-parquet-schema.js.map +1 -0
- package/dist/lib/parsers/parse-geoparquet-to-geojson.js +1 -0
- package/dist/lib/parsers/parse-geoparquet-to-geojson.js.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-arrow.js +1 -0
- package/dist/lib/parsers/parse-parquet-to-arrow.js.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-json.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-json.js +3 -2
- package/dist/lib/parsers/parse-parquet-to-json.js.map +1 -0
- package/dist/lib/utils/load-wasm.js +1 -0
- package/dist/lib/utils/load-wasm.js.map +1 -0
- package/dist/lib/utils/make-stream-iterator.js +1 -0
- package/dist/lib/utils/make-stream-iterator.js.map +1 -0
- package/dist/parquet-arrow-loader.js +1 -0
- package/dist/parquet-arrow-loader.js.map +1 -0
- package/dist/parquet-arrow-writer.js +1 -0
- package/dist/parquet-arrow-writer.js.map +1 -0
- package/dist/parquet-format.js +1 -0
- package/dist/parquet-format.js.map +1 -0
- package/dist/parquet-json-loader.js +2 -1
- package/dist/parquet-json-loader.js.map +1 -0
- package/dist/parquet-json-writer.js +2 -1
- package/dist/parquet-json-writer.js.map +1 -0
- package/dist/parquetjs/codecs/declare.js +1 -0
- package/dist/parquetjs/codecs/declare.js.map +1 -0
- package/dist/parquetjs/codecs/dictionary.js +1 -0
- package/dist/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/parquetjs/codecs/index.js +1 -0
- package/dist/parquetjs/codecs/index.js.map +1 -0
- package/dist/parquetjs/codecs/plain.js +3 -2
- package/dist/parquetjs/codecs/plain.js.map +1 -0
- package/dist/parquetjs/codecs/rle.d.ts.map +1 -1
- package/dist/parquetjs/codecs/rle.js +13 -6
- package/dist/parquetjs/codecs/rle.js.map +1 -0
- package/dist/parquetjs/compression.js +1 -0
- package/dist/parquetjs/compression.js.map +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.js +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +1 -0
- package/dist/parquetjs/parquet-thrift/BoundaryOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/BsonType.js +1 -0
- package/dist/parquetjs/parquet-thrift/BsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
- package/dist/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ConvertedType.js +1 -0
- package/dist/parquetjs/parquet-thrift/ConvertedType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +1 -0
- package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DateType.js +1 -0
- package/dist/parquetjs/parquet-thrift/DateType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DecimalType.js +1 -0
- package/dist/parquetjs/parquet-thrift/DecimalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +1 -0
- package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Encoding.js +1 -0
- package/dist/parquetjs/parquet-thrift/Encoding.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/EnumType.js +1 -0
- package/dist/parquetjs/parquet-thrift/EnumType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -0
- package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js +1 -0
- package/dist/parquetjs/parquet-thrift/FileMetaData.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +1 -0
- package/dist/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/IntType.js +1 -0
- package/dist/parquetjs/parquet-thrift/IntType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/JsonType.js +1 -0
- package/dist/parquetjs/parquet-thrift/JsonType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js +1 -0
- package/dist/parquetjs/parquet-thrift/KeyValue.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/ListType.js +1 -0
- package/dist/parquetjs/parquet-thrift/ListType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js +1 -0
- package/dist/parquetjs/parquet-thrift/LogicalType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MapType.js +1 -0
- package/dist/parquetjs/parquet-thrift/MapType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js +1 -0
- package/dist/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js +1 -0
- package/dist/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/NullType.js +1 -0
- package/dist/parquetjs/parquet-thrift/NullType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
- package/dist/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageHeader.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageLocation.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/PageType.js +1 -0
- package/dist/parquetjs/parquet-thrift/PageType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js +1 -0
- package/dist/parquetjs/parquet-thrift/RowGroup.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js +1 -0
- package/dist/parquetjs/parquet-thrift/SchemaElement.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js +1 -0
- package/dist/parquetjs/parquet-thrift/SortingColumn.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js +1 -0
- package/dist/parquetjs/parquet-thrift/Statistics.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/StringType.js +1 -0
- package/dist/parquetjs/parquet-thrift/StringType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js +1 -0
- package/dist/parquetjs/parquet-thrift/TimeType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js +1 -0
- package/dist/parquetjs/parquet-thrift/TimeUnit.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js +1 -0
- package/dist/parquetjs/parquet-thrift/TimestampType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/Type.js +1 -0
- package/dist/parquetjs/parquet-thrift/Type.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +1 -0
- package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/UUIDType.js +1 -0
- package/dist/parquetjs/parquet-thrift/UUIDType.js.map +1 -0
- package/dist/parquetjs/parquet-thrift/index.js +1 -0
- package/dist/parquetjs/parquet-thrift/index.js.map +1 -0
- package/dist/parquetjs/parser/decoders.js +2 -1
- package/dist/parquetjs/parser/decoders.js.map +1 -0
- package/dist/parquetjs/parser/parquet-reader.js +1 -0
- package/dist/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/parquetjs/schema/declare.js +1 -0
- package/dist/parquetjs/schema/declare.js.map +1 -0
- package/dist/parquetjs/schema/schema.js +1 -0
- package/dist/parquetjs/schema/schema.js.map +1 -0
- package/dist/parquetjs/schema/shred.js +1 -0
- package/dist/parquetjs/schema/shred.js.map +1 -0
- package/dist/parquetjs/schema/types.js +1 -0
- package/dist/parquetjs/schema/types.js.map +1 -0
- package/dist/parquetjs/utils/file-utils.js +1 -0
- package/dist/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/parquetjs/utils/read-utils.js +1 -0
- package/dist/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/polyfills/buffer/buffer-polyfill.browser.js +1 -0
- package/dist/polyfills/buffer/buffer-polyfill.browser.js.map +1 -0
- package/dist/polyfills/buffer/buffer-polyfill.node.js +1 -0
- package/dist/polyfills/buffer/buffer-polyfill.node.js.map +1 -0
- package/dist/polyfills/buffer/buffer.js +1 -0
- package/dist/polyfills/buffer/buffer.js.map +1 -0
- package/dist/polyfills/buffer/index.d.ts +1 -1
- package/dist/polyfills/buffer/index.d.ts.map +1 -1
- package/dist/polyfills/buffer/index.js +2 -1
- package/dist/polyfills/buffer/index.js.map +1 -0
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +1 -28
- package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.js +2 -1
- package/dist/polyfills/buffer/install-buffer-polyfill.js.map +1 -0
- package/dist/polyfills/util.js +1 -0
- package/dist/polyfills/util.js.map +1 -0
- package/dist/workers/parquet-json-worker.js +1 -0
- package/dist/workers/parquet-json-worker.js.map +1 -0
- package/package.json +16 -13
- package/src/lib/encoders/encode-arrow-to-parquet.ts +4 -2
- package/src/lib/parsers/parse-parquet-to-json.ts +4 -2
- package/src/parquetjs/codecs/plain.ts +4 -4
- package/src/parquetjs/codecs/rle.ts +17 -11
- package/src/parquetjs/encoder/parquet-encoder.ts +10 -10
- package/src/parquetjs/parser/decoders.ts +2 -2
- package/src/parquetjs/utils/read-utils.ts +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/parquet",
|
|
3
|
-
"version": "4.4.0-alpha.2",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Framework-independent loader for Apache Parquet files",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -30,6 +30,9 @@
|
|
|
30
30
|
"types": "./dist/index.d.ts",
|
|
31
31
|
"import": "./dist/index.js",
|
|
32
32
|
"require": "./dist/index.cjs"
|
|
33
|
+
},
|
|
34
|
+
"./parquet-worker.js": {
|
|
35
|
+
"import": "./dist/parquet-worker.js"
|
|
33
36
|
}
|
|
34
37
|
},
|
|
35
38
|
"sideEffects": false,
|
|
@@ -60,20 +63,21 @@
|
|
|
60
63
|
"base64-js and ieee754 are used by buffer polyfill"
|
|
61
64
|
],
|
|
62
65
|
"dependencies": {
|
|
63
|
-
"@loaders.gl/arrow": "4.4.0
|
|
64
|
-
"@loaders.gl/bson": "4.4.0
|
|
65
|
-
"@loaders.gl/compression": "4.4.0
|
|
66
|
-
"@loaders.gl/geoarrow": "4.4.0
|
|
67
|
-
"@loaders.gl/gis": "4.4.0
|
|
68
|
-
"@loaders.gl/loader-utils": "4.4.0
|
|
69
|
-
"@loaders.gl/schema": "4.4.0
|
|
70
|
-
"@loaders.gl/wkt": "4.4.0
|
|
71
|
-
"@probe.gl/log": "^4.
|
|
66
|
+
"@loaders.gl/arrow": "4.4.0",
|
|
67
|
+
"@loaders.gl/bson": "4.4.0",
|
|
68
|
+
"@loaders.gl/compression": "4.4.0",
|
|
69
|
+
"@loaders.gl/geoarrow": "4.4.0",
|
|
70
|
+
"@loaders.gl/gis": "4.4.0",
|
|
71
|
+
"@loaders.gl/loader-utils": "4.4.0",
|
|
72
|
+
"@loaders.gl/schema": "4.4.0",
|
|
73
|
+
"@loaders.gl/wkt": "4.4.0",
|
|
74
|
+
"@probe.gl/log": "^4.1.1",
|
|
72
75
|
"async-mutex": "^0.2.2",
|
|
73
76
|
"base64-js": "^1.3.1",
|
|
74
77
|
"brotli": "^1.3.2",
|
|
75
78
|
"ieee754": "^1.2.1",
|
|
76
79
|
"int53": "^0.2.4",
|
|
80
|
+
"isomorphic-ws": "^5.0.0",
|
|
77
81
|
"lz4js": "^0.2.0",
|
|
78
82
|
"node-int64": "^0.4.0",
|
|
79
83
|
"object-stream": "0.0.1",
|
|
@@ -85,14 +89,13 @@
|
|
|
85
89
|
"zstd-codec": "^0.1"
|
|
86
90
|
},
|
|
87
91
|
"devDependencies": {
|
|
88
|
-
"@types/node": "^10.14.15",
|
|
89
92
|
"@types/node-int64": "^0.4.29",
|
|
90
93
|
"@types/thrift": "^0.10.8",
|
|
91
94
|
"@types/varint": "^5.0.0"
|
|
92
95
|
},
|
|
93
96
|
"peerDependencies": {
|
|
94
97
|
"@loaders.gl/core": "4.4.0-alpha.1",
|
|
95
|
-
"apache-arrow": ">=
|
|
98
|
+
"apache-arrow": ">= 17.0.0"
|
|
96
99
|
},
|
|
97
|
-
"gitHead": "
|
|
100
|
+
"gitHead": "b8a23bc05946bde7089936ff3ea1651ee9a45536"
|
|
98
101
|
}
|
|
@@ -29,10 +29,12 @@ export async function encodeArrowToParquet(
|
|
|
29
29
|
try {
|
|
30
30
|
const parquetBytes = wasm.writeParquet(wasmTable, wasmProperties);
|
|
31
31
|
// const parquetBytes = wasm.writeParquet(wasmTable, wasmProperties);
|
|
32
|
-
|
|
32
|
+
const bytes = new Uint8Array(
|
|
33
|
+
parquetBytes.buffer,
|
|
33
34
|
parquetBytes.byteOffset,
|
|
34
|
-
parquetBytes.byteLength
|
|
35
|
+
parquetBytes.byteLength
|
|
35
36
|
);
|
|
37
|
+
return bytes.slice().buffer;
|
|
36
38
|
} finally {
|
|
37
39
|
// wasmTable.free();
|
|
38
40
|
// wasmProperties.free();
|
|
@@ -39,14 +39,16 @@ export async function parseParquetFile(
|
|
|
39
39
|
let limitHasReached = false;
|
|
40
40
|
// we have only one input batch so return
|
|
41
41
|
for (const row of rowBatch) {
|
|
42
|
-
if (options?.limit && rows.length >= options?.limit) {
|
|
42
|
+
if (options?.limit && rows.length >= options?.core?.limit!) {
|
|
43
43
|
limitHasReached = true;
|
|
44
44
|
break;
|
|
45
45
|
}
|
|
46
46
|
rows.push(row);
|
|
47
47
|
}
|
|
48
48
|
if (limitHasReached) {
|
|
49
|
-
log.warn(`Rows number limit has been reached. Only first ${options?.limit} are loaded`)();
|
|
49
|
+
log.warn(
|
|
50
|
+
`Rows number limit has been reached. Only first ${options?.core?.limit} are loaded`
|
|
51
|
+
)();
|
|
50
52
|
break;
|
|
51
53
|
}
|
|
52
54
|
}
|
|
@@ -69,7 +69,7 @@ function encodeValues_BOOLEAN(values: boolean[]): Buffer {
|
|
|
69
69
|
buf.fill(0);
|
|
70
70
|
for (let i = 0; i < values.length; i++) {
|
|
71
71
|
if (values[i]) {
|
|
72
|
-
buf[Math.floor(i / 8)] |= 1 << i % 8;
|
|
72
|
+
buf[Math.floor(i / 8)] |= 1 << (i % 8);
|
|
73
73
|
}
|
|
74
74
|
}
|
|
75
75
|
return buf;
|
|
@@ -79,7 +79,7 @@ function decodeValues_BOOLEAN(cursor: CursorBuffer, count: number): boolean[] {
|
|
|
79
79
|
const values: boolean[] = [];
|
|
80
80
|
for (let i = 0; i < count; i++) {
|
|
81
81
|
const b = cursor.buffer[cursor.offset + Math.floor(i / 8)];
|
|
82
|
-
values.push((b & (1 << i % 8)) > 0);
|
|
82
|
+
values.push((b & (1 << (i % 8))) > 0);
|
|
83
83
|
}
|
|
84
84
|
cursor.offset += Math.ceil(count / 8);
|
|
85
85
|
return values;
|
|
@@ -194,7 +194,7 @@ function encodeValues_BYTE_ARRAY(values: Buffer[]): Buffer {
|
|
|
194
194
|
let buf_pos = 0;
|
|
195
195
|
for (let i = 0; i < values.length; i++) {
|
|
196
196
|
buf.writeUInt32LE(values[i].length, buf_pos);
|
|
197
|
-
values[i].copy(buf, buf_pos + 4);
|
|
197
|
+
values[i].copy(buf as Uint8Array, buf_pos + 4);
|
|
198
198
|
buf_pos += 4 + values[i].length;
|
|
199
199
|
}
|
|
200
200
|
return buf;
|
|
@@ -222,7 +222,7 @@ function encodeValues_FIXED_LEN_BYTE_ARRAY(values: Buffer[], opts: ParquetCodecO
|
|
|
222
222
|
throw new Error(`invalid value for FIXED_LEN_BYTE_ARRAY: ${values[i]}`);
|
|
223
223
|
}
|
|
224
224
|
}
|
|
225
|
-
return Buffer.concat(values);
|
|
225
|
+
return Buffer.concat(values as Uint8Array[]);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
228
|
function decodeValues_FIXED_LEN_BYTE_ARRAY(
|
|
@@ -40,7 +40,7 @@ export function encodeValues(
|
|
|
40
40
|
if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {
|
|
41
41
|
// If we have any data in runs we need to encode them
|
|
42
42
|
if (run.length) {
|
|
43
|
-
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
|
|
43
|
+
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)] as Uint8Array[]);
|
|
44
44
|
run = [];
|
|
45
45
|
}
|
|
46
46
|
repeats = 1;
|
|
@@ -49,7 +49,7 @@ export function encodeValues(
|
|
|
49
49
|
} else {
|
|
50
50
|
// If values changes we need to post any previous repeated values
|
|
51
51
|
if (repeats) {
|
|
52
|
-
buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);
|
|
52
|
+
buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)] as Uint8Array[]);
|
|
53
53
|
repeats = 0;
|
|
54
54
|
}
|
|
55
55
|
run.push(values[i]);
|
|
@@ -57,9 +57,12 @@ export function encodeValues(
|
|
|
57
57
|
}
|
|
58
58
|
|
|
59
59
|
if (repeats) {
|
|
60
|
-
buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);
|
|
60
|
+
buf = Buffer.concat([
|
|
61
|
+
buf,
|
|
62
|
+
encodeRunRepeated(values[values.length - 1], repeats, opts)
|
|
63
|
+
] as Uint8Array[]);
|
|
61
64
|
} else if (run.length) {
|
|
62
|
-
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);
|
|
65
|
+
buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)] as Uint8Array[]);
|
|
63
66
|
}
|
|
64
67
|
|
|
65
68
|
if (opts.disableEnvelope) {
|
|
@@ -70,7 +73,7 @@ export function encodeValues(
|
|
|
70
73
|
|
|
71
74
|
// @ts-ignore buffer polyfill
|
|
72
75
|
envelope.writeUInt32LE(buf.length, undefined);
|
|
73
|
-
buf.copy(envelope, 4);
|
|
76
|
+
buf.copy(envelope as Uint8Array, 4);
|
|
74
77
|
|
|
75
78
|
return envelope;
|
|
76
79
|
}
|
|
@@ -131,8 +134,8 @@ function decodeRunBitpacked(
|
|
|
131
134
|
// tslint:disable-next-line:prefer-array-literal
|
|
132
135
|
const values = new Array(count).fill(0);
|
|
133
136
|
for (let b = 0; b < bitWidth * count; b++) {
|
|
134
|
-
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {
|
|
135
|
-
values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;
|
|
137
|
+
if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) {
|
|
138
|
+
values[Math.floor(b / bitWidth)] |= 1 << (b % bitWidth);
|
|
136
139
|
}
|
|
137
140
|
}
|
|
138
141
|
|
|
@@ -170,12 +173,15 @@ function encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer
|
|
|
170
173
|
|
|
171
174
|
const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));
|
|
172
175
|
for (let b = 0; b < bitWidth * values.length; b++) {
|
|
173
|
-
if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {
|
|
174
|
-
buf[Math.floor(b / 8)] |= 1 << b % 8;
|
|
176
|
+
if ((values[Math.floor(b / bitWidth)] & (1 << (b % bitWidth))) > 0) {
|
|
177
|
+
buf[Math.floor(b / 8)] |= 1 << (b % 8);
|
|
175
178
|
}
|
|
176
179
|
}
|
|
177
180
|
|
|
178
|
-
return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);
|
|
181
|
+
return Buffer.concat([
|
|
182
|
+
Buffer.from(varint.encode(((values.length / 8) << 1) | 1)),
|
|
183
|
+
buf
|
|
184
|
+
] as Uint8Array[]);
|
|
179
185
|
}
|
|
180
186
|
|
|
181
187
|
function encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {
|
|
@@ -190,5 +196,5 @@ function encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptio
|
|
|
190
196
|
value >> 8; // TODO - this looks wrong
|
|
191
197
|
}
|
|
192
198
|
|
|
193
|
-
return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);
|
|
199
|
+
return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf] as Uint8Array[]);
|
|
194
200
|
}
|
|
@@ -376,7 +376,7 @@ async function encodeDataPage(
|
|
|
376
376
|
page: Buffer;
|
|
377
377
|
}> {
|
|
378
378
|
/* encode repetition and definition levels */
|
|
379
|
-
let rLevelsBuf = Buffer.alloc(0);
|
|
379
|
+
let rLevelsBuf: Buffer = Buffer.alloc(0);
|
|
380
380
|
if (column.rLevelMax > 0) {
|
|
381
381
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
382
382
|
bitWidth: getBitWidth(column.rLevelMax)
|
|
@@ -384,7 +384,7 @@ async function encodeDataPage(
|
|
|
384
384
|
});
|
|
385
385
|
}
|
|
386
386
|
|
|
387
|
-
let dLevelsBuf = Buffer.alloc(0);
|
|
387
|
+
let dLevelsBuf: Buffer = Buffer.alloc(0);
|
|
388
388
|
if (column.dLevelMax > 0) {
|
|
389
389
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
390
390
|
bitWidth: getBitWidth(column.dLevelMax)
|
|
@@ -398,7 +398,7 @@ async function encodeDataPage(
|
|
|
398
398
|
bitWidth: column.typeLength
|
|
399
399
|
});
|
|
400
400
|
|
|
401
|
-
const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
401
|
+
const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf] as Uint8Array[]);
|
|
402
402
|
|
|
403
403
|
// compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;
|
|
404
404
|
const compressedBuf = await Compression.deflate(column.compression!, dataBuf);
|
|
@@ -418,7 +418,7 @@ async function encodeDataPage(
|
|
|
418
418
|
|
|
419
419
|
/* concat page header, repetition and definition levels and values */
|
|
420
420
|
const headerBuf = serializeThrift(header);
|
|
421
|
-
const page = Buffer.concat([headerBuf, compressedBuf]);
|
|
421
|
+
const page = Buffer.concat([headerBuf, compressedBuf] as Uint8Array[]);
|
|
422
422
|
|
|
423
423
|
return {header, headerSize: headerBuf.length, page};
|
|
424
424
|
}
|
|
@@ -445,7 +445,7 @@ async function encodeDataPageV2(
|
|
|
445
445
|
const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);
|
|
446
446
|
|
|
447
447
|
/* encode repetition and definition levels */
|
|
448
|
-
let rLevelsBuf = Buffer.alloc(0);
|
|
448
|
+
let rLevelsBuf: Buffer = Buffer.alloc(0);
|
|
449
449
|
if (column.rLevelMax > 0) {
|
|
450
450
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
451
451
|
bitWidth: getBitWidth(column.rLevelMax),
|
|
@@ -453,7 +453,7 @@ async function encodeDataPageV2(
|
|
|
453
453
|
});
|
|
454
454
|
}
|
|
455
455
|
|
|
456
|
-
let dLevelsBuf = Buffer.alloc(0);
|
|
456
|
+
let dLevelsBuf: Buffer = Buffer.alloc(0);
|
|
457
457
|
if (column.dLevelMax > 0) {
|
|
458
458
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
459
459
|
bitWidth: getBitWidth(column.dLevelMax),
|
|
@@ -479,7 +479,7 @@ async function encodeDataPageV2(
|
|
|
479
479
|
|
|
480
480
|
/* concat page header, repetition and definition levels and values */
|
|
481
481
|
const headerBuf = serializeThrift(header);
|
|
482
|
-
const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
482
|
+
const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf] as Uint8Array[]);
|
|
483
483
|
return {header, headerSize: headerBuf.length, page};
|
|
484
484
|
}
|
|
485
485
|
|
|
@@ -536,7 +536,7 @@ async function encodeColumnChunk(
|
|
|
536
536
|
|
|
537
537
|
/* concat metadata header and data pages */
|
|
538
538
|
const metadataOffset = baseOffset + pageBuf.length;
|
|
539
|
-
const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);
|
|
539
|
+
const body = Buffer.concat([pageBuf, serializeThrift(metadata)] as Uint8Array[]);
|
|
540
540
|
return {body, metadata, metadataOffset};
|
|
541
541
|
}
|
|
542
542
|
|
|
@@ -573,7 +573,7 @@ async function encodeRowGroup(
|
|
|
573
573
|
metadata.columns.push(cchunk);
|
|
574
574
|
metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
575
575
|
|
|
576
|
-
body = Buffer.concat([body, cchunkData.body]);
|
|
576
|
+
body = Buffer.concat([body, cchunkData.body] as Uint8Array[]);
|
|
577
577
|
}
|
|
578
578
|
|
|
579
579
|
return {body, metadata};
|
|
@@ -638,7 +638,7 @@ function encodeFooter(
|
|
|
638
638
|
const metadataEncoded = serializeThrift(metadata);
|
|
639
639
|
const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
|
|
640
640
|
|
|
641
|
-
metadataEncoded.copy(footerEncoded);
|
|
641
|
+
metadataEncoded.copy(footerEncoded as Uint8Array);
|
|
642
642
|
footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
|
|
643
643
|
footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
|
|
644
644
|
return footerEncoded;
|
|
@@ -424,7 +424,7 @@ async function decodeDictionaryPage(
|
|
|
424
424
|
): Promise<(string | ArrayBuffer)[]> {
|
|
425
425
|
const cursorEnd = cursor.offset + pageHeader.compressed_page_size;
|
|
426
426
|
|
|
427
|
-
let dictCursor = {
|
|
427
|
+
let dictCursor: CursorBuffer = {
|
|
428
428
|
offset: 0,
|
|
429
429
|
buffer: cursor.buffer.slice(cursor.offset, cursorEnd),
|
|
430
430
|
size: cursorEnd - cursor.offset
|
|
@@ -475,7 +475,7 @@ function preserveBinary(d: any): ArrayBuffer | ArrayBufferView | string {
|
|
|
475
475
|
}
|
|
476
476
|
// Convert to ArrayBuffer
|
|
477
477
|
if (Buffer.isBuffer(d)) {
|
|
478
|
-
return d.buffer
|
|
478
|
+
return new Uint8Array(d.buffer, d.byteOffset, d.byteLength).slice().buffer;
|
|
479
479
|
}
|
|
480
480
|
return d.toString();
|
|
481
481
|
}
|
|
@@ -30,7 +30,7 @@ export function serializeThrift(obj: any): Buffer {
|
|
|
30
30
|
obj.write(protocol);
|
|
31
31
|
transport.flush();
|
|
32
32
|
|
|
33
|
-
return Buffer.concat(output);
|
|
33
|
+
return Buffer.concat(output as Uint8Array[]);
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
export function decodeThrift(obj: any, buf: Buffer, offset?: number) {
|