@loaders.gl/parquet 3.4.13 → 3.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +15 -24
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/bundle.js +1 -1
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +5 -5
- package/dist/es5/constants.js.map +1 -1
- package/dist/es5/index.js +24 -24
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +2 -7
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +22 -33
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +2 -2
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-metadata.js +16 -27
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +20 -151
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +13 -138
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +8 -29
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/index.js +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +10 -33
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +4 -22
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +13 -46
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +4 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +4 -4
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +3 -3
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +3 -6
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +4 -5
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +41 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +25 -30
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +26 -90
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js +245 -536
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +123 -133
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +138 -150
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +241 -251
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +58 -70
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +97 -107
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +136 -146
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +164 -174
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +274 -310
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +56 -66
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +136 -146
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +105 -115
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +162 -172
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +106 -116
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +76 -90
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +195 -327
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +155 -582
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +10 -11
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +65 -82
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +56 -87
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +40 -40
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +8 -12
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +22 -39
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/parquet-worker.js +15 -24
- package/dist/parquet-worker.js.map +3 -3
- package/package.json +6 -6
|
@@ -1,15 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
3
|
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
|
-
var _typeof = require("@babel/runtime/helpers/typeof");
|
|
5
4
|
Object.defineProperty(exports, "__esModule", {
|
|
6
5
|
value: true
|
|
7
6
|
});
|
|
8
7
|
exports.ParquetEnvelopeWriter = exports.ParquetEncoder = void 0;
|
|
9
|
-
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
10
|
-
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
11
|
-
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
12
|
-
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
13
8
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
14
9
|
var _codecs = require("../codecs");
|
|
15
10
|
var Compression = _interopRequireWildcard(require("../compression"));
|
|
@@ -18,20 +13,25 @@ var _parquetThrift = require("../parquet-thrift");
|
|
|
18
13
|
var _fileUtils = require("../utils/file-utils");
|
|
19
14
|
var _readUtils = require("../utils/read-utils");
|
|
20
15
|
var _nodeInt = _interopRequireDefault(require("node-int64"));
|
|
21
|
-
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function
|
|
22
|
-
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null ||
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
16
|
+
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
17
|
+
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
|
|
18
|
+
const PARQUET_MAGIC = 'PAR1';
|
|
19
|
+
const PARQUET_VERSION = 1;
|
|
20
|
+
const PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
21
|
+
const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
22
|
+
const PARQUET_RDLVL_TYPE = 'INT32';
|
|
23
|
+
const PARQUET_RDLVL_ENCODING = 'RLE';
|
|
24
|
+
class ParquetEncoder {
|
|
25
|
+
static async openFile(schema, path, opts) {
|
|
26
|
+
const outputStream = await (0, _fileUtils.osopen)(path, opts);
|
|
27
|
+
return ParquetEncoder.openStream(schema, outputStream, opts);
|
|
28
|
+
}
|
|
29
|
+
static async openStream(schema, outputStream) {
|
|
30
|
+
let opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
|
|
31
|
+
const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
32
|
+
return new ParquetEncoder(schema, envelopeWriter, opts);
|
|
33
|
+
}
|
|
34
|
+
constructor(schema, envelopeWriter, opts) {
|
|
35
35
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
36
36
|
(0, _defineProperty2.default)(this, "envelopeWriter", void 0);
|
|
37
37
|
(0, _defineProperty2.default)(this, "rowBuffer", void 0);
|
|
@@ -46,175 +46,55 @@ var ParquetEncoder = function () {
|
|
|
46
46
|
this.userMetadata = {};
|
|
47
47
|
this.writeHeader();
|
|
48
48
|
}
|
|
49
|
-
(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
case 10:
|
|
70
|
-
case "end":
|
|
71
|
-
return _context.stop();
|
|
72
|
-
}
|
|
73
|
-
}, _callee, this, [[0, 5]]);
|
|
74
|
-
}));
|
|
75
|
-
function writeHeader() {
|
|
76
|
-
return _writeHeader.apply(this, arguments);
|
|
77
|
-
}
|
|
78
|
-
return writeHeader;
|
|
79
|
-
}()
|
|
80
|
-
}, {
|
|
81
|
-
key: "appendRow",
|
|
82
|
-
value: function () {
|
|
83
|
-
var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
|
|
84
|
-
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
85
|
-
while (1) switch (_context2.prev = _context2.next) {
|
|
86
|
-
case 0:
|
|
87
|
-
if (!this.closed) {
|
|
88
|
-
_context2.next = 2;
|
|
89
|
-
break;
|
|
90
|
-
}
|
|
91
|
-
throw new Error('writer was closed');
|
|
92
|
-
case 2:
|
|
93
|
-
Shred.shredRecord(this.schema, row, this.rowBuffer);
|
|
94
|
-
if (this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
95
|
-
this.rowBuffer = {};
|
|
96
|
-
}
|
|
97
|
-
case 4:
|
|
98
|
-
case "end":
|
|
99
|
-
return _context2.stop();
|
|
100
|
-
}
|
|
101
|
-
}, _callee2, this);
|
|
102
|
-
}));
|
|
103
|
-
function appendRow(_x) {
|
|
104
|
-
return _appendRow.apply(this, arguments);
|
|
105
|
-
}
|
|
106
|
-
return appendRow;
|
|
107
|
-
}()
|
|
108
|
-
}, {
|
|
109
|
-
key: "close",
|
|
110
|
-
value: function () {
|
|
111
|
-
var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
|
|
112
|
-
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
113
|
-
while (1) switch (_context3.prev = _context3.next) {
|
|
114
|
-
case 0:
|
|
115
|
-
if (!this.closed) {
|
|
116
|
-
_context3.next = 2;
|
|
117
|
-
break;
|
|
118
|
-
}
|
|
119
|
-
throw new Error('writer was closed');
|
|
120
|
-
case 2:
|
|
121
|
-
this.closed = true;
|
|
122
|
-
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
123
|
-
this.rowBuffer = {};
|
|
124
|
-
}
|
|
125
|
-
_context3.next = 6;
|
|
126
|
-
return this.envelopeWriter.writeFooter(this.userMetadata);
|
|
127
|
-
case 6:
|
|
128
|
-
_context3.next = 8;
|
|
129
|
-
return this.envelopeWriter.close();
|
|
130
|
-
case 8:
|
|
131
|
-
if (callback) {
|
|
132
|
-
callback();
|
|
133
|
-
}
|
|
134
|
-
case 9:
|
|
135
|
-
case "end":
|
|
136
|
-
return _context3.stop();
|
|
137
|
-
}
|
|
138
|
-
}, _callee3, this);
|
|
139
|
-
}));
|
|
140
|
-
function close(_x2) {
|
|
141
|
-
return _close.apply(this, arguments);
|
|
142
|
-
}
|
|
143
|
-
return close;
|
|
144
|
-
}()
|
|
145
|
-
}, {
|
|
146
|
-
key: "setMetadata",
|
|
147
|
-
value: function setMetadata(key, value) {
|
|
148
|
-
this.userMetadata[String(key)] = String(value);
|
|
49
|
+
async writeHeader() {
|
|
50
|
+
try {
|
|
51
|
+
await this.envelopeWriter.writeHeader();
|
|
52
|
+
} catch (err) {
|
|
53
|
+
await this.envelopeWriter.close();
|
|
54
|
+
throw err;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
async appendRow(row) {
|
|
58
|
+
if (this.closed) {
|
|
59
|
+
throw new Error('writer was closed');
|
|
60
|
+
}
|
|
61
|
+
Shred.shredRecord(this.schema, row, this.rowBuffer);
|
|
62
|
+
if (this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
63
|
+
this.rowBuffer = {};
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
async close(callback) {
|
|
67
|
+
if (this.closed) {
|
|
68
|
+
throw new Error('writer was closed');
|
|
149
69
|
}
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
this.rowGroupSize = cnt;
|
|
70
|
+
this.closed = true;
|
|
71
|
+
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
72
|
+
this.rowBuffer = {};
|
|
154
73
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
74
|
+
await this.envelopeWriter.writeFooter(this.userMetadata);
|
|
75
|
+
await this.envelopeWriter.close();
|
|
76
|
+
if (callback) {
|
|
77
|
+
callback();
|
|
159
78
|
}
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
outputStream = _context4.sent;
|
|
172
|
-
return _context4.abrupt("return", ParquetEncoder.openStream(schema, outputStream, opts));
|
|
173
|
-
case 4:
|
|
174
|
-
case "end":
|
|
175
|
-
return _context4.stop();
|
|
176
|
-
}
|
|
177
|
-
}, _callee4);
|
|
178
|
-
}));
|
|
179
|
-
function openFile(_x3, _x4, _x5) {
|
|
180
|
-
return _openFile.apply(this, arguments);
|
|
181
|
-
}
|
|
182
|
-
return openFile;
|
|
183
|
-
}()
|
|
184
|
-
}, {
|
|
185
|
-
key: "openStream",
|
|
186
|
-
value: function () {
|
|
187
|
-
var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream) {
|
|
188
|
-
var opts,
|
|
189
|
-
envelopeWriter,
|
|
190
|
-
_args5 = arguments;
|
|
191
|
-
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
192
|
-
while (1) switch (_context5.prev = _context5.next) {
|
|
193
|
-
case 0:
|
|
194
|
-
opts = _args5.length > 2 && _args5[2] !== undefined ? _args5[2] : {};
|
|
195
|
-
_context5.next = 3;
|
|
196
|
-
return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
197
|
-
case 3:
|
|
198
|
-
envelopeWriter = _context5.sent;
|
|
199
|
-
return _context5.abrupt("return", new ParquetEncoder(schema, envelopeWriter, opts));
|
|
200
|
-
case 5:
|
|
201
|
-
case "end":
|
|
202
|
-
return _context5.stop();
|
|
203
|
-
}
|
|
204
|
-
}, _callee5);
|
|
205
|
-
}));
|
|
206
|
-
function openStream(_x6, _x7) {
|
|
207
|
-
return _openStream.apply(this, arguments);
|
|
208
|
-
}
|
|
209
|
-
return openStream;
|
|
210
|
-
}()
|
|
211
|
-
}]);
|
|
212
|
-
return ParquetEncoder;
|
|
213
|
-
}();
|
|
79
|
+
}
|
|
80
|
+
setMetadata(key, value) {
|
|
81
|
+
this.userMetadata[String(key)] = String(value);
|
|
82
|
+
}
|
|
83
|
+
setRowGroupSize(cnt) {
|
|
84
|
+
this.rowGroupSize = cnt;
|
|
85
|
+
}
|
|
86
|
+
setPageSize(cnt) {
|
|
87
|
+
this.envelopeWriter.setPageSize(cnt);
|
|
88
|
+
}
|
|
89
|
+
}
|
|
214
90
|
exports.ParquetEncoder = ParquetEncoder;
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
91
|
+
class ParquetEnvelopeWriter {
|
|
92
|
+
static async openStream(schema, outputStream, opts) {
|
|
93
|
+
const writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
|
|
94
|
+
const closeFn = _fileUtils.osclose.bind(undefined, outputStream);
|
|
95
|
+
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
|
|
96
|
+
}
|
|
97
|
+
constructor(schema, writeFn, closeFn, fileOffset, opts) {
|
|
218
98
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
219
99
|
(0, _defineProperty2.default)(this, "write", void 0);
|
|
220
100
|
(0, _defineProperty2.default)(this, "close", void 0);
|
|
@@ -232,88 +112,33 @@ var ParquetEnvelopeWriter = function () {
|
|
|
232
112
|
this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
|
|
233
113
|
this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
|
|
234
114
|
}
|
|
235
|
-
(
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
return encodeRowGroup(this.schema, records, {
|
|
256
|
-
baseOffset: this.offset,
|
|
257
|
-
pageSize: this.pageSize,
|
|
258
|
-
useDataPageV2: this.useDataPageV2
|
|
259
|
-
});
|
|
260
|
-
case 2:
|
|
261
|
-
rgroup = _context6.sent;
|
|
262
|
-
this.rowCount += records.rowCount;
|
|
263
|
-
this.rowGroups.push(rgroup.metadata);
|
|
264
|
-
_context6.next = 7;
|
|
265
|
-
return this.writeSection(rgroup.body);
|
|
266
|
-
case 7:
|
|
267
|
-
return _context6.abrupt("return", _context6.sent);
|
|
268
|
-
case 8:
|
|
269
|
-
case "end":
|
|
270
|
-
return _context6.stop();
|
|
271
|
-
}
|
|
272
|
-
}, _callee6, this);
|
|
273
|
-
}));
|
|
274
|
-
function writeRowGroup(_x8) {
|
|
275
|
-
return _writeRowGroup.apply(this, arguments);
|
|
276
|
-
}
|
|
277
|
-
return writeRowGroup;
|
|
278
|
-
}()
|
|
279
|
-
}, {
|
|
280
|
-
key: "writeFooter",
|
|
281
|
-
value: function writeFooter(userMetadata) {
|
|
282
|
-
if (!userMetadata) {
|
|
283
|
-
userMetadata = {};
|
|
284
|
-
}
|
|
285
|
-
return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
|
|
286
|
-
}
|
|
287
|
-
}, {
|
|
288
|
-
key: "setPageSize",
|
|
289
|
-
value: function setPageSize(cnt) {
|
|
290
|
-
this.pageSize = cnt;
|
|
115
|
+
writeSection(buf) {
|
|
116
|
+
this.offset += buf.length;
|
|
117
|
+
return this.write(buf);
|
|
118
|
+
}
|
|
119
|
+
writeHeader() {
|
|
120
|
+
return this.writeSection(Buffer.from(PARQUET_MAGIC));
|
|
121
|
+
}
|
|
122
|
+
async writeRowGroup(records) {
|
|
123
|
+
const rgroup = await encodeRowGroup(this.schema, records, {
|
|
124
|
+
baseOffset: this.offset,
|
|
125
|
+
pageSize: this.pageSize,
|
|
126
|
+
useDataPageV2: this.useDataPageV2
|
|
127
|
+
});
|
|
128
|
+
this.rowCount += records.rowCount;
|
|
129
|
+
this.rowGroups.push(rgroup.metadata);
|
|
130
|
+
return await this.writeSection(rgroup.body);
|
|
131
|
+
}
|
|
132
|
+
writeFooter(userMetadata) {
|
|
133
|
+
if (!userMetadata) {
|
|
134
|
+
userMetadata = {};
|
|
291
135
|
}
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
while (1) switch (_context7.prev = _context7.next) {
|
|
299
|
-
case 0:
|
|
300
|
-
writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
|
|
301
|
-
closeFn = _fileUtils.osclose.bind(undefined, outputStream);
|
|
302
|
-
return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
|
|
303
|
-
case 3:
|
|
304
|
-
case "end":
|
|
305
|
-
return _context7.stop();
|
|
306
|
-
}
|
|
307
|
-
}, _callee7);
|
|
308
|
-
}));
|
|
309
|
-
function openStream(_x9, _x10, _x11) {
|
|
310
|
-
return _openStream2.apply(this, arguments);
|
|
311
|
-
}
|
|
312
|
-
return openStream;
|
|
313
|
-
}()
|
|
314
|
-
}]);
|
|
315
|
-
return ParquetEnvelopeWriter;
|
|
316
|
-
}();
|
|
136
|
+
return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
|
|
137
|
+
}
|
|
138
|
+
setPageSize(cnt) {
|
|
139
|
+
this.pageSize = cnt;
|
|
140
|
+
}
|
|
141
|
+
}
|
|
317
142
|
exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
|
|
318
143
|
function encodeValues(type, encoding, values, opts) {
|
|
319
144
|
if (!(encoding in _codecs.PARQUET_CODECS)) {
|
|
@@ -321,252 +146,145 @@ function encodeValues(type, encoding, values, opts) {
|
|
|
321
146
|
}
|
|
322
147
|
return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
|
|
323
148
|
}
|
|
324
|
-
function encodeDataPage(
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
}),
|
|
362
|
-
uncompressed_page_size: dataBuf.length,
|
|
363
|
-
compressed_page_size: compressedBuf.length
|
|
364
|
-
});
|
|
365
|
-
headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
366
|
-
page = Buffer.concat([headerBuf, compressedBuf]);
|
|
367
|
-
return _context8.abrupt("return", {
|
|
368
|
-
header: header,
|
|
369
|
-
headerSize: headerBuf.length,
|
|
370
|
-
page: page
|
|
371
|
-
});
|
|
372
|
-
case 13:
|
|
373
|
-
case "end":
|
|
374
|
-
return _context8.stop();
|
|
375
|
-
}
|
|
376
|
-
}, _callee8);
|
|
377
|
-
}));
|
|
378
|
-
return _encodeDataPage.apply(this, arguments);
|
|
379
|
-
}
|
|
380
|
-
function encodeDataPageV2(_x14, _x15, _x16) {
|
|
381
|
-
return _encodeDataPageV.apply(this, arguments);
|
|
382
|
-
}
|
|
383
|
-
function _encodeDataPageV() {
|
|
384
|
-
_encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(column, data, rowCount) {
|
|
385
|
-
var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
|
|
386
|
-
return _regenerator.default.wrap(function _callee9$(_context9) {
|
|
387
|
-
while (1) switch (_context9.prev = _context9.next) {
|
|
388
|
-
case 0:
|
|
389
|
-
valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
390
|
-
typeLength: column.typeLength,
|
|
391
|
-
bitWidth: column.typeLength
|
|
392
|
-
});
|
|
393
|
-
_context9.next = 3;
|
|
394
|
-
return Compression.deflate(column.compression, valuesBuf);
|
|
395
|
-
case 3:
|
|
396
|
-
compressedBuf = _context9.sent;
|
|
397
|
-
rLevelsBuf = Buffer.alloc(0);
|
|
398
|
-
if (column.rLevelMax > 0) {
|
|
399
|
-
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
400
|
-
bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
|
|
401
|
-
disableEnvelope: true
|
|
402
|
-
});
|
|
403
|
-
}
|
|
404
|
-
dLevelsBuf = Buffer.alloc(0);
|
|
405
|
-
if (column.dLevelMax > 0) {
|
|
406
|
-
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
407
|
-
bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
|
|
408
|
-
disableEnvelope: true
|
|
409
|
-
});
|
|
410
|
-
}
|
|
411
|
-
header = new _parquetThrift.PageHeader({
|
|
412
|
-
type: _parquetThrift.PageType.DATA_PAGE_V2,
|
|
413
|
-
data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
|
|
414
|
-
num_values: data.count,
|
|
415
|
-
num_nulls: data.count - data.values.length,
|
|
416
|
-
num_rows: rowCount,
|
|
417
|
-
encoding: _parquetThrift.Encoding[column.encoding],
|
|
418
|
-
definition_levels_byte_length: dLevelsBuf.length,
|
|
419
|
-
repetition_levels_byte_length: rLevelsBuf.length,
|
|
420
|
-
is_compressed: column.compression !== 'UNCOMPRESSED'
|
|
421
|
-
}),
|
|
422
|
-
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
|
|
423
|
-
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
|
|
424
|
-
});
|
|
425
|
-
headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
426
|
-
page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
427
|
-
return _context9.abrupt("return", {
|
|
428
|
-
header: header,
|
|
429
|
-
headerSize: headerBuf.length,
|
|
430
|
-
page: page
|
|
431
|
-
});
|
|
432
|
-
case 12:
|
|
433
|
-
case "end":
|
|
434
|
-
return _context9.stop();
|
|
435
|
-
}
|
|
436
|
-
}, _callee9);
|
|
437
|
-
}));
|
|
438
|
-
return _encodeDataPageV.apply(this, arguments);
|
|
439
|
-
}
|
|
440
|
-
function encodeColumnChunk(_x17, _x18, _x19, _x20) {
|
|
441
|
-
return _encodeColumnChunk.apply(this, arguments);
|
|
149
|
+
async function encodeDataPage(column, data) {
|
|
150
|
+
let rLevelsBuf = Buffer.alloc(0);
|
|
151
|
+
if (column.rLevelMax > 0) {
|
|
152
|
+
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
153
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
let dLevelsBuf = Buffer.alloc(0);
|
|
157
|
+
if (column.dLevelMax > 0) {
|
|
158
|
+
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
159
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
|
|
160
|
+
});
|
|
161
|
+
}
|
|
162
|
+
const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
163
|
+
typeLength: column.typeLength,
|
|
164
|
+
bitWidth: column.typeLength
|
|
165
|
+
});
|
|
166
|
+
const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
167
|
+
const compressedBuf = await Compression.deflate(column.compression, dataBuf);
|
|
168
|
+
const header = new _parquetThrift.PageHeader({
|
|
169
|
+
type: _parquetThrift.PageType.DATA_PAGE,
|
|
170
|
+
data_page_header: new _parquetThrift.DataPageHeader({
|
|
171
|
+
num_values: data.count,
|
|
172
|
+
encoding: _parquetThrift.Encoding[column.encoding],
|
|
173
|
+
definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
|
|
174
|
+
repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
|
|
175
|
+
}),
|
|
176
|
+
uncompressed_page_size: dataBuf.length,
|
|
177
|
+
compressed_page_size: compressedBuf.length
|
|
178
|
+
});
|
|
179
|
+
const headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
180
|
+
const page = Buffer.concat([headerBuf, compressedBuf]);
|
|
181
|
+
return {
|
|
182
|
+
header,
|
|
183
|
+
headerSize: headerBuf.length,
|
|
184
|
+
page
|
|
185
|
+
};
|
|
442
186
|
}
|
|
443
|
-
function
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
|
|
485
|
-
metadataOffset = baseOffset + pageBuf.length;
|
|
486
|
-
body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
|
|
487
|
-
return _context10.abrupt("return", {
|
|
488
|
-
body: body,
|
|
489
|
-
metadata: metadata,
|
|
490
|
-
metadataOffset: metadataOffset
|
|
491
|
-
});
|
|
492
|
-
case 23:
|
|
493
|
-
case "end":
|
|
494
|
-
return _context10.stop();
|
|
495
|
-
}
|
|
496
|
-
}, _callee10);
|
|
497
|
-
}));
|
|
498
|
-
return _encodeColumnChunk.apply(this, arguments);
|
|
187
|
+
/**
 * Encode one column's buffered values as a single DATA_PAGE_V2 page.
 *
 * Only the encoded values are passed through `Compression.deflate`; the
 * repetition and definition level buffers are written as produced by
 * `encodeValues`. The page layout is: header, repetition levels,
 * definition levels, (compressed) values.
 *
 * @param column Column descriptor. Reads `primitiveType`, `encoding`,
 *   `typeLength`, `compression`, `rLevelMax` and `dLevelMax`.
 * @param data Buffered column data. Reads `values`, `rlevels`, `dlevels`
 *   and `count`.
 * @param rowCount Stored as `num_rows` in the V2 page header.
 * @returns Promise of `{header, headerSize, page}` where `headerSize` is the
 *   byte length of the serialized header and `page` is the complete page.
 */
async function encodeDataPageV2(column, data, rowCount) {
  // A level stream is only emitted when the column's max level is above 0;
  // otherwise it contributes an empty buffer to the page.
  const encodeLevels = (levels, maxLevel) =>
    maxLevel > 0
      ? encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, levels, {
          bitWidth: (0, _readUtils.getBitWidth)(maxLevel),
          disableEnvelope: true
        })
      : Buffer.alloc(0);

  const rawValues = encodeValues(column.primitiveType, column.encoding, data.values, {
    typeLength: column.typeLength,
    bitWidth: column.typeLength
  });
  const storedValues = await Compression.deflate(column.compression, rawValues);
  const repetitionLevels = encodeLevels(data.rlevels, column.rLevelMax);
  const definitionLevels = encodeLevels(data.dlevels, column.dLevelMax);

  // Levels are counted in both page sizes with their own (uncompressed) length.
  const levelBytes = repetitionLevels.length + definitionLevels.length;

  const pageHeaderV2 = new _parquetThrift.DataPageHeaderV2({
    num_values: data.count,
    // Entries counted in `data.count` that have no stored value.
    num_nulls: data.count - data.values.length,
    num_rows: rowCount,
    encoding: _parquetThrift.Encoding[column.encoding],
    definition_levels_byte_length: definitionLevels.length,
    repetition_levels_byte_length: repetitionLevels.length,
    is_compressed: column.compression !== 'UNCOMPRESSED'
  });
  const header = new _parquetThrift.PageHeader({
    type: _parquetThrift.PageType.DATA_PAGE_V2,
    data_page_header_v2: pageHeaderV2,
    uncompressed_page_size: levelBytes + rawValues.length,
    compressed_page_size: levelBytes + storedValues.length
  });

  const headerBytes = (0, _readUtils.serializeThrift)(header);
  return {
    header,
    headerSize: headerBytes.length,
    page: Buffer.concat([headerBytes, repetitionLevels, definitionLevels, storedValues])
  };
}
|
|
500
|
-
function
|
|
501
|
-
|
|
229
|
+
/**
 * Encode one column of a row group as a column chunk: a single data page
 * immediately followed by the serialized `ColumnMetaData`.
 *
 * @param column Column descriptor. Reads `path`, `encoding`, `primitiveType`
 *   and `compression`.
 * @param buffer Row buffer. Reads `columnData` (keyed by `column.path.join()`)
 *   and `rowCount`.
 * @param offset Position of this chunk relative to `opts.baseOffset`.
 * @param opts Reads `baseOffset` (treated as 0 when falsy) and `useDataPageV2`
 *   (selects `encodeDataPageV2` over `encodeDataPage`).
 * @returns Promise of `{body, metadata, metadataOffset}` where
 *   `metadataOffset` is the chunk start plus the page length.
 */
async function encodeColumnChunk(column, buffer, offset, opts) {
  const columnData = buffer.columnData[column.path.join()];
  const chunkStart = (opts.baseOffset || 0) + offset;

  const encodedPage = opts.useDataPageV2
    ? await encodeDataPageV2(column, columnData, buffer.rowCount)
    : await encodeDataPage(column, columnData);
  const pageBytes = encodedPage.page;

  // Chunk totals cover the page payload plus its serialized header.
  const metadata = new _parquetThrift.ColumnMetaData({
    path_in_schema: column.path,
    num_values: columnData.count,
    data_page_offset: chunkStart,
    encodings: [],
    total_uncompressed_size: encodedPage.header.uncompressed_page_size + encodedPage.headerSize,
    total_compressed_size: encodedPage.header.compressed_page_size + encodedPage.headerSize,
    type: _parquetThrift.Type[column.primitiveType],
    codec: _parquetThrift.CompressionCodec[column.compression]
  });
  // Level encoding first, then the column's value encoding.
  metadata.encodings.push(
    _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
    _parquetThrift.Encoding[column.encoding]
  );

  return {
    body: Buffer.concat([pageBytes, (0, _readUtils.serializeThrift)(metadata)]),
    metadata,
    metadataOffset: chunkStart + pageBytes.length
  };
}
|
|
503
|
-
function
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
}
|
|
528
|
-
return _context11.abrupt("continue", 16);
|
|
529
|
-
case 9:
|
|
530
|
-
_context11.next = 11;
|
|
531
|
-
return encodeColumnChunk(field, data, body.length, opts);
|
|
532
|
-
case 11:
|
|
533
|
-
cchunkData = _context11.sent;
|
|
534
|
-
cchunk = new _parquetThrift.ColumnChunk({
|
|
535
|
-
file_offset: cchunkData.metadataOffset,
|
|
536
|
-
meta_data: cchunkData.metadata
|
|
537
|
-
});
|
|
538
|
-
metadata.columns.push(cchunk);
|
|
539
|
-
metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
540
|
-
body = Buffer.concat([body, cchunkData.body]);
|
|
541
|
-
case 16:
|
|
542
|
-
_context11.next = 5;
|
|
543
|
-
break;
|
|
544
|
-
case 18:
|
|
545
|
-
_context11.next = 23;
|
|
546
|
-
break;
|
|
547
|
-
case 20:
|
|
548
|
-
_context11.prev = 20;
|
|
549
|
-
_context11.t0 = _context11["catch"](3);
|
|
550
|
-
_iterator2.e(_context11.t0);
|
|
551
|
-
case 23:
|
|
552
|
-
_context11.prev = 23;
|
|
553
|
-
_iterator2.f();
|
|
554
|
-
return _context11.finish(23);
|
|
555
|
-
case 26:
|
|
556
|
-
return _context11.abrupt("return", {
|
|
557
|
-
body: body,
|
|
558
|
-
metadata: metadata
|
|
559
|
-
});
|
|
560
|
-
case 27:
|
|
561
|
-
case "end":
|
|
562
|
-
return _context11.stop();
|
|
563
|
-
}
|
|
564
|
-
}, _callee11, null, [[3, 20, 23, 26]]);
|
|
565
|
-
}));
|
|
566
|
-
return _encodeRowGroup.apply(this, arguments);
|
|
261
|
+
/**
 * Encode every non-nested field of `schema` as a column chunk and assemble
 * the chunks into one row-group body plus its `RowGroup` metadata.
 *
 * Chunks are encoded sequentially because each one needs the byte offset at
 * which it starts, i.e. the total size of all chunks before it.
 *
 * @param schema Reads `fieldList`; fields with `isNested` set are skipped.
 * @param data Row buffer; `rowCount` is stored as `num_rows` and the buffer
 *   is handed to `encodeColumnChunk`.
 * @param opts Passed through unchanged to `encodeColumnChunk`.
 * @returns Promise of `{body, metadata}`. `metadata.total_byte_size` is the
 *   summed length of all chunk bodies (left at the initial `0` when no
 *   column chunk was written).
 */
async function encodeRowGroup(schema, data, opts) {
  const metadata = new _parquetThrift.RowGroup({
    num_rows: data.rowCount,
    columns: [],
    total_byte_size: 0
  });

  // Collect chunk bodies and concatenate once at the end. Re-concatenating
  // the growing body inside the loop would copy all previous bytes for every
  // column, which is quadratic in the row-group size.
  const chunkBodies = [];
  let bytesWritten = 0;

  for (const field of schema.fieldList) {
    if (field.isNested) {
      continue;
    }
    const cchunkData = await encodeColumnChunk(field, data, bytesWritten, opts);
    metadata.columns.push(
      new _parquetThrift.ColumnChunk({
        file_offset: cchunkData.metadataOffset,
        meta_data: cchunkData.metadata
      })
    );
    chunkBodies.push(cchunkData.body);
    bytesWritten += cchunkData.body.length;
  }

  if (chunkBodies.length > 0) {
    metadata.total_byte_size = new _nodeInt.default(bytesWritten);
  }

  return {
    body: Buffer.concat(chunkBodies, bytesWritten),
    metadata
  };
}
|
|
568
286
|
function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
569
|
-
|
|
287
|
+
const metadata = new _parquetThrift.FileMetaData({
|
|
570
288
|
version: PARQUET_VERSION,
|
|
571
289
|
created_by: 'parquets',
|
|
572
290
|
num_rows: rowCount,
|
|
@@ -574,49 +292,40 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
|
574
292
|
schema: [],
|
|
575
293
|
key_value_metadata: []
|
|
576
294
|
});
|
|
577
|
-
for (
|
|
295
|
+
for (const key in userMetadata) {
|
|
578
296
|
var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
|
|
579
|
-
|
|
580
|
-
key
|
|
297
|
+
const kv = new _parquetThrift.KeyValue({
|
|
298
|
+
key,
|
|
581
299
|
value: userMetadata[key]
|
|
582
300
|
});
|
|
583
301
|
(_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
|
|
584
302
|
}
|
|
585
303
|
{
|
|
586
|
-
|
|
304
|
+
const schemaRoot = new _parquetThrift.SchemaElement({
|
|
587
305
|
name: 'root',
|
|
588
306
|
num_children: Object.keys(schema.fields).length
|
|
589
307
|
});
|
|
590
308
|
metadata.schema.push(schemaRoot);
|
|
591
309
|
}
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
schemaElem.type = _parquetThrift.Type[field.primitiveType];
|
|
606
|
-
}
|
|
607
|
-
if (field.originalType) {
|
|
608
|
-
schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
|
|
609
|
-
}
|
|
610
|
-
schemaElem.type_length = field.typeLength;
|
|
611
|
-
metadata.schema.push(schemaElem);
|
|
310
|
+
for (const field of schema.fieldList) {
|
|
311
|
+
const relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
|
|
312
|
+
const schemaElem = new _parquetThrift.SchemaElement({
|
|
313
|
+
name: field.name,
|
|
314
|
+
repetition_type: relt
|
|
315
|
+
});
|
|
316
|
+
if (field.isNested) {
|
|
317
|
+
schemaElem.num_children = field.fieldCount;
|
|
318
|
+
} else {
|
|
319
|
+
schemaElem.type = _parquetThrift.Type[field.primitiveType];
|
|
320
|
+
}
|
|
321
|
+
if (field.originalType) {
|
|
322
|
+
schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
|
|
612
323
|
}
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
} finally {
|
|
616
|
-
_iterator.f();
|
|
324
|
+
schemaElem.type_length = field.typeLength;
|
|
325
|
+
metadata.schema.push(schemaElem);
|
|
617
326
|
}
|
|
618
|
-
|
|
619
|
-
|
|
327
|
+
const metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
|
|
328
|
+
const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
|
|
620
329
|
metadataEncoded.copy(footerEncoded);
|
|
621
330
|
footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
|
|
622
331
|
footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
|