@loaders.gl/parquet 3.4.13 → 3.4.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +15 -24
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/bundle.js +1 -1
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +5 -5
- package/dist/es5/constants.js.map +1 -1
- package/dist/es5/index.js +24 -24
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +2 -7
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +22 -33
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +2 -2
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-metadata.js +16 -27
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +20 -151
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +13 -138
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/lib/wasm/encode-parquet-wasm.js +8 -29
- package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/index.js +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +10 -33
- package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +4 -22
- package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +13 -46
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +4 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +4 -4
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +3 -3
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +3 -6
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +4 -5
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +41 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +25 -30
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +26 -90
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js +245 -536
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +123 -133
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +138 -150
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +241 -251
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +58 -70
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +97 -107
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +136 -146
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +164 -174
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +274 -310
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +56 -66
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +136 -146
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +105 -115
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +162 -172
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +71 -81
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +106 -116
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +76 -90
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +58 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +28 -40
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +195 -327
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +155 -582
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +10 -11
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +65 -82
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +56 -87
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +40 -40
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +8 -12
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +22 -39
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/parquet-worker.js +15 -24
- package/dist/parquet-worker.js.map +3 -3
- package/package.json +6 -6
|
@@ -5,606 +5,179 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
value: true
|
|
6
6
|
});
|
|
7
7
|
exports.ParquetReader = void 0;
|
|
8
|
-
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
9
|
-
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
10
|
-
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
11
|
-
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
12
8
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
13
|
-
var _awaitAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/awaitAsyncGenerator"));
|
|
14
|
-
var _wrapAsyncGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/wrapAsyncGenerator"));
|
|
15
9
|
var _schema = require("../schema/schema");
|
|
16
10
|
var _decoders = require("./decoders");
|
|
17
11
|
var _shred = require("../schema/shred");
|
|
18
12
|
var _constants = require("../../constants");
|
|
19
13
|
var _parquetThrift = require("../parquet-thrift");
|
|
20
14
|
var _readUtils = require("../utils/read-utils");
|
|
21
|
-
|
|
22
|
-
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
23
|
-
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; return arr2; }
|
|
24
|
-
function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); enumerableOnly && (symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; })), keys.push.apply(keys, symbols); } return keys; }
|
|
25
|
-
function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = null != arguments[i] ? arguments[i] : {}; i % 2 ? ownKeys(Object(source), !0).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)) : ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } return target; }
|
|
26
|
-
function _asyncIterator(iterable) { var method, async, sync, retry = 2; for ("undefined" != typeof Symbol && (async = Symbol.asyncIterator, sync = Symbol.iterator); retry--;) { if (async && null != (method = iterable[async])) return method.call(iterable); if (sync && null != (method = iterable[sync])) return new AsyncFromSyncIterator(method.call(iterable)); async = "@@asyncIterator", sync = "@@iterator"; } throw new TypeError("Object is not async iterable"); }
|
|
27
|
-
function AsyncFromSyncIterator(s) { function AsyncFromSyncIteratorContinuation(r) { if (Object(r) !== r) return Promise.reject(new TypeError(r + " is not an object.")); var done = r.done; return Promise.resolve(r.value).then(function (value) { return { value: value, done: done }; }); } return AsyncFromSyncIterator = function AsyncFromSyncIterator(s) { this.s = s, this.n = s.next; }, AsyncFromSyncIterator.prototype = { s: null, n: null, next: function next() { return AsyncFromSyncIteratorContinuation(this.n.apply(this.s, arguments)); }, return: function _return(value) { var ret = this.s.return; return void 0 === ret ? Promise.resolve({ value: value, done: !0 }) : AsyncFromSyncIteratorContinuation(ret.apply(this.s, arguments)); }, throw: function _throw(value) { var thr = this.s.return; return void 0 === thr ? Promise.reject(value) : AsyncFromSyncIteratorContinuation(thr.apply(this.s, arguments)); } }, new AsyncFromSyncIterator(s); }
|
|
28
|
-
var DEFAULT_PROPS = {
|
|
15
|
+
const DEFAULT_PROPS = {
|
|
29
16
|
defaultDictionarySize: 1e6
|
|
30
17
|
};
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
(0, _classCallCheck2.default)(this, ParquetReader);
|
|
18
|
+
class ParquetReader {
|
|
19
|
+
constructor(file, props) {
|
|
34
20
|
(0, _defineProperty2.default)(this, "props", void 0);
|
|
35
21
|
(0, _defineProperty2.default)(this, "file", void 0);
|
|
36
22
|
(0, _defineProperty2.default)(this, "metadata", null);
|
|
37
23
|
this.file = file;
|
|
38
|
-
this.props =
|
|
24
|
+
this.props = {
|
|
25
|
+
...DEFAULT_PROPS,
|
|
26
|
+
...props
|
|
27
|
+
};
|
|
39
28
|
}
|
|
40
|
-
(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
29
|
+
close() {
|
|
30
|
+
this.file.close();
|
|
31
|
+
}
|
|
32
|
+
async *rowIterator(props) {
|
|
33
|
+
for await (const rows of this.rowBatchIterator(props)) {
|
|
34
|
+
for (const row of rows) {
|
|
35
|
+
yield row;
|
|
36
|
+
}
|
|
44
37
|
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
var _iteratorAbruptCompletion, _didIteratorError, _iteratorError, _iterator, _step, rows, _iterator3, _step3, row;
|
|
51
|
-
return _regenerator.default.wrap(function _callee$(_context) {
|
|
52
|
-
while (1) switch (_context.prev = _context.next) {
|
|
53
|
-
case 0:
|
|
54
|
-
_iteratorAbruptCompletion = false;
|
|
55
|
-
_didIteratorError = false;
|
|
56
|
-
_context.prev = 2;
|
|
57
|
-
_iterator = _asyncIterator(_this.rowBatchIterator(props));
|
|
58
|
-
case 4:
|
|
59
|
-
_context.next = 6;
|
|
60
|
-
return (0, _awaitAsyncGenerator2.default)(_iterator.next());
|
|
61
|
-
case 6:
|
|
62
|
-
if (!(_iteratorAbruptCompletion = !(_step = _context.sent).done)) {
|
|
63
|
-
_context.next = 28;
|
|
64
|
-
break;
|
|
65
|
-
}
|
|
66
|
-
rows = _step.value;
|
|
67
|
-
_iterator3 = _createForOfIteratorHelper(rows);
|
|
68
|
-
_context.prev = 9;
|
|
69
|
-
_iterator3.s();
|
|
70
|
-
case 11:
|
|
71
|
-
if ((_step3 = _iterator3.n()).done) {
|
|
72
|
-
_context.next = 17;
|
|
73
|
-
break;
|
|
74
|
-
}
|
|
75
|
-
row = _step3.value;
|
|
76
|
-
_context.next = 15;
|
|
77
|
-
return row;
|
|
78
|
-
case 15:
|
|
79
|
-
_context.next = 11;
|
|
80
|
-
break;
|
|
81
|
-
case 17:
|
|
82
|
-
_context.next = 22;
|
|
83
|
-
break;
|
|
84
|
-
case 19:
|
|
85
|
-
_context.prev = 19;
|
|
86
|
-
_context.t0 = _context["catch"](9);
|
|
87
|
-
_iterator3.e(_context.t0);
|
|
88
|
-
case 22:
|
|
89
|
-
_context.prev = 22;
|
|
90
|
-
_iterator3.f();
|
|
91
|
-
return _context.finish(22);
|
|
92
|
-
case 25:
|
|
93
|
-
_iteratorAbruptCompletion = false;
|
|
94
|
-
_context.next = 4;
|
|
95
|
-
break;
|
|
96
|
-
case 28:
|
|
97
|
-
_context.next = 34;
|
|
98
|
-
break;
|
|
99
|
-
case 30:
|
|
100
|
-
_context.prev = 30;
|
|
101
|
-
_context.t1 = _context["catch"](2);
|
|
102
|
-
_didIteratorError = true;
|
|
103
|
-
_iteratorError = _context.t1;
|
|
104
|
-
case 34:
|
|
105
|
-
_context.prev = 34;
|
|
106
|
-
_context.prev = 35;
|
|
107
|
-
if (!(_iteratorAbruptCompletion && _iterator.return != null)) {
|
|
108
|
-
_context.next = 39;
|
|
109
|
-
break;
|
|
110
|
-
}
|
|
111
|
-
_context.next = 39;
|
|
112
|
-
return (0, _awaitAsyncGenerator2.default)(_iterator.return());
|
|
113
|
-
case 39:
|
|
114
|
-
_context.prev = 39;
|
|
115
|
-
if (!_didIteratorError) {
|
|
116
|
-
_context.next = 42;
|
|
117
|
-
break;
|
|
118
|
-
}
|
|
119
|
-
throw _iteratorError;
|
|
120
|
-
case 42:
|
|
121
|
-
return _context.finish(39);
|
|
122
|
-
case 43:
|
|
123
|
-
return _context.finish(34);
|
|
124
|
-
case 44:
|
|
125
|
-
case "end":
|
|
126
|
-
return _context.stop();
|
|
127
|
-
}
|
|
128
|
-
}, _callee, null, [[2, 30, 34, 44], [9, 19, 22, 25], [35,, 39, 43]]);
|
|
129
|
-
}))();
|
|
38
|
+
}
|
|
39
|
+
async *rowBatchIterator(props) {
|
|
40
|
+
const schema = await this.getSchema();
|
|
41
|
+
for await (const rowGroup of this.rowGroupIterator(props)) {
|
|
42
|
+
yield (0, _shred.materializeRecords)(schema, rowGroup);
|
|
130
43
|
}
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
_context2.next = 2;
|
|
141
|
-
return (0, _awaitAsyncGenerator2.default)(_this2.getSchema());
|
|
142
|
-
case 2:
|
|
143
|
-
schema = _context2.sent;
|
|
144
|
-
_iteratorAbruptCompletion2 = false;
|
|
145
|
-
_didIteratorError2 = false;
|
|
146
|
-
_context2.prev = 5;
|
|
147
|
-
_iterator2 = _asyncIterator(_this2.rowGroupIterator(props));
|
|
148
|
-
case 7:
|
|
149
|
-
_context2.next = 9;
|
|
150
|
-
return (0, _awaitAsyncGenerator2.default)(_iterator2.next());
|
|
151
|
-
case 9:
|
|
152
|
-
if (!(_iteratorAbruptCompletion2 = !(_step2 = _context2.sent).done)) {
|
|
153
|
-
_context2.next = 16;
|
|
154
|
-
break;
|
|
155
|
-
}
|
|
156
|
-
rowGroup = _step2.value;
|
|
157
|
-
_context2.next = 13;
|
|
158
|
-
return (0, _shred.materializeRecords)(schema, rowGroup);
|
|
159
|
-
case 13:
|
|
160
|
-
_iteratorAbruptCompletion2 = false;
|
|
161
|
-
_context2.next = 7;
|
|
162
|
-
break;
|
|
163
|
-
case 16:
|
|
164
|
-
_context2.next = 22;
|
|
165
|
-
break;
|
|
166
|
-
case 18:
|
|
167
|
-
_context2.prev = 18;
|
|
168
|
-
_context2.t0 = _context2["catch"](5);
|
|
169
|
-
_didIteratorError2 = true;
|
|
170
|
-
_iteratorError2 = _context2.t0;
|
|
171
|
-
case 22:
|
|
172
|
-
_context2.prev = 22;
|
|
173
|
-
_context2.prev = 23;
|
|
174
|
-
if (!(_iteratorAbruptCompletion2 && _iterator2.return != null)) {
|
|
175
|
-
_context2.next = 27;
|
|
176
|
-
break;
|
|
177
|
-
}
|
|
178
|
-
_context2.next = 27;
|
|
179
|
-
return (0, _awaitAsyncGenerator2.default)(_iterator2.return());
|
|
180
|
-
case 27:
|
|
181
|
-
_context2.prev = 27;
|
|
182
|
-
if (!_didIteratorError2) {
|
|
183
|
-
_context2.next = 30;
|
|
184
|
-
break;
|
|
185
|
-
}
|
|
186
|
-
throw _iteratorError2;
|
|
187
|
-
case 30:
|
|
188
|
-
return _context2.finish(27);
|
|
189
|
-
case 31:
|
|
190
|
-
return _context2.finish(22);
|
|
191
|
-
case 32:
|
|
192
|
-
case "end":
|
|
193
|
-
return _context2.stop();
|
|
194
|
-
}
|
|
195
|
-
}, _callee2, null, [[5, 18, 22, 32], [23,, 27, 31]]);
|
|
196
|
-
}))();
|
|
44
|
+
}
|
|
45
|
+
async *rowGroupIterator(props) {
|
|
46
|
+
const columnList = ((props === null || props === void 0 ? void 0 : props.columnList) || []).map(x => Array.isArray(x) ? x : [x]);
|
|
47
|
+
const metadata = await this.getFileMetadata();
|
|
48
|
+
const schema = await this.getSchema();
|
|
49
|
+
const rowGroupCount = (metadata === null || metadata === void 0 ? void 0 : metadata.row_groups.length) || 0;
|
|
50
|
+
for (let rowGroupIndex = 0; rowGroupIndex < rowGroupCount; rowGroupIndex++) {
|
|
51
|
+
const rowGroup = await this.readRowGroup(schema, metadata.row_groups[rowGroupIndex], columnList);
|
|
52
|
+
yield rowGroup;
|
|
197
53
|
}
|
|
198
|
-
}
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
schema = _context3.sent;
|
|
218
|
-
rowGroupCount = (metadata === null || metadata === void 0 ? void 0 : metadata.row_groups.length) || 0;
|
|
219
|
-
rowGroupIndex = 0;
|
|
220
|
-
case 9:
|
|
221
|
-
if (!(rowGroupIndex < rowGroupCount)) {
|
|
222
|
-
_context3.next = 18;
|
|
223
|
-
break;
|
|
224
|
-
}
|
|
225
|
-
_context3.next = 12;
|
|
226
|
-
return (0, _awaitAsyncGenerator2.default)(_this3.readRowGroup(schema, metadata.row_groups[rowGroupIndex], columnList));
|
|
227
|
-
case 12:
|
|
228
|
-
rowGroup = _context3.sent;
|
|
229
|
-
_context3.next = 15;
|
|
230
|
-
return rowGroup;
|
|
231
|
-
case 15:
|
|
232
|
-
rowGroupIndex++;
|
|
233
|
-
_context3.next = 9;
|
|
234
|
-
break;
|
|
235
|
-
case 18:
|
|
236
|
-
case "end":
|
|
237
|
-
return _context3.stop();
|
|
238
|
-
}
|
|
239
|
-
}, _callee3);
|
|
240
|
-
}))();
|
|
54
|
+
}
|
|
55
|
+
async getRowCount() {
|
|
56
|
+
const metadata = await this.getFileMetadata();
|
|
57
|
+
return Number(metadata.num_rows);
|
|
58
|
+
}
|
|
59
|
+
async getSchema() {
|
|
60
|
+
const metadata = await this.getFileMetadata();
|
|
61
|
+
const root = metadata.schema[0];
|
|
62
|
+
const {
|
|
63
|
+
schema: schemaDefinition
|
|
64
|
+
} = (0, _decoders.decodeSchema)(metadata.schema, 1, root.num_children);
|
|
65
|
+
const schema = new _schema.ParquetSchema(schemaDefinition);
|
|
66
|
+
return schema;
|
|
67
|
+
}
|
|
68
|
+
async getSchemaMetadata() {
|
|
69
|
+
const metadata = await this.getFileMetadata();
|
|
70
|
+
const md = {};
|
|
71
|
+
for (const kv of metadata.key_value_metadata) {
|
|
72
|
+
md[kv.key] = kv.value;
|
|
241
73
|
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
return getSchema;
|
|
292
|
-
}()
|
|
293
|
-
}, {
|
|
294
|
-
key: "getSchemaMetadata",
|
|
295
|
-
value: function () {
|
|
296
|
-
var _getSchemaMetadata = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6() {
|
|
297
|
-
var metadata, md, _iterator4, _step4, kv;
|
|
298
|
-
return _regenerator.default.wrap(function _callee6$(_context6) {
|
|
299
|
-
while (1) switch (_context6.prev = _context6.next) {
|
|
300
|
-
case 0:
|
|
301
|
-
_context6.next = 2;
|
|
302
|
-
return this.getFileMetadata();
|
|
303
|
-
case 2:
|
|
304
|
-
metadata = _context6.sent;
|
|
305
|
-
md = {};
|
|
306
|
-
_iterator4 = _createForOfIteratorHelper(metadata.key_value_metadata);
|
|
307
|
-
try {
|
|
308
|
-
for (_iterator4.s(); !(_step4 = _iterator4.n()).done;) {
|
|
309
|
-
kv = _step4.value;
|
|
310
|
-
md[kv.key] = kv.value;
|
|
311
|
-
}
|
|
312
|
-
} catch (err) {
|
|
313
|
-
_iterator4.e(err);
|
|
314
|
-
} finally {
|
|
315
|
-
_iterator4.f();
|
|
316
|
-
}
|
|
317
|
-
return _context6.abrupt("return", md);
|
|
318
|
-
case 7:
|
|
319
|
-
case "end":
|
|
320
|
-
return _context6.stop();
|
|
321
|
-
}
|
|
322
|
-
}, _callee6, this);
|
|
323
|
-
}));
|
|
324
|
-
function getSchemaMetadata() {
|
|
325
|
-
return _getSchemaMetadata.apply(this, arguments);
|
|
326
|
-
}
|
|
327
|
-
return getSchemaMetadata;
|
|
328
|
-
}()
|
|
329
|
-
}, {
|
|
330
|
-
key: "getFileMetadata",
|
|
331
|
-
value: function () {
|
|
332
|
-
var _getFileMetadata = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7() {
|
|
333
|
-
return _regenerator.default.wrap(function _callee7$(_context7) {
|
|
334
|
-
while (1) switch (_context7.prev = _context7.next) {
|
|
335
|
-
case 0:
|
|
336
|
-
if (this.metadata) {
|
|
337
|
-
_context7.next = 4;
|
|
338
|
-
break;
|
|
339
|
-
}
|
|
340
|
-
_context7.next = 3;
|
|
341
|
-
return this.readHeader();
|
|
342
|
-
case 3:
|
|
343
|
-
this.metadata = this.readFooter();
|
|
344
|
-
case 4:
|
|
345
|
-
return _context7.abrupt("return", this.metadata);
|
|
346
|
-
case 5:
|
|
347
|
-
case "end":
|
|
348
|
-
return _context7.stop();
|
|
349
|
-
}
|
|
350
|
-
}, _callee7, this);
|
|
351
|
-
}));
|
|
352
|
-
function getFileMetadata() {
|
|
353
|
-
return _getFileMetadata.apply(this, arguments);
|
|
354
|
-
}
|
|
355
|
-
return getFileMetadata;
|
|
356
|
-
}()
|
|
357
|
-
}, {
|
|
358
|
-
key: "readHeader",
|
|
359
|
-
value: function () {
|
|
360
|
-
var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8() {
|
|
361
|
-
var buffer, magic;
|
|
362
|
-
return _regenerator.default.wrap(function _callee8$(_context8) {
|
|
363
|
-
while (1) switch (_context8.prev = _context8.next) {
|
|
364
|
-
case 0:
|
|
365
|
-
_context8.next = 2;
|
|
366
|
-
return this.file.read(0, _constants.PARQUET_MAGIC.length);
|
|
367
|
-
case 2:
|
|
368
|
-
buffer = _context8.sent;
|
|
369
|
-
magic = buffer.toString();
|
|
370
|
-
_context8.t0 = magic;
|
|
371
|
-
_context8.next = _context8.t0 === _constants.PARQUET_MAGIC ? 7 : _context8.t0 === _constants.PARQUET_MAGIC_ENCRYPTED ? 8 : 9;
|
|
372
|
-
break;
|
|
373
|
-
case 7:
|
|
374
|
-
return _context8.abrupt("break", 10);
|
|
375
|
-
case 8:
|
|
376
|
-
throw new Error('Encrypted parquet file not supported');
|
|
377
|
-
case 9:
|
|
378
|
-
throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
|
|
379
|
-
case 10:
|
|
380
|
-
case "end":
|
|
381
|
-
return _context8.stop();
|
|
382
|
-
}
|
|
383
|
-
}, _callee8, this);
|
|
384
|
-
}));
|
|
385
|
-
function readHeader() {
|
|
386
|
-
return _readHeader.apply(this, arguments);
|
|
387
|
-
}
|
|
388
|
-
return readHeader;
|
|
389
|
-
}()
|
|
390
|
-
}, {
|
|
391
|
-
key: "readFooter",
|
|
392
|
-
value: function () {
|
|
393
|
-
var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
|
|
394
|
-
var trailerLen, trailerBuf, magic, metadataSize, metadataOffset, metadataBuf, _decodeFileMetadata, metadata;
|
|
395
|
-
return _regenerator.default.wrap(function _callee9$(_context9) {
|
|
396
|
-
while (1) switch (_context9.prev = _context9.next) {
|
|
397
|
-
case 0:
|
|
398
|
-
trailerLen = _constants.PARQUET_MAGIC.length + 4;
|
|
399
|
-
_context9.next = 3;
|
|
400
|
-
return this.file.read(this.file.size - trailerLen, trailerLen);
|
|
401
|
-
case 3:
|
|
402
|
-
trailerBuf = _context9.sent;
|
|
403
|
-
magic = trailerBuf.slice(4).toString();
|
|
404
|
-
if (!(magic !== _constants.PARQUET_MAGIC)) {
|
|
405
|
-
_context9.next = 7;
|
|
406
|
-
break;
|
|
407
|
-
}
|
|
408
|
-
throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
|
|
409
|
-
case 7:
|
|
410
|
-
metadataSize = trailerBuf.readUInt32LE(0);
|
|
411
|
-
metadataOffset = this.file.size - metadataSize - trailerLen;
|
|
412
|
-
if (!(metadataOffset < _constants.PARQUET_MAGIC.length)) {
|
|
413
|
-
_context9.next = 11;
|
|
414
|
-
break;
|
|
415
|
-
}
|
|
416
|
-
throw new Error("Invalid metadata size ".concat(metadataOffset));
|
|
417
|
-
case 11:
|
|
418
|
-
_context9.next = 13;
|
|
419
|
-
return this.file.read(metadataOffset, metadataSize);
|
|
420
|
-
case 13:
|
|
421
|
-
metadataBuf = _context9.sent;
|
|
422
|
-
_decodeFileMetadata = (0, _readUtils.decodeFileMetadata)(metadataBuf), metadata = _decodeFileMetadata.metadata;
|
|
423
|
-
return _context9.abrupt("return", metadata);
|
|
424
|
-
case 16:
|
|
425
|
-
case "end":
|
|
426
|
-
return _context9.stop();
|
|
427
|
-
}
|
|
428
|
-
}, _callee9, this);
|
|
429
|
-
}));
|
|
430
|
-
function readFooter() {
|
|
431
|
-
return _readFooter.apply(this, arguments);
|
|
432
|
-
}
|
|
433
|
-
return readFooter;
|
|
434
|
-
}()
|
|
435
|
-
}, {
|
|
436
|
-
key: "readRowGroup",
|
|
437
|
-
value: function () {
|
|
438
|
-
var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(schema, rowGroup, columnList) {
|
|
439
|
-
var buffer, _iterator5, _step5, colChunk, colMetadata, colKey;
|
|
440
|
-
return _regenerator.default.wrap(function _callee10$(_context10) {
|
|
441
|
-
while (1) switch (_context10.prev = _context10.next) {
|
|
442
|
-
case 0:
|
|
443
|
-
buffer = {
|
|
444
|
-
rowCount: Number(rowGroup.num_rows),
|
|
445
|
-
columnData: {}
|
|
446
|
-
};
|
|
447
|
-
_iterator5 = _createForOfIteratorHelper(rowGroup.columns);
|
|
448
|
-
_context10.prev = 2;
|
|
449
|
-
_iterator5.s();
|
|
450
|
-
case 4:
|
|
451
|
-
if ((_step5 = _iterator5.n()).done) {
|
|
452
|
-
_context10.next = 15;
|
|
453
|
-
break;
|
|
454
|
-
}
|
|
455
|
-
colChunk = _step5.value;
|
|
456
|
-
colMetadata = colChunk.meta_data;
|
|
457
|
-
colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
|
|
458
|
-
if (!(columnList.length > 0 && (0, _readUtils.fieldIndexOf)(columnList, colKey) < 0)) {
|
|
459
|
-
_context10.next = 10;
|
|
460
|
-
break;
|
|
461
|
-
}
|
|
462
|
-
return _context10.abrupt("continue", 13);
|
|
463
|
-
case 10:
|
|
464
|
-
_context10.next = 12;
|
|
465
|
-
return this.readColumnChunk(schema, colChunk);
|
|
466
|
-
case 12:
|
|
467
|
-
buffer.columnData[colKey.join()] = _context10.sent;
|
|
468
|
-
case 13:
|
|
469
|
-
_context10.next = 4;
|
|
470
|
-
break;
|
|
471
|
-
case 15:
|
|
472
|
-
_context10.next = 20;
|
|
473
|
-
break;
|
|
474
|
-
case 17:
|
|
475
|
-
_context10.prev = 17;
|
|
476
|
-
_context10.t0 = _context10["catch"](2);
|
|
477
|
-
_iterator5.e(_context10.t0);
|
|
478
|
-
case 20:
|
|
479
|
-
_context10.prev = 20;
|
|
480
|
-
_iterator5.f();
|
|
481
|
-
return _context10.finish(20);
|
|
482
|
-
case 23:
|
|
483
|
-
return _context10.abrupt("return", buffer);
|
|
484
|
-
case 24:
|
|
485
|
-
case "end":
|
|
486
|
-
return _context10.stop();
|
|
487
|
-
}
|
|
488
|
-
}, _callee10, this, [[2, 17, 20, 23]]);
|
|
489
|
-
}));
|
|
490
|
-
function readRowGroup(_x, _x2, _x3) {
|
|
491
|
-
return _readRowGroup.apply(this, arguments);
|
|
492
|
-
}
|
|
493
|
-
return readRowGroup;
|
|
494
|
-
}()
|
|
495
|
-
}, {
|
|
496
|
-
// Class-member entry for `readColumnChunk(schema, colChunk)` in transpiled form:
// an async function expressed as a regenerator state machine. Each `case N`
// label below is a resume point, `_context11.next` selects the next one, and
// `_context11.sent` carries the value of the promise that was just awaited.
key: "readColumnChunk",
|
|
497
|
-
value: function () {
|
|
498
|
-
var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(schema, colChunk) {
|
|
499
|
-
var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
|
|
500
|
-
var field, type, compression, pagesOffset, pagesSize, _colChunk$meta_data6, options, dictionary, dictionaryPageOffset, dictionaryOffset, pagesBuf;
|
|
501
|
-
return _regenerator.default.wrap(function _callee11$(_context11) {
|
|
502
|
-
while (1) switch (_context11.prev = _context11.next) {
|
|
503
|
-
// Step 0: a chunk with a non-nullish `file_path` is rejected with an Error.
case 0:
|
|
504
|
-
if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
|
|
505
|
-
_context11.next = 2;
|
|
506
|
-
break;
|
|
507
|
-
}
|
|
508
|
-
throw new Error('external references are not supported');
|
|
509
|
-
// Step 2: look the column up in the schema and require that the chunk's
// thrift type equals the field's `primitiveType`.
case 2:
|
|
510
|
-
field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
511
|
-
type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
512
|
-
if (!(type !== field.primitiveType)) {
|
|
513
|
-
_context11.next = 6;
|
|
514
|
-
break;
|
|
515
|
-
}
|
|
516
|
-
throw new Error("chunk type not matching schema: ".concat(type));
|
|
517
|
-
// Step 6: derive codec, page offset/size and the decode options from the
// chunk metadata.
case 6:
|
|
518
|
-
compression = (0, _readUtils.getThriftEnum)(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
519
|
-
pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
520
|
-
pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
521
|
-
// NOTE(review): step 0 already threw for any non-nullish `file_path`, so
// this condition looks always true here; the clamp below caps the read
// length at the bytes remaining after `pagesOffset`.
if (!colChunk.file_path) {
|
|
522
|
-
pagesSize = Math.min(this.file.size - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
|
|
523
|
-
}
|
|
524
|
-
options = {
|
|
525
|
-
type: type,
|
|
526
|
-
rLevelMax: field.rLevelMax,
|
|
527
|
-
dLevelMax: field.dLevelMax,
|
|
528
|
-
compression: compression,
|
|
529
|
-
column: field,
|
|
530
|
-
numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
|
|
531
|
-
dictionary: []
|
|
532
|
-
};
|
|
533
|
-
dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
|
|
534
|
-
// A falsy dictionary page offset skips the dictionary read (jump to step 17).
if (!dictionaryPageOffset) {
|
|
535
|
-
_context11.next = 17;
|
|
536
|
-
break;
|
|
537
|
-
}
|
|
538
|
-
dictionaryOffset = Number(dictionaryPageOffset);
|
|
539
|
-
_context11.next = 16;
|
|
540
|
-
return this.getDictionary(dictionaryOffset, options, pagesOffset);
|
|
541
|
-
case 16:
|
|
542
|
-
dictionary = _context11.sent;
|
|
543
|
-
// Step 17: a non-empty `options.dictionary` takes precedence over the value
// returned by getDictionary. It starts as `[]` above, so this presumably
// relies on the decoder filling it in -- TODO confirm.
case 17:
|
|
544
|
-
dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
|
|
545
|
-
_context11.next = 20;
|
|
546
|
-
return this.file.read(pagesOffset, pagesSize);
|
|
547
|
-
// Step 20: the awaited read result is the raw page bytes; decode them with
// the options plus the chosen dictionary.
case 20:
|
|
548
|
-
pagesBuf = _context11.sent;
|
|
549
|
-
_context11.next = 23;
|
|
550
|
-
return (0, _decoders.decodeDataPages)(pagesBuf, _objectSpread(_objectSpread({}, options), {}, {
|
|
551
|
-
dictionary: dictionary
|
|
552
|
-
}));
|
|
553
|
-
// Step 23: the decoded pages are the method's result.
case 23:
|
|
554
|
-
return _context11.abrupt("return", _context11.sent);
|
|
555
|
-
case 24:
|
|
556
|
-
case "end":
|
|
557
|
-
return _context11.stop();
|
|
558
|
-
}
|
|
559
|
-
}, _callee11, this);
|
|
560
|
-
}));
|
|
561
|
-
// Thin wrapper that forwards `this` and all arguments to the wrapped function above.
function readColumnChunk(_x4, _x5) {
|
|
562
|
-
return _readColumnChunk.apply(this, arguments);
|
|
563
|
-
}
|
|
564
|
-
return readColumnChunk;
|
|
565
|
-
}()
|
|
566
|
-
}, {
|
|
567
|
-
key: "getDictionary",
|
|
568
|
-
value: function () {
|
|
569
|
-
var _getDictionary = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(dictionaryPageOffset, options, pagesOffset) {
|
|
570
|
-
var dictionarySize, pagesBuf, cursor, decodedPage;
|
|
571
|
-
return _regenerator.default.wrap(function _callee12$(_context12) {
|
|
572
|
-
while (1) switch (_context12.prev = _context12.next) {
|
|
573
|
-
case 0:
|
|
574
|
-
if (!(dictionaryPageOffset === 0)) {
|
|
575
|
-
_context12.next = 2;
|
|
576
|
-
break;
|
|
577
|
-
}
|
|
578
|
-
return _context12.abrupt("return", []);
|
|
579
|
-
case 2:
|
|
580
|
-
dictionarySize = Math.min(this.file.size - dictionaryPageOffset, this.props.defaultDictionarySize);
|
|
581
|
-
_context12.next = 5;
|
|
582
|
-
return this.file.read(dictionaryPageOffset, dictionarySize);
|
|
583
|
-
case 5:
|
|
584
|
-
pagesBuf = _context12.sent;
|
|
585
|
-
cursor = {
|
|
586
|
-
buffer: pagesBuf,
|
|
587
|
-
offset: 0,
|
|
588
|
-
size: pagesBuf.length
|
|
589
|
-
};
|
|
590
|
-
_context12.next = 9;
|
|
591
|
-
return (0, _decoders.decodePage)(cursor, options);
|
|
592
|
-
case 9:
|
|
593
|
-
decodedPage = _context12.sent;
|
|
594
|
-
return _context12.abrupt("return", decodedPage.dictionary);
|
|
595
|
-
case 11:
|
|
596
|
-
case "end":
|
|
597
|
-
return _context12.stop();
|
|
598
|
-
}
|
|
599
|
-
}, _callee12, this);
|
|
600
|
-
}));
|
|
601
|
-
function getDictionary(_x6, _x7, _x8) {
|
|
602
|
-
return _getDictionary.apply(this, arguments);
|
|
74
|
+
return md;
|
|
75
|
+
}
|
|
76
|
+
async getFileMetadata() {
|
|
77
|
+
if (!this.metadata) {
|
|
78
|
+
await this.readHeader();
|
|
79
|
+
this.metadata = this.readFooter();
|
|
80
|
+
}
|
|
81
|
+
return this.metadata;
|
|
82
|
+
}
|
|
83
|
+
async readHeader() {
|
|
84
|
+
const buffer = await this.file.read(0, _constants.PARQUET_MAGIC.length);
|
|
85
|
+
const magic = buffer.toString();
|
|
86
|
+
switch (magic) {
|
|
87
|
+
case _constants.PARQUET_MAGIC:
|
|
88
|
+
break;
|
|
89
|
+
case _constants.PARQUET_MAGIC_ENCRYPTED:
|
|
90
|
+
throw new Error('Encrypted parquet file not supported');
|
|
91
|
+
default:
|
|
92
|
+
throw new Error("Invalid parquet file (magic=".concat(magic, ")"));
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
async readFooter() {
|
|
96
|
+
const trailerLen = _constants.PARQUET_MAGIC.length + 4;
|
|
97
|
+
const trailerBuf = await this.file.read(this.file.size - trailerLen, trailerLen);
|
|
98
|
+
const magic = trailerBuf.slice(4).toString();
|
|
99
|
+
if (magic !== _constants.PARQUET_MAGIC) {
|
|
100
|
+
throw new Error("Not a valid parquet file (magic=\"".concat(magic, ")"));
|
|
101
|
+
}
|
|
102
|
+
const metadataSize = trailerBuf.readUInt32LE(0);
|
|
103
|
+
const metadataOffset = this.file.size - metadataSize - trailerLen;
|
|
104
|
+
if (metadataOffset < _constants.PARQUET_MAGIC.length) {
|
|
105
|
+
throw new Error("Invalid metadata size ".concat(metadataOffset));
|
|
106
|
+
}
|
|
107
|
+
const metadataBuf = await this.file.read(metadataOffset, metadataSize);
|
|
108
|
+
const {
|
|
109
|
+
metadata
|
|
110
|
+
} = (0, _readUtils.decodeFileMetadata)(metadataBuf);
|
|
111
|
+
return metadata;
|
|
112
|
+
}
|
|
113
|
+
async readRowGroup(schema, rowGroup, columnList) {
|
|
114
|
+
const buffer = {
|
|
115
|
+
rowCount: Number(rowGroup.num_rows),
|
|
116
|
+
columnData: {}
|
|
117
|
+
};
|
|
118
|
+
for (const colChunk of rowGroup.columns) {
|
|
119
|
+
const colMetadata = colChunk.meta_data;
|
|
120
|
+
const colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
|
|
121
|
+
if (columnList.length > 0 && (0, _readUtils.fieldIndexOf)(columnList, colKey) < 0) {
|
|
122
|
+
continue;
|
|
603
123
|
}
|
|
604
|
-
|
|
605
|
-
}
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
124
|
+
buffer.columnData[colKey.join()] = await this.readColumnChunk(schema, colChunk);
|
|
125
|
+
}
|
|
126
|
+
return buffer;
|
|
127
|
+
}
|
|
128
|
+
async readColumnChunk(schema, colChunk) {
|
|
129
|
+
var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5, _colChunk$meta_data7, _colChunk$meta_data8, _options$dictionary;
|
|
130
|
+
if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
|
|
131
|
+
throw new Error('external references are not supported');
|
|
132
|
+
}
|
|
133
|
+
const field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
134
|
+
const type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
135
|
+
if (type !== field.primitiveType) {
|
|
136
|
+
throw new Error("chunk type not matching schema: ".concat(type));
|
|
137
|
+
}
|
|
138
|
+
const compression = (0, _readUtils.getThriftEnum)(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
139
|
+
const pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
140
|
+
let pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
141
|
+
if (!colChunk.file_path) {
|
|
142
|
+
var _colChunk$meta_data6;
|
|
143
|
+
pagesSize = Math.min(this.file.size - pagesOffset, Number((_colChunk$meta_data6 = colChunk.meta_data) === null || _colChunk$meta_data6 === void 0 ? void 0 : _colChunk$meta_data6.total_compressed_size));
|
|
144
|
+
}
|
|
145
|
+
const options = {
|
|
146
|
+
type,
|
|
147
|
+
rLevelMax: field.rLevelMax,
|
|
148
|
+
dLevelMax: field.dLevelMax,
|
|
149
|
+
compression,
|
|
150
|
+
column: field,
|
|
151
|
+
numValues: (_colChunk$meta_data7 = colChunk.meta_data) === null || _colChunk$meta_data7 === void 0 ? void 0 : _colChunk$meta_data7.num_values,
|
|
152
|
+
dictionary: []
|
|
153
|
+
};
|
|
154
|
+
let dictionary;
|
|
155
|
+
const dictionaryPageOffset = colChunk === null || colChunk === void 0 ? void 0 : (_colChunk$meta_data8 = colChunk.meta_data) === null || _colChunk$meta_data8 === void 0 ? void 0 : _colChunk$meta_data8.dictionary_page_offset;
|
|
156
|
+
if (dictionaryPageOffset) {
|
|
157
|
+
const dictionaryOffset = Number(dictionaryPageOffset);
|
|
158
|
+
dictionary = await this.getDictionary(dictionaryOffset, options, pagesOffset);
|
|
159
|
+
}
|
|
160
|
+
dictionary = (_options$dictionary = options.dictionary) !== null && _options$dictionary !== void 0 && _options$dictionary.length ? options.dictionary : dictionary;
|
|
161
|
+
const pagesBuf = await this.file.read(pagesOffset, pagesSize);
|
|
162
|
+
return await (0, _decoders.decodeDataPages)(pagesBuf, {
|
|
163
|
+
...options,
|
|
164
|
+
dictionary
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
async getDictionary(dictionaryPageOffset, options, pagesOffset) {
|
|
168
|
+
if (dictionaryPageOffset === 0) {
|
|
169
|
+
return [];
|
|
170
|
+
}
|
|
171
|
+
const dictionarySize = Math.min(this.file.size - dictionaryPageOffset, this.props.defaultDictionarySize);
|
|
172
|
+
const pagesBuf = await this.file.read(dictionaryPageOffset, dictionarySize);
|
|
173
|
+
const cursor = {
|
|
174
|
+
buffer: pagesBuf,
|
|
175
|
+
offset: 0,
|
|
176
|
+
size: pagesBuf.length
|
|
177
|
+
};
|
|
178
|
+
const decodedPage = await (0, _decoders.decodePage)(cursor, options);
|
|
179
|
+
return decodedPage.dictionary;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
609
182
|
exports.ParquetReader = ParquetReader;
|
|
610
183
|
//# sourceMappingURL=parquet-reader.js.map
|