@loaders.gl/parquet 3.0.13 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.es5.min.js +1 -1
- package/dist/dist.es5.min.js.map +1 -1
- package/dist/dist.min.js +1 -1
- package/dist/dist.min.js.map +1 -1
- package/dist/es5/bundle.js +2 -2
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/index.js +9 -9
- package/dist/es5/parquet-loader.js +70 -19
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +6 -4
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +43 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +35 -25
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +9 -7
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/file.js +15 -15
- package/dist/es5/parquetjs/file.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/reader.js +813 -276
- package/dist/es5/parquetjs/reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +11 -9
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +87 -73
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +95 -55
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +25 -25
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/util.js +71 -39
- package/dist/es5/parquetjs/util.js.map +1 -1
- package/dist/es5/parquetjs/writer.js +467 -200
- package/dist/es5/parquetjs/writer.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/parquet-worker.js +1 -1
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +4 -4
|
@@ -2,11 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
4
|
|
|
5
|
+
var _typeof = require("@babel/runtime/helpers/typeof");
|
|
6
|
+
|
|
5
7
|
Object.defineProperty(exports, "__esModule", {
|
|
6
8
|
value: true
|
|
7
9
|
});
|
|
8
10
|
exports.ParquetEnvelopeReader = exports.ParquetReader = exports.ParquetCursor = void 0;
|
|
9
11
|
|
|
12
|
+
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
13
|
+
|
|
14
|
+
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
15
|
+
|
|
16
|
+
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
17
|
+
|
|
18
|
+
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
19
|
+
|
|
10
20
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
11
21
|
|
|
12
22
|
var _codecs = require("./codecs");
|
|
@@ -21,20 +31,27 @@ var _parquetThrift = require("./parquet-thrift");
|
|
|
21
31
|
|
|
22
32
|
var Util = _interopRequireWildcard(require("./util"));
|
|
23
33
|
|
|
24
|
-
|
|
34
|
+
var _Symbol$asyncIterator, _Symbol$asyncIterator2;
|
|
35
|
+
|
|
36
|
+
/**
 * Returns the WeakMap that caches wildcard-interop namespace objects.
 *
 * Two separate caches exist: one handed out when `nodeInterop` is truthy and
 * one for every other call. Returns `null` when `WeakMap` is not a function
 * in the current environment.
 *
 * @param {*} nodeInterop - truthy selects the node-interop cache.
 * @returns {WeakMap|null} the selected cache, or `null` without WeakMap support.
 */
function _getRequireWildcardCache(nodeInterop) {
  if (typeof WeakMap !== "function") {
    return null;
  }

  var babelCache = new WeakMap();
  var nodeCache = new WeakMap();

  // Replace this function with a selector closed over the two maps, so that
  // later calls reuse them instead of allocating fresh ones.
  _getRequireWildcardCache = function _getRequireWildcardCache(useNodeInterop) {
    if (useNodeInterop) {
      return nodeCache;
    }

    return babelCache;
  };

  return _getRequireWildcardCache(nodeInterop);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Wraps a required module in a namespace-style object.
 *
 * - Without `nodeInterop`, a module flagged `__esModule` is returned as-is.
 * - `null` and values that are neither objects nor functions are returned as
 *   `{ default: value }`.
 * - Otherwise every own enumerable key except "default" is copied onto a new
 *   object (accessors are copied as accessors when descriptors are available),
 *   `default` is set to the original module, and the result is stored in the
 *   cache returned by `_getRequireWildcardCache` when there is one.
 *
 * @param {*} obj - the value returned by `require`.
 * @param {*} nodeInterop - forwarded to the cache selector; truthy disables
 *   the `__esModule` short-circuit.
 * @returns {*} the module itself or a namespace wrapper around it.
 */
function _interopRequireWildcard(obj, nodeInterop) {
  if (!nodeInterop && obj && obj.__esModule) {
    return obj;
  }

  var isObjectLike = obj !== null && (_typeof(obj) === "object" || typeof obj === "function");

  if (!isObjectLike) {
    return {
      default: obj
    };
  }

  var cache = _getRequireWildcardCache(nodeInterop);

  if (cache && cache.has(obj)) {
    return cache.get(obj);
  }

  var namespace = {};
  var canUseDescriptors = Object.defineProperty && Object.getOwnPropertyDescriptor;

  for (var key in obj) {
    // "default" is assigned explicitly below; inherited keys are not copied.
    if (key === "default" || !Object.prototype.hasOwnProperty.call(obj, key)) {
      continue;
    }

    var descriptor = canUseDescriptors ? Object.getOwnPropertyDescriptor(obj, key) : null;

    if (descriptor && (descriptor.get || descriptor.set)) {
      // Keep getters/setters live instead of snapshotting their current value.
      Object.defineProperty(namespace, key, descriptor);
    } else {
      namespace[key] = obj[key];
    }
  }

  namespace.default = obj;

  if (cache) {
    cache.set(obj, namespace);
  }

  return namespace;
}
|
|
25
39
|
|
|
26
|
-
function
|
|
40
|
+
/**
 * Builds the `{ s, n, e, f }` driver object used by transpiled `for...of`
 * loops: `s` starts iteration, `n` returns the next step, `e` records or
 * raises an error, and `f` finalises the loop.
 *
 * When `o` exposes `Symbol.iterator` (or an "@@iterator" property) the driver
 * wraps that iterator. `f` then calls the iterator's `return()` if the last
 * step was not `done`, and rethrows any error recorded through `e`.
 *
 * Otherwise `o` is walked by index, provided it is an array, can be converted
 * by `_unsupportedIterableToArray`, or `allowArrayLike` is truthy and `o` has
 * a numeric `length`.
 *
 * @param {*} o - the value being iterated.
 * @param {*} allowArrayLike - truthy to accept any object with a numeric length.
 * @returns {{s: Function, n: Function, e: Function, f: Function}} loop driver.
 * @throws {TypeError} when `o` is not iterable by any of the routes above.
 */
function _createForOfIteratorHelper(o, allowArrayLike) {
  var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"];

  if (it) {
    var normalCompletion = true;
    var didErr = false;
    var err;
    return {
      s: function s() {
        // Swap the iterator factory for the iterator it produces.
        it = it.call(o);
      },
      n: function n() {
        var step = it.next();
        normalCompletion = step.done;
        return step;
      },
      e: function e(caught) {
        // Defer the throw to f() so the iterator can be closed first.
        didErr = true;
        err = caught;
      },
      f: function f() {
        try {
          if (!normalCompletion && it.return != null) {
            it.return();
          }
        } finally {
          if (didErr) {
            throw err;
          }
        }
      }
    };
  }

  var indexable = Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number";

  if (!indexable) {
    throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
  }

  // Prefer the converted array when the conversion helper produced one.
  var items = it ? it : o;
  var index = 0;

  var noop = function noop() {};

  return {
    s: noop,
    n: function n() {
      if (index >= items.length) {
        return {
          done: true
        };
      }

      return {
        done: false,
        value: items[index++]
      };
    },
    e: function e(caught) {
      throw caught;
    },
    f: noop
  };
}
|
|
27
41
|
|
|
28
|
-
function
|
|
42
|
+
/**
 * Converts a value to an array when it is one of a fixed set of known kinds,
 * for environments where it cannot be iterated directly.
 *
 * Handled kinds: strings, `Map`, `Set`, `arguments` objects and the
 * Int8/16/32 and Uint8/16/32 (including Uint8Clamped) typed arrays. For plain
 * objects the kind is taken from `o.constructor.name`.
 *
 * @param {*} o - the value to convert.
 * @param {number} [minLen] - forwarded to `_arrayLikeToArray` as its length.
 * @returns {Array|undefined} the array, or `undefined` for falsy or
 *   unrecognised values.
 */
function _unsupportedIterableToArray(o, minLen) {
  if (!o) {
    return undefined;
  }

  if (typeof o === "string") {
    return _arrayLikeToArray(o, minLen);
  }

  // "[object Foo]" -> "Foo"
  var tag = Object.prototype.toString.call(o).slice(8, -1);

  if (tag === "Object" && o.constructor) {
    tag = o.constructor.name;
  }

  switch (tag) {
    case "Map":
    case "Set":
      return Array.from(o);

    case "Arguments":
      return _arrayLikeToArray(o, minLen);

    default:
      if (/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag)) {
        return _arrayLikeToArray(o, minLen);
      }

      return undefined;
  }
}
|
|
29
43
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
44
|
+
/**
 * Copies the leading elements of an array-like value into a new array.
 *
 * @param {*} arr - array-like source; read through `length` and numeric indices.
 * @param {number} [len] - number of elements to copy. When nullish or greater
 *   than `arr.length`, the whole source is copied.
 * @returns {Array} a new array holding the copied elements.
 */
function _arrayLikeToArray(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var copy = new Array(count);
  var index = 0;

  while (index < count) {
    copy[index] = arr[index];
    index++;
  }

  return copy;
}
|
|
45
|
+
|
|
46
|
+
var PARQUET_MAGIC = 'PAR1';
|
|
47
|
+
var PARQUET_VERSION = 1;
|
|
48
|
+
var PARQUET_RDLVL_TYPE = 'INT32';
|
|
49
|
+
var PARQUET_RDLVL_ENCODING = 'RLE';
|
|
34
50
|
_Symbol$asyncIterator = Symbol.asyncIterator;
|
|
35
51
|
|
|
36
|
-
|
|
37
|
-
|
|
52
|
+
var ParquetCursor = function () {
|
|
53
|
+
function ParquetCursor(metadata, envelopeReader, schema, columnList) {
|
|
54
|
+
(0, _classCallCheck2.default)(this, ParquetCursor);
|
|
38
55
|
(0, _defineProperty2.default)(this, "metadata", void 0);
|
|
39
56
|
(0, _defineProperty2.default)(this, "envelopeReader", void 0);
|
|
40
57
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
@@ -49,118 +66,184 @@ class ParquetCursor {
|
|
|
49
66
|
this.rowGroupIndex = 0;
|
|
50
67
|
}
|
|
51
68
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
69
|
+
(0, _createClass2.default)(ParquetCursor, [{
|
|
70
|
+
key: "next",
|
|
71
|
+
value: function () {
|
|
72
|
+
var _next = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
|
|
73
|
+
var rowBuffer;
|
|
74
|
+
return _regenerator.default.wrap(function _callee$(_context) {
|
|
75
|
+
while (1) {
|
|
76
|
+
switch (_context.prev = _context.next) {
|
|
77
|
+
case 0:
|
|
78
|
+
if (!(this.rowGroup.length === 0)) {
|
|
79
|
+
_context.next = 8;
|
|
80
|
+
break;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
if (!(this.rowGroupIndex >= this.metadata.row_groups.length)) {
|
|
84
|
+
_context.next = 3;
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
return _context.abrupt("return", null);
|
|
89
|
+
|
|
90
|
+
case 3:
|
|
91
|
+
_context.next = 5;
|
|
92
|
+
return this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
|
|
93
|
+
|
|
94
|
+
case 5:
|
|
95
|
+
rowBuffer = _context.sent;
|
|
96
|
+
this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);
|
|
97
|
+
this.rowGroupIndex++;
|
|
98
|
+
|
|
99
|
+
case 8:
|
|
100
|
+
return _context.abrupt("return", this.rowGroup.shift());
|
|
101
|
+
|
|
102
|
+
case 9:
|
|
103
|
+
case "end":
|
|
104
|
+
return _context.stop();
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}, _callee, this);
|
|
108
|
+
}));
|
|
109
|
+
|
|
110
|
+
function next() {
|
|
111
|
+
return _next.apply(this, arguments);
|
|
56
112
|
}
|
|
57
113
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
114
|
+
return next;
|
|
115
|
+
}()
|
|
116
|
+
}, {
|
|
117
|
+
key: "rewind",
|
|
118
|
+
value: function rewind() {
|
|
119
|
+
this.rowGroup = [];
|
|
120
|
+
this.rowGroupIndex = 0;
|
|
61
121
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
}
|
|
122
|
+
}, {
|
|
123
|
+
key: _Symbol$asyncIterator,
|
|
124
|
+
value: function value() {
|
|
125
|
+
var _this = this;
|
|
126
|
+
|
|
127
|
+
var done = false;
|
|
128
|
+
return {
|
|
129
|
+
next: function () {
|
|
130
|
+
var _next2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2() {
|
|
131
|
+
var value;
|
|
132
|
+
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
133
|
+
while (1) {
|
|
134
|
+
switch (_context2.prev = _context2.next) {
|
|
135
|
+
case 0:
|
|
136
|
+
if (!done) {
|
|
137
|
+
_context2.next = 2;
|
|
138
|
+
break;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
return _context2.abrupt("return", {
|
|
142
|
+
done: done,
|
|
143
|
+
value: null
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
case 2:
|
|
147
|
+
_context2.next = 4;
|
|
148
|
+
return _this.next();
|
|
149
|
+
|
|
150
|
+
case 4:
|
|
151
|
+
value = _context2.sent;
|
|
152
|
+
|
|
153
|
+
if (!(value === null)) {
|
|
154
|
+
_context2.next = 7;
|
|
155
|
+
break;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return _context2.abrupt("return", {
|
|
159
|
+
done: true,
|
|
160
|
+
value: value
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
case 7:
|
|
164
|
+
return _context2.abrupt("return", {
|
|
165
|
+
done: false,
|
|
166
|
+
value: value
|
|
167
|
+
});
|
|
168
|
+
|
|
169
|
+
case 8:
|
|
170
|
+
case "end":
|
|
171
|
+
return _context2.stop();
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}, _callee2);
|
|
175
|
+
}));
|
|
176
|
+
|
|
177
|
+
function next() {
|
|
178
|
+
return _next2.apply(this, arguments);
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
return next;
|
|
182
|
+
}(),
|
|
183
|
+
return: function () {
|
|
184
|
+
var _return2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3() {
|
|
185
|
+
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
186
|
+
while (1) {
|
|
187
|
+
switch (_context3.prev = _context3.next) {
|
|
188
|
+
case 0:
|
|
189
|
+
done = true;
|
|
190
|
+
return _context3.abrupt("return", {
|
|
191
|
+
done: done,
|
|
192
|
+
value: null
|
|
193
|
+
});
|
|
194
|
+
|
|
195
|
+
case 2:
|
|
196
|
+
case "end":
|
|
197
|
+
return _context3.stop();
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
}, _callee3);
|
|
201
|
+
}));
|
|
202
|
+
|
|
203
|
+
function _return() {
|
|
204
|
+
return _return2.apply(this, arguments);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
return _return;
|
|
208
|
+
}(),
|
|
209
|
+
throw: function () {
|
|
210
|
+
var _throw2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4() {
|
|
211
|
+
return _regenerator.default.wrap(function _callee4$(_context4) {
|
|
212
|
+
while (1) {
|
|
213
|
+
switch (_context4.prev = _context4.next) {
|
|
214
|
+
case 0:
|
|
215
|
+
done = true;
|
|
216
|
+
return _context4.abrupt("return", {
|
|
217
|
+
done: true,
|
|
218
|
+
value: null
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
case 2:
|
|
222
|
+
case "end":
|
|
223
|
+
return _context4.stop();
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}, _callee4);
|
|
227
|
+
}));
|
|
228
|
+
|
|
229
|
+
function _throw() {
|
|
230
|
+
return _throw2.apply(this, arguments);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
return _throw;
|
|
234
|
+
}()
|
|
235
|
+
};
|
|
236
|
+
}
|
|
237
|
+
}]);
|
|
238
|
+
return ParquetCursor;
|
|
239
|
+
}();
|
|
114
240
|
|
|
115
241
|
exports.ParquetCursor = ParquetCursor;
|
|
116
242
|
_Symbol$asyncIterator2 = Symbol.asyncIterator;
|
|
117
243
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
try {
|
|
123
|
-
await envelopeReader.readHeader();
|
|
124
|
-
const metadata = await envelopeReader.readFooter();
|
|
125
|
-
return new ParquetReader(metadata, envelopeReader);
|
|
126
|
-
} catch (err) {
|
|
127
|
-
await envelopeReader.close();
|
|
128
|
-
throw err;
|
|
129
|
-
}
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
static async openBuffer(buffer) {
|
|
133
|
-
const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);
|
|
134
|
-
|
|
135
|
-
try {
|
|
136
|
-
await envelopeReader.readHeader();
|
|
137
|
-
const metadata = await envelopeReader.readFooter();
|
|
138
|
-
return new ParquetReader(metadata, envelopeReader);
|
|
139
|
-
} catch (err) {
|
|
140
|
-
await envelopeReader.close();
|
|
141
|
-
throw err;
|
|
142
|
-
}
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
static async openArrayBuffer(arrayBuffer) {
|
|
146
|
-
const readFn = async (start, length) => Buffer.from(arrayBuffer, start, length);
|
|
147
|
-
|
|
148
|
-
const closeFn = async () => {};
|
|
149
|
-
|
|
150
|
-
const size = arrayBuffer.byteLength;
|
|
151
|
-
const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
|
|
152
|
-
|
|
153
|
-
try {
|
|
154
|
-
await envelopeReader.readHeader();
|
|
155
|
-
const metadata = await envelopeReader.readFooter();
|
|
156
|
-
return new ParquetReader(metadata, envelopeReader);
|
|
157
|
-
} catch (err) {
|
|
158
|
-
await envelopeReader.close();
|
|
159
|
-
throw err;
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
|
|
163
|
-
constructor(metadata, envelopeReader) {
|
|
244
|
+
var ParquetReader = function () {
|
|
245
|
+
function ParquetReader(metadata, envelopeReader) {
|
|
246
|
+
(0, _classCallCheck2.default)(this, ParquetReader);
|
|
164
247
|
(0, _defineProperty2.default)(this, "metadata", void 0);
|
|
165
248
|
(0, _defineProperty2.default)(this, "envelopeReader", void 0);
|
|
166
249
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
@@ -171,70 +254,279 @@ class ParquetReader {
|
|
|
171
254
|
|
|
172
255
|
this.metadata = metadata;
|
|
173
256
|
this.envelopeReader = envelopeReader;
|
|
174
|
-
|
|
175
|
-
const {
|
|
176
|
-
schema
|
|
177
|
-
} = decodeSchema(this.metadata.schema, 1, root.num_children);
|
|
178
|
-
this.schema = new _schema.ParquetSchema(schema);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
async close() {
|
|
182
|
-
await this.envelopeReader.close();
|
|
183
|
-
}
|
|
257
|
+
var root = this.metadata.schema[0];
|
|
184
258
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
columnList = [];
|
|
188
|
-
}
|
|
259
|
+
var _decodeSchema = decodeSchema(this.metadata.schema, 1, root.num_children),
|
|
260
|
+
schema = _decodeSchema.schema;
|
|
189
261
|
|
|
190
|
-
|
|
191
|
-
return new ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList);
|
|
262
|
+
this.schema = new _schema.ParquetSchema(schema);
|
|
192
263
|
}
|
|
193
264
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
265
|
+
(0, _createClass2.default)(ParquetReader, [{
|
|
266
|
+
key: "close",
|
|
267
|
+
value: function () {
|
|
268
|
+
var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5() {
|
|
269
|
+
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
270
|
+
while (1) {
|
|
271
|
+
switch (_context5.prev = _context5.next) {
|
|
272
|
+
case 0:
|
|
273
|
+
_context5.next = 2;
|
|
274
|
+
return this.envelopeReader.close();
|
|
275
|
+
|
|
276
|
+
case 2:
|
|
277
|
+
case "end":
|
|
278
|
+
return _context5.stop();
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
}, _callee5, this);
|
|
282
|
+
}));
|
|
283
|
+
|
|
284
|
+
function close() {
|
|
285
|
+
return _close.apply(this, arguments);
|
|
286
|
+
}
|
|
197
287
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
}
|
|
288
|
+
return close;
|
|
289
|
+
}()
|
|
290
|
+
}, {
|
|
291
|
+
key: "getCursor",
|
|
292
|
+
value: function getCursor(columnList) {
|
|
293
|
+
if (!columnList) {
|
|
294
|
+
columnList = [];
|
|
295
|
+
}
|
|
201
296
|
|
|
202
|
-
|
|
203
|
-
|
|
297
|
+
columnList = columnList.map(function (x) {
|
|
298
|
+
return Array.isArray(x) ? x : [x];
|
|
299
|
+
});
|
|
300
|
+
return new ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList);
|
|
301
|
+
}
|
|
302
|
+
}, {
|
|
303
|
+
key: "getRowCount",
|
|
304
|
+
value: function getRowCount() {
|
|
305
|
+
return Number(this.metadata.num_rows);
|
|
306
|
+
}
|
|
307
|
+
}, {
|
|
308
|
+
key: "getSchema",
|
|
309
|
+
value: function getSchema() {
|
|
310
|
+
return this.schema;
|
|
311
|
+
}
|
|
312
|
+
}, {
|
|
313
|
+
key: "getMetadata",
|
|
314
|
+
value: function getMetadata() {
|
|
315
|
+
var md = {};
|
|
316
|
+
|
|
317
|
+
var _iterator = _createForOfIteratorHelper(this.metadata.key_value_metadata),
|
|
318
|
+
_step;
|
|
319
|
+
|
|
320
|
+
try {
|
|
321
|
+
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
322
|
+
var kv = _step.value;
|
|
323
|
+
md[kv.key] = kv.value;
|
|
324
|
+
}
|
|
325
|
+
} catch (err) {
|
|
326
|
+
_iterator.e(err);
|
|
327
|
+
} finally {
|
|
328
|
+
_iterator.f();
|
|
329
|
+
}
|
|
204
330
|
|
|
205
|
-
|
|
206
|
-
md[kv.key] = kv.value;
|
|
331
|
+
return md;
|
|
207
332
|
}
|
|
333
|
+
}, {
|
|
334
|
+
key: _Symbol$asyncIterator2,
|
|
335
|
+
value: function value() {
|
|
336
|
+
return this.getCursor()[Symbol.asyncIterator]();
|
|
337
|
+
}
|
|
338
|
+
}], [{
|
|
339
|
+
key: "openFile",
|
|
340
|
+
value: function () {
|
|
341
|
+
var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(filePath) {
|
|
342
|
+
var envelopeReader, metadata;
|
|
343
|
+
return _regenerator.default.wrap(function _callee6$(_context6) {
|
|
344
|
+
while (1) {
|
|
345
|
+
switch (_context6.prev = _context6.next) {
|
|
346
|
+
case 0:
|
|
347
|
+
_context6.next = 2;
|
|
348
|
+
return ParquetEnvelopeReader.openFile(filePath);
|
|
349
|
+
|
|
350
|
+
case 2:
|
|
351
|
+
envelopeReader = _context6.sent;
|
|
352
|
+
_context6.prev = 3;
|
|
353
|
+
_context6.next = 6;
|
|
354
|
+
return envelopeReader.readHeader();
|
|
355
|
+
|
|
356
|
+
case 6:
|
|
357
|
+
_context6.next = 8;
|
|
358
|
+
return envelopeReader.readFooter();
|
|
359
|
+
|
|
360
|
+
case 8:
|
|
361
|
+
metadata = _context6.sent;
|
|
362
|
+
return _context6.abrupt("return", new ParquetReader(metadata, envelopeReader));
|
|
363
|
+
|
|
364
|
+
case 12:
|
|
365
|
+
_context6.prev = 12;
|
|
366
|
+
_context6.t0 = _context6["catch"](3);
|
|
367
|
+
_context6.next = 16;
|
|
368
|
+
return envelopeReader.close();
|
|
369
|
+
|
|
370
|
+
case 16:
|
|
371
|
+
throw _context6.t0;
|
|
372
|
+
|
|
373
|
+
case 17:
|
|
374
|
+
case "end":
|
|
375
|
+
return _context6.stop();
|
|
376
|
+
}
|
|
377
|
+
}
|
|
378
|
+
}, _callee6, null, [[3, 12]]);
|
|
379
|
+
}));
|
|
380
|
+
|
|
381
|
+
function openFile(_x) {
|
|
382
|
+
return _openFile.apply(this, arguments);
|
|
383
|
+
}
|
|
208
384
|
|
|
209
|
-
|
|
210
|
-
|
|
385
|
+
return openFile;
|
|
386
|
+
}()
|
|
387
|
+
}, {
|
|
388
|
+
key: "openBuffer",
|
|
389
|
+
value: function () {
|
|
390
|
+
var _openBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(buffer) {
|
|
391
|
+
var envelopeReader, metadata;
|
|
392
|
+
return _regenerator.default.wrap(function _callee7$(_context7) {
|
|
393
|
+
while (1) {
|
|
394
|
+
switch (_context7.prev = _context7.next) {
|
|
395
|
+
case 0:
|
|
396
|
+
_context7.next = 2;
|
|
397
|
+
return ParquetEnvelopeReader.openBuffer(buffer);
|
|
398
|
+
|
|
399
|
+
case 2:
|
|
400
|
+
envelopeReader = _context7.sent;
|
|
401
|
+
_context7.prev = 3;
|
|
402
|
+
_context7.next = 6;
|
|
403
|
+
return envelopeReader.readHeader();
|
|
404
|
+
|
|
405
|
+
case 6:
|
|
406
|
+
_context7.next = 8;
|
|
407
|
+
return envelopeReader.readFooter();
|
|
408
|
+
|
|
409
|
+
case 8:
|
|
410
|
+
metadata = _context7.sent;
|
|
411
|
+
return _context7.abrupt("return", new ParquetReader(metadata, envelopeReader));
|
|
412
|
+
|
|
413
|
+
case 12:
|
|
414
|
+
_context7.prev = 12;
|
|
415
|
+
_context7.t0 = _context7["catch"](3);
|
|
416
|
+
_context7.next = 16;
|
|
417
|
+
return envelopeReader.close();
|
|
418
|
+
|
|
419
|
+
case 16:
|
|
420
|
+
throw _context7.t0;
|
|
421
|
+
|
|
422
|
+
case 17:
|
|
423
|
+
case "end":
|
|
424
|
+
return _context7.stop();
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
}, _callee7, null, [[3, 12]]);
|
|
428
|
+
}));
|
|
429
|
+
|
|
430
|
+
function openBuffer(_x2) {
|
|
431
|
+
return _openBuffer.apply(this, arguments);
|
|
432
|
+
}
|
|
211
433
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
}
|
|
434
|
+
return openBuffer;
|
|
435
|
+
}()
|
|
436
|
+
}, {
|
|
437
|
+
key: "openArrayBuffer",
|
|
438
|
+
value: function () {
|
|
439
|
+
var _openArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(arrayBuffer) {
|
|
440
|
+
var readFn, closeFn, size, envelopeReader, metadata;
|
|
441
|
+
return _regenerator.default.wrap(function _callee10$(_context10) {
|
|
442
|
+
while (1) {
|
|
443
|
+
switch (_context10.prev = _context10.next) {
|
|
444
|
+
case 0:
|
|
445
|
+
readFn = function () {
|
|
446
|
+
var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(start, length) {
|
|
447
|
+
return _regenerator.default.wrap(function _callee8$(_context8) {
|
|
448
|
+
while (1) {
|
|
449
|
+
switch (_context8.prev = _context8.next) {
|
|
450
|
+
case 0:
|
|
451
|
+
return _context8.abrupt("return", Buffer.from(arrayBuffer, start, length));
|
|
452
|
+
|
|
453
|
+
case 1:
|
|
454
|
+
case "end":
|
|
455
|
+
return _context8.stop();
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
}, _callee8);
|
|
459
|
+
}));
|
|
460
|
+
|
|
461
|
+
return function readFn(_x4, _x5) {
|
|
462
|
+
return _ref.apply(this, arguments);
|
|
463
|
+
};
|
|
464
|
+
}();
|
|
465
|
+
|
|
466
|
+
closeFn = function () {
|
|
467
|
+
var _ref2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
|
|
468
|
+
return _regenerator.default.wrap(function _callee9$(_context9) {
|
|
469
|
+
while (1) {
|
|
470
|
+
switch (_context9.prev = _context9.next) {
|
|
471
|
+
case 0:
|
|
472
|
+
case "end":
|
|
473
|
+
return _context9.stop();
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
}, _callee9);
|
|
477
|
+
}));
|
|
478
|
+
|
|
479
|
+
return function closeFn() {
|
|
480
|
+
return _ref2.apply(this, arguments);
|
|
481
|
+
};
|
|
482
|
+
}();
|
|
483
|
+
|
|
484
|
+
size = arrayBuffer.byteLength;
|
|
485
|
+
envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
|
|
486
|
+
_context10.prev = 4;
|
|
487
|
+
_context10.next = 7;
|
|
488
|
+
return envelopeReader.readHeader();
|
|
489
|
+
|
|
490
|
+
case 7:
|
|
491
|
+
_context10.next = 9;
|
|
492
|
+
return envelopeReader.readFooter();
|
|
493
|
+
|
|
494
|
+
case 9:
|
|
495
|
+
metadata = _context10.sent;
|
|
496
|
+
return _context10.abrupt("return", new ParquetReader(metadata, envelopeReader));
|
|
497
|
+
|
|
498
|
+
case 13:
|
|
499
|
+
_context10.prev = 13;
|
|
500
|
+
_context10.t0 = _context10["catch"](4);
|
|
501
|
+
_context10.next = 17;
|
|
502
|
+
return envelopeReader.close();
|
|
503
|
+
|
|
504
|
+
case 17:
|
|
505
|
+
throw _context10.t0;
|
|
506
|
+
|
|
507
|
+
case 18:
|
|
508
|
+
case "end":
|
|
509
|
+
return _context10.stop();
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
}, _callee10, null, [[4, 13]]);
|
|
513
|
+
}));
|
|
514
|
+
|
|
515
|
+
function openArrayBuffer(_x3) {
|
|
516
|
+
return _openArrayBuffer.apply(this, arguments);
|
|
517
|
+
}
|
|
215
518
|
|
|
216
|
-
|
|
519
|
+
return openArrayBuffer;
|
|
520
|
+
}()
|
|
521
|
+
}]);
|
|
522
|
+
return ParquetReader;
|
|
523
|
+
}();
|
|
217
524
|
|
|
218
525
|
exports.ParquetReader = ParquetReader;
|
|
219
526
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
const fileDescriptor = await Util.fopen(filePath);
|
|
224
|
-
const readFn = Util.fread.bind(undefined, fileDescriptor);
|
|
225
|
-
const closeFn = Util.fclose.bind(undefined, fileDescriptor);
|
|
226
|
-
return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
static async openBuffer(buffer) {
|
|
230
|
-
const readFn = (position, length) => Promise.resolve(buffer.slice(position, position + length));
|
|
231
|
-
|
|
232
|
-
const closeFn = () => Promise.resolve();
|
|
233
|
-
|
|
234
|
-
return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
constructor(read, close, fileSize) {
|
|
527
|
+
var ParquetEnvelopeReader = function () {
|
|
528
|
+
function ParquetEnvelopeReader(read, close, fileSize) {
|
|
529
|
+
(0, _classCallCheck2.default)(this, ParquetEnvelopeReader);
|
|
238
530
|
(0, _defineProperty2.default)(this, "read", void 0);
|
|
239
531
|
(0, _defineProperty2.default)(this, "close", void 0);
|
|
240
532
|
(0, _defineProperty2.default)(this, "fileSize", void 0);
|
|
@@ -243,74 +535,308 @@ class ParquetEnvelopeReader {
|
|
|
243
535
|
this.fileSize = fileSize;
|
|
244
536
|
}
|
|
245
537
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
538
|
+
(0, _createClass2.default)(ParquetEnvelopeReader, [{
|
|
539
|
+
key: "readHeader",
|
|
540
|
+
value: function () {
|
|
541
|
+
var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11() {
|
|
542
|
+
var buf;
|
|
543
|
+
return _regenerator.default.wrap(function _callee11$(_context11) {
|
|
544
|
+
while (1) {
|
|
545
|
+
switch (_context11.prev = _context11.next) {
|
|
546
|
+
case 0:
|
|
547
|
+
_context11.next = 2;
|
|
548
|
+
return this.read(0, PARQUET_MAGIC.length);
|
|
549
|
+
|
|
550
|
+
case 2:
|
|
551
|
+
buf = _context11.sent;
|
|
552
|
+
|
|
553
|
+
if (!(buf.toString() !== PARQUET_MAGIC)) {
|
|
554
|
+
_context11.next = 5;
|
|
555
|
+
break;
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
throw new Error('not valid parquet file');
|
|
559
|
+
|
|
560
|
+
case 5:
|
|
561
|
+
case "end":
|
|
562
|
+
return _context11.stop();
|
|
563
|
+
}
|
|
564
|
+
}
|
|
565
|
+
}, _callee11, this);
|
|
566
|
+
}));
|
|
567
|
+
|
|
568
|
+
function readHeader() {
|
|
569
|
+
return _readHeader.apply(this, arguments);
|
|
266
570
|
}
|
|
267
571
|
|
|
268
|
-
|
|
269
|
-
}
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
572
|
+
return readHeader;
|
|
573
|
+
}()
|
|
574
|
+
}, {
|
|
575
|
+
key: "readRowGroup",
|
|
576
|
+
value: function () {
|
|
577
|
+
var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(schema, rowGroup, columnList) {
|
|
578
|
+
var buffer, _iterator2, _step2, colChunk, colMetadata, colKey;
|
|
579
|
+
|
|
580
|
+
return _regenerator.default.wrap(function _callee12$(_context12) {
|
|
581
|
+
while (1) {
|
|
582
|
+
switch (_context12.prev = _context12.next) {
|
|
583
|
+
case 0:
|
|
584
|
+
buffer = {
|
|
585
|
+
rowCount: Number(rowGroup.num_rows),
|
|
586
|
+
columnData: {}
|
|
587
|
+
};
|
|
588
|
+
_iterator2 = _createForOfIteratorHelper(rowGroup.columns);
|
|
589
|
+
_context12.prev = 2;
|
|
590
|
+
|
|
591
|
+
_iterator2.s();
|
|
592
|
+
|
|
593
|
+
case 4:
|
|
594
|
+
if ((_step2 = _iterator2.n()).done) {
|
|
595
|
+
_context12.next = 15;
|
|
596
|
+
break;
|
|
597
|
+
}
|
|
598
|
+
|
|
599
|
+
colChunk = _step2.value;
|
|
600
|
+
colMetadata = colChunk.meta_data;
|
|
601
|
+
colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
|
|
602
|
+
|
|
603
|
+
if (!(columnList.length > 0 && Util.fieldIndexOf(columnList, colKey) < 0)) {
|
|
604
|
+
_context12.next = 10;
|
|
605
|
+
break;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
return _context12.abrupt("continue", 13);
|
|
609
|
+
|
|
610
|
+
case 10:
|
|
611
|
+
_context12.next = 12;
|
|
612
|
+
return this.readColumnChunk(schema, colChunk);
|
|
613
|
+
|
|
614
|
+
case 12:
|
|
615
|
+
buffer.columnData[colKey.join()] = _context12.sent;
|
|
616
|
+
|
|
617
|
+
case 13:
|
|
618
|
+
_context12.next = 4;
|
|
619
|
+
break;
|
|
620
|
+
|
|
621
|
+
case 15:
|
|
622
|
+
_context12.next = 20;
|
|
623
|
+
break;
|
|
624
|
+
|
|
625
|
+
case 17:
|
|
626
|
+
_context12.prev = 17;
|
|
627
|
+
_context12.t0 = _context12["catch"](2);
|
|
628
|
+
|
|
629
|
+
_iterator2.e(_context12.t0);
|
|
630
|
+
|
|
631
|
+
case 20:
|
|
632
|
+
_context12.prev = 20;
|
|
633
|
+
|
|
634
|
+
_iterator2.f();
|
|
635
|
+
|
|
636
|
+
return _context12.finish(20);
|
|
637
|
+
|
|
638
|
+
case 23:
|
|
639
|
+
return _context12.abrupt("return", buffer);
|
|
640
|
+
|
|
641
|
+
case 24:
|
|
642
|
+
case "end":
|
|
643
|
+
return _context12.stop();
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
}, _callee12, this, [[2, 17, 20, 23]]);
|
|
647
|
+
}));
|
|
648
|
+
|
|
649
|
+
function readRowGroup(_x6, _x7, _x8) {
|
|
650
|
+
return _readRowGroup.apply(this, arguments);
|
|
651
|
+
}
|
|
294
652
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
653
|
+
return readRowGroup;
|
|
654
|
+
}()
|
|
655
|
+
}, {
|
|
656
|
+
key: "readColumnChunk",
|
|
657
|
+
value: function () {
|
|
658
|
+
var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(schema, colChunk) {
|
|
659
|
+
var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5;
|
|
660
|
+
|
|
661
|
+
var field, type, compression, pagesOffset, pagesSize, pagesBuf;
|
|
662
|
+
return _regenerator.default.wrap(function _callee13$(_context13) {
|
|
663
|
+
while (1) {
|
|
664
|
+
switch (_context13.prev = _context13.next) {
|
|
665
|
+
case 0:
|
|
666
|
+
if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
|
|
667
|
+
_context13.next = 2;
|
|
668
|
+
break;
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
throw new Error('external references are not supported');
|
|
672
|
+
|
|
673
|
+
case 2:
|
|
674
|
+
field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
675
|
+
type = Util.getThriftEnum(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
676
|
+
|
|
677
|
+
if (!(type !== field.primitiveType)) {
|
|
678
|
+
_context13.next = 6;
|
|
679
|
+
break;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
throw new Error("chunk type not matching schema: ".concat(type));
|
|
683
|
+
|
|
684
|
+
case 6:
|
|
685
|
+
compression = Util.getThriftEnum(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
686
|
+
pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
687
|
+
pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
688
|
+
_context13.next = 11;
|
|
689
|
+
return this.read(pagesOffset, pagesSize);
|
|
690
|
+
|
|
691
|
+
case 11:
|
|
692
|
+
pagesBuf = _context13.sent;
|
|
693
|
+
return _context13.abrupt("return", decodeDataPages(pagesBuf, field, compression));
|
|
694
|
+
|
|
695
|
+
case 13:
|
|
696
|
+
case "end":
|
|
697
|
+
return _context13.stop();
|
|
698
|
+
}
|
|
699
|
+
}
|
|
700
|
+
}, _callee13, this);
|
|
701
|
+
}));
|
|
702
|
+
|
|
703
|
+
function readColumnChunk(_x9, _x10) {
|
|
704
|
+
return _readColumnChunk.apply(this, arguments);
|
|
705
|
+
}
|
|
298
706
|
|
|
299
|
-
|
|
300
|
-
|
|
707
|
+
return readColumnChunk;
|
|
708
|
+
}()
|
|
709
|
+
}, {
|
|
710
|
+
key: "readFooter",
|
|
711
|
+
value: function () {
|
|
712
|
+
var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14() {
|
|
713
|
+
var trailerLen, trailerBuf, metadataSize, metadataOffset, metadataBuf, _Util$decodeFileMetad, metadata;
|
|
714
|
+
|
|
715
|
+
return _regenerator.default.wrap(function _callee14$(_context14) {
|
|
716
|
+
while (1) {
|
|
717
|
+
switch (_context14.prev = _context14.next) {
|
|
718
|
+
case 0:
|
|
719
|
+
trailerLen = PARQUET_MAGIC.length + 4;
|
|
720
|
+
_context14.next = 3;
|
|
721
|
+
return this.read(this.fileSize - trailerLen, trailerLen);
|
|
722
|
+
|
|
723
|
+
case 3:
|
|
724
|
+
trailerBuf = _context14.sent;
|
|
725
|
+
|
|
726
|
+
if (!(trailerBuf.slice(4).toString() !== PARQUET_MAGIC)) {
|
|
727
|
+
_context14.next = 6;
|
|
728
|
+
break;
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
throw new Error('not a valid parquet file');
|
|
732
|
+
|
|
733
|
+
case 6:
|
|
734
|
+
metadataSize = trailerBuf.readUInt32LE(0);
|
|
735
|
+
metadataOffset = this.fileSize - metadataSize - trailerLen;
|
|
736
|
+
|
|
737
|
+
if (!(metadataOffset < PARQUET_MAGIC.length)) {
|
|
738
|
+
_context14.next = 10;
|
|
739
|
+
break;
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
throw new Error('invalid metadata size');
|
|
743
|
+
|
|
744
|
+
case 10:
|
|
745
|
+
_context14.next = 12;
|
|
746
|
+
return this.read(metadataOffset, metadataSize);
|
|
747
|
+
|
|
748
|
+
case 12:
|
|
749
|
+
metadataBuf = _context14.sent;
|
|
750
|
+
_Util$decodeFileMetad = Util.decodeFileMetadata(metadataBuf), metadata = _Util$decodeFileMetad.metadata;
|
|
751
|
+
return _context14.abrupt("return", metadata);
|
|
752
|
+
|
|
753
|
+
case 15:
|
|
754
|
+
case "end":
|
|
755
|
+
return _context14.stop();
|
|
756
|
+
}
|
|
757
|
+
}
|
|
758
|
+
}, _callee14, this);
|
|
759
|
+
}));
|
|
760
|
+
|
|
761
|
+
function readFooter() {
|
|
762
|
+
return _readFooter.apply(this, arguments);
|
|
763
|
+
}
|
|
301
764
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
765
|
+
return readFooter;
|
|
766
|
+
}()
|
|
767
|
+
}], [{
|
|
768
|
+
key: "openFile",
|
|
769
|
+
value: function () {
|
|
770
|
+
var _openFile2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee15(filePath) {
|
|
771
|
+
var fileStat, fileDescriptor, readFn, closeFn;
|
|
772
|
+
return _regenerator.default.wrap(function _callee15$(_context15) {
|
|
773
|
+
while (1) {
|
|
774
|
+
switch (_context15.prev = _context15.next) {
|
|
775
|
+
case 0:
|
|
776
|
+
_context15.next = 2;
|
|
777
|
+
return Util.fstat(filePath);
|
|
778
|
+
|
|
779
|
+
case 2:
|
|
780
|
+
fileStat = _context15.sent;
|
|
781
|
+
_context15.next = 5;
|
|
782
|
+
return Util.fopen(filePath);
|
|
783
|
+
|
|
784
|
+
case 5:
|
|
785
|
+
fileDescriptor = _context15.sent;
|
|
786
|
+
readFn = Util.fread.bind(undefined, fileDescriptor);
|
|
787
|
+
closeFn = Util.fclose.bind(undefined, fileDescriptor);
|
|
788
|
+
return _context15.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, fileStat.size));
|
|
789
|
+
|
|
790
|
+
case 9:
|
|
791
|
+
case "end":
|
|
792
|
+
return _context15.stop();
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
}, _callee15);
|
|
796
|
+
}));
|
|
797
|
+
|
|
798
|
+
function openFile(_x11) {
|
|
799
|
+
return _openFile2.apply(this, arguments);
|
|
800
|
+
}
|
|
305
801
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
802
|
+
return openFile;
|
|
803
|
+
}()
|
|
804
|
+
}, {
|
|
805
|
+
key: "openBuffer",
|
|
806
|
+
value: function () {
|
|
807
|
+
var _openBuffer2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee16(buffer) {
|
|
808
|
+
var readFn, closeFn;
|
|
809
|
+
return _regenerator.default.wrap(function _callee16$(_context16) {
|
|
810
|
+
while (1) {
|
|
811
|
+
switch (_context16.prev = _context16.next) {
|
|
812
|
+
case 0:
|
|
813
|
+
readFn = function readFn(position, length) {
|
|
814
|
+
return Promise.resolve(buffer.slice(position, position + length));
|
|
815
|
+
};
|
|
816
|
+
|
|
817
|
+
closeFn = function closeFn() {
|
|
818
|
+
return Promise.resolve();
|
|
819
|
+
};
|
|
820
|
+
|
|
821
|
+
return _context16.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, buffer.length));
|
|
822
|
+
|
|
823
|
+
case 3:
|
|
824
|
+
case "end":
|
|
825
|
+
return _context16.stop();
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
}, _callee16);
|
|
829
|
+
}));
|
|
830
|
+
|
|
831
|
+
function openBuffer(_x12) {
|
|
832
|
+
return _openBuffer2.apply(this, arguments);
|
|
833
|
+
}
|
|
312
834
|
|
|
313
|
-
|
|
835
|
+
return openBuffer;
|
|
836
|
+
}()
|
|
837
|
+
}]);
|
|
838
|
+
return ParquetEnvelopeReader;
|
|
839
|
+
}();
|
|
314
840
|
|
|
315
841
|
exports.ParquetEnvelopeReader = ParquetEnvelopeReader;
|
|
316
842
|
|
|
@@ -323,12 +849,12 @@ function decodeValues(type, encoding, cursor, count, opts) {
|
|
|
323
849
|
}
|
|
324
850
|
|
|
325
851
|
function decodeDataPages(buffer, column, compression) {
|
|
326
|
-
|
|
327
|
-
buffer,
|
|
852
|
+
var cursor = {
|
|
853
|
+
buffer: buffer,
|
|
328
854
|
offset: 0,
|
|
329
855
|
size: buffer.length
|
|
330
856
|
};
|
|
331
|
-
|
|
857
|
+
var data = {
|
|
332
858
|
rlevels: [],
|
|
333
859
|
dlevels: [],
|
|
334
860
|
values: [],
|
|
@@ -336,13 +862,13 @@ function decodeDataPages(buffer, column, compression) {
|
|
|
336
862
|
};
|
|
337
863
|
|
|
338
864
|
while (cursor.offset < cursor.size) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
cursor.offset +=
|
|
344
|
-
|
|
345
|
-
|
|
865
|
+
var _Util$decodePageHeade = Util.decodePageHeader(cursor.buffer),
|
|
866
|
+
pageHeader = _Util$decodePageHeade.pageHeader,
|
|
867
|
+
_length = _Util$decodePageHeade.length;
|
|
868
|
+
|
|
869
|
+
cursor.offset += _length;
|
|
870
|
+
var pageType = Util.getThriftEnum(_parquetThrift.PageType, pageHeader.type);
|
|
871
|
+
var pageData = null;
|
|
346
872
|
|
|
347
873
|
switch (pageType) {
|
|
348
874
|
case 'DATA_PAGE':
|
|
@@ -369,12 +895,12 @@ function decodeDataPages(buffer, column, compression) {
|
|
|
369
895
|
function decodeDataPage(cursor, header, column, compression) {
|
|
370
896
|
var _header$data_page_hea, _header$data_page_hea2, _header$data_page_hea3, _header$data_page_hea4;
|
|
371
897
|
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
898
|
+
var cursorEnd = cursor.offset + header.compressed_page_size;
|
|
899
|
+
var valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
|
|
900
|
+
var dataCursor = cursor;
|
|
375
901
|
|
|
376
902
|
if (compression !== 'UNCOMPRESSED') {
|
|
377
|
-
|
|
903
|
+
var valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
378
904
|
dataCursor = {
|
|
379
905
|
buffer: valuesBuf,
|
|
380
906
|
offset: 0,
|
|
@@ -383,8 +909,8 @@ function decodeDataPage(cursor, header, column, compression) {
|
|
|
383
909
|
cursor.offset = cursorEnd;
|
|
384
910
|
}
|
|
385
911
|
|
|
386
|
-
|
|
387
|
-
|
|
912
|
+
var rLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
|
|
913
|
+
var rLevels = new Array(valueCount);
|
|
388
914
|
|
|
389
915
|
if (column.rLevelMax > 0) {
|
|
390
916
|
rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount, {
|
|
@@ -395,8 +921,8 @@ function decodeDataPage(cursor, header, column, compression) {
|
|
|
395
921
|
rLevels.fill(0);
|
|
396
922
|
}
|
|
397
923
|
|
|
398
|
-
|
|
399
|
-
|
|
924
|
+
var dLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
|
|
925
|
+
var dLevels = new Array(valueCount);
|
|
400
926
|
|
|
401
927
|
if (column.dLevelMax > 0) {
|
|
402
928
|
dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount, {
|
|
@@ -407,23 +933,34 @@ function decodeDataPage(cursor, header, column, compression) {
|
|
|
407
933
|
dLevels.fill(0);
|
|
408
934
|
}
|
|
409
935
|
|
|
410
|
-
|
|
936
|
+
var valueCountNonNull = 0;
|
|
411
937
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
938
|
+
var _iterator3 = _createForOfIteratorHelper(dLevels),
|
|
939
|
+
_step3;
|
|
940
|
+
|
|
941
|
+
try {
|
|
942
|
+
for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) {
|
|
943
|
+
var dlvl = _step3.value;
|
|
944
|
+
|
|
945
|
+
if (dlvl === column.dLevelMax) {
|
|
946
|
+
valueCountNonNull++;
|
|
947
|
+
}
|
|
415
948
|
}
|
|
949
|
+
} catch (err) {
|
|
950
|
+
_iterator3.e(err);
|
|
951
|
+
} finally {
|
|
952
|
+
_iterator3.f();
|
|
416
953
|
}
|
|
417
954
|
|
|
418
|
-
|
|
419
|
-
|
|
955
|
+
var valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
|
|
956
|
+
var values = decodeValues(column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, {
|
|
420
957
|
typeLength: column.typeLength,
|
|
421
958
|
bitWidth: column.typeLength
|
|
422
959
|
});
|
|
423
960
|
return {
|
|
424
961
|
dlevels: dLevels,
|
|
425
962
|
rlevels: rLevels,
|
|
426
|
-
values,
|
|
963
|
+
values: values,
|
|
427
964
|
count: valueCount
|
|
428
965
|
};
|
|
429
966
|
}
|
|
@@ -431,11 +968,11 @@ function decodeDataPage(cursor, header, column, compression) {
|
|
|
431
968
|
function decodeDataPageV2(cursor, header, column, compression) {
|
|
432
969
|
var _header$data_page_hea5, _header$data_page_hea6, _header$data_page_hea7, _header$data_page_hea8;
|
|
433
970
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
971
|
+
var cursorEnd = cursor.offset + header.compressed_page_size;
|
|
972
|
+
var valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
|
|
973
|
+
var valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
|
|
974
|
+
var valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
|
|
975
|
+
var rLevels = new Array(valueCount);
|
|
439
976
|
|
|
440
977
|
if (column.rLevelMax > 0) {
|
|
441
978
|
rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
@@ -446,7 +983,7 @@ function decodeDataPageV2(cursor, header, column, compression) {
|
|
|
446
983
|
rLevels.fill(0);
|
|
447
984
|
}
|
|
448
985
|
|
|
449
|
-
|
|
986
|
+
var dLevels = new Array(valueCount);
|
|
450
987
|
|
|
451
988
|
if (column.dLevelMax > 0) {
|
|
452
989
|
dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
@@ -457,10 +994,10 @@ function decodeDataPageV2(cursor, header, column, compression) {
|
|
|
457
994
|
dLevels.fill(0);
|
|
458
995
|
}
|
|
459
996
|
|
|
460
|
-
|
|
997
|
+
var valuesBufCursor = cursor;
|
|
461
998
|
|
|
462
999
|
if ((_header$data_page_hea8 = header.data_page_header_v2) !== null && _header$data_page_hea8 !== void 0 && _header$data_page_hea8.is_compressed) {
|
|
463
|
-
|
|
1000
|
+
var valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
464
1001
|
valuesBufCursor = {
|
|
465
1002
|
buffer: valuesBuf,
|
|
466
1003
|
offset: 0,
|
|
@@ -469,27 +1006,27 @@ function decodeDataPageV2(cursor, header, column, compression) {
|
|
|
469
1006
|
cursor.offset = cursorEnd;
|
|
470
1007
|
}
|
|
471
1008
|
|
|
472
|
-
|
|
1009
|
+
var values = decodeValues(column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, {
|
|
473
1010
|
typeLength: column.typeLength,
|
|
474
1011
|
bitWidth: column.typeLength
|
|
475
1012
|
});
|
|
476
1013
|
return {
|
|
477
1014
|
dlevels: dLevels,
|
|
478
1015
|
rlevels: rLevels,
|
|
479
|
-
values,
|
|
1016
|
+
values: values,
|
|
480
1017
|
count: valueCount
|
|
481
1018
|
};
|
|
482
1019
|
}
|
|
483
1020
|
|
|
484
1021
|
function decodeSchema(schemaElements, offset, len) {
|
|
485
|
-
|
|
486
|
-
|
|
1022
|
+
var schema = {};
|
|
1023
|
+
var next = offset;
|
|
487
1024
|
|
|
488
|
-
for (
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
1025
|
+
for (var i = 0; i < len; i++) {
|
|
1026
|
+
var schemaElement = schemaElements[next];
|
|
1027
|
+
var repetitionType = next > 0 ? Util.getThriftEnum(_parquetThrift.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
|
|
1028
|
+
var optional = false;
|
|
1029
|
+
var repeated = false;
|
|
493
1030
|
|
|
494
1031
|
switch (repetitionType) {
|
|
495
1032
|
case 'REQUIRED':
|
|
@@ -508,15 +1045,15 @@ function decodeSchema(schemaElements, offset, len) {
|
|
|
508
1045
|
}
|
|
509
1046
|
|
|
510
1047
|
if (schemaElement.num_children > 0) {
|
|
511
|
-
|
|
1048
|
+
var res = decodeSchema(schemaElements, next + 1, schemaElement.num_children);
|
|
512
1049
|
next = res.next;
|
|
513
1050
|
schema[schemaElement.name] = {
|
|
514
|
-
optional,
|
|
515
|
-
repeated,
|
|
1051
|
+
optional: optional,
|
|
1052
|
+
repeated: repeated,
|
|
516
1053
|
fields: res.schema
|
|
517
1054
|
};
|
|
518
1055
|
} else {
|
|
519
|
-
|
|
1056
|
+
var logicalType = Util.getThriftEnum(_parquetThrift.Type, schemaElement.type);
|
|
520
1057
|
|
|
521
1058
|
if (schemaElement.converted_type) {
|
|
522
1059
|
logicalType = Util.getThriftEnum(_parquetThrift.ConvertedType, schemaElement.converted_type);
|
|
@@ -525,17 +1062,17 @@ function decodeSchema(schemaElements, offset, len) {
|
|
|
525
1062
|
schema[schemaElement.name] = {
|
|
526
1063
|
type: logicalType,
|
|
527
1064
|
typeLength: schemaElement.type_length,
|
|
528
|
-
optional,
|
|
529
|
-
repeated
|
|
1065
|
+
optional: optional,
|
|
1066
|
+
repeated: repeated
|
|
530
1067
|
};
|
|
531
1068
|
next++;
|
|
532
1069
|
}
|
|
533
1070
|
}
|
|
534
1071
|
|
|
535
1072
|
return {
|
|
536
|
-
schema,
|
|
537
|
-
offset,
|
|
538
|
-
next
|
|
1073
|
+
schema: schema,
|
|
1074
|
+
offset: offset,
|
|
1075
|
+
next: next
|
|
539
1076
|
};
|
|
540
1077
|
}
|
|
541
1078
|
//# sourceMappingURL=reader.js.map
|