@loaders.gl/parquet 3.0.13 → 3.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/dist/dist.es5.min.js +1 -1
  2. package/dist/dist.es5.min.js.map +1 -1
  3. package/dist/dist.min.js +1 -1
  4. package/dist/dist.min.js.map +1 -1
  5. package/dist/es5/bundle.js +2 -2
  6. package/dist/es5/bundle.js.map +1 -1
  7. package/dist/es5/index.js +9 -9
  8. package/dist/es5/parquet-loader.js +70 -19
  9. package/dist/es5/parquet-loader.js.map +1 -1
  10. package/dist/es5/parquet-writer.js +4 -4
  11. package/dist/es5/parquet-writer.js.map +1 -1
  12. package/dist/es5/parquetjs/codecs/index.js +6 -4
  13. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  14. package/dist/es5/parquetjs/codecs/plain.js +43 -41
  15. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  16. package/dist/es5/parquetjs/codecs/rle.js +35 -25
  17. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  18. package/dist/es5/parquetjs/compression.js +9 -7
  19. package/dist/es5/parquetjs/compression.js.map +1 -1
  20. package/dist/es5/parquetjs/file.js +15 -15
  21. package/dist/es5/parquetjs/file.js.map +1 -1
  22. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  23. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
  24. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  25. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
  26. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  27. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
  28. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  29. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
  30. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  31. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
  32. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  33. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  34. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  35. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
  36. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  37. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
  38. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  39. package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
  40. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  41. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
  42. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  43. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
  44. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  45. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
  47. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  49. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
  50. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  51. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
  52. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  53. package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
  54. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
  56. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
  58. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
  60. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  61. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
  62. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  63. package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
  64. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
  66. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
  68. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
  70. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
  72. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  73. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
  74. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  75. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
  76. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
  78. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
  81. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
  83. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
  85. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
  87. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
  89. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
  91. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
  93. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
  95. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
  98. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
  100. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  102. package/dist/es5/parquetjs/reader.js +813 -276
  103. package/dist/es5/parquetjs/reader.js.map +1 -1
  104. package/dist/es5/parquetjs/schema/declare.js +11 -9
  105. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  106. package/dist/es5/parquetjs/schema/schema.js +87 -73
  107. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  108. package/dist/es5/parquetjs/schema/shred.js +95 -55
  109. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  110. package/dist/es5/parquetjs/schema/types.js +25 -25
  111. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  112. package/dist/es5/parquetjs/util.js +71 -39
  113. package/dist/es5/parquetjs/util.js.map +1 -1
  114. package/dist/es5/parquetjs/writer.js +467 -200
  115. package/dist/es5/parquetjs/writer.js.map +1 -1
  116. package/dist/esm/parquet-loader.js +1 -1
  117. package/dist/esm/parquet-writer.js +1 -1
  118. package/dist/parquet-worker.js +1 -1
  119. package/dist/parquet-worker.js.map +1 -1
  120. package/package.json +4 -4
@@ -2,11 +2,21 @@
2
2
 
3
3
  var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
4
 
5
+ var _typeof = require("@babel/runtime/helpers/typeof");
6
+
5
7
  Object.defineProperty(exports, "__esModule", {
6
8
  value: true
7
9
  });
8
10
  exports.ParquetEnvelopeReader = exports.ParquetReader = exports.ParquetCursor = void 0;
9
11
 
12
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
13
+
14
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
15
+
16
+ var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
17
+
18
+ var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
19
+
10
20
  var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
11
21
 
12
22
  var _codecs = require("./codecs");
@@ -21,20 +31,27 @@ var _parquetThrift = require("./parquet-thrift");
21
31
 
22
32
  var Util = _interopRequireWildcard(require("./util"));
23
33
 
24
- let _Symbol$asyncIterator, _Symbol$asyncIterator2;
34
+ var _Symbol$asyncIterator, _Symbol$asyncIterator2;
35
+
36
+ function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
37
+
38
+ function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
25
39
 
26
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
40
+ function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
27
41
 
28
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
42
+ function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
29
43
 
30
- const PARQUET_MAGIC = 'PAR1';
31
- const PARQUET_VERSION = 1;
32
- const PARQUET_RDLVL_TYPE = 'INT32';
33
- const PARQUET_RDLVL_ENCODING = 'RLE';
44
+ function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
45
+
46
+ var PARQUET_MAGIC = 'PAR1';
47
+ var PARQUET_VERSION = 1;
48
+ var PARQUET_RDLVL_TYPE = 'INT32';
49
+ var PARQUET_RDLVL_ENCODING = 'RLE';
34
50
  _Symbol$asyncIterator = Symbol.asyncIterator;
35
51
 
36
- class ParquetCursor {
37
- constructor(metadata, envelopeReader, schema, columnList) {
52
+ var ParquetCursor = function () {
53
+ function ParquetCursor(metadata, envelopeReader, schema, columnList) {
54
+ (0, _classCallCheck2.default)(this, ParquetCursor);
38
55
  (0, _defineProperty2.default)(this, "metadata", void 0);
39
56
  (0, _defineProperty2.default)(this, "envelopeReader", void 0);
40
57
  (0, _defineProperty2.default)(this, "schema", void 0);
@@ -49,118 +66,184 @@ class ParquetCursor {
49
66
  this.rowGroupIndex = 0;
50
67
  }
51
68
 
52
- async next() {
53
- if (this.rowGroup.length === 0) {
54
- if (this.rowGroupIndex >= this.metadata.row_groups.length) {
55
- return null;
69
+ (0, _createClass2.default)(ParquetCursor, [{
70
+ key: "next",
71
+ value: function () {
72
+ var _next = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
73
+ var rowBuffer;
74
+ return _regenerator.default.wrap(function _callee$(_context) {
75
+ while (1) {
76
+ switch (_context.prev = _context.next) {
77
+ case 0:
78
+ if (!(this.rowGroup.length === 0)) {
79
+ _context.next = 8;
80
+ break;
81
+ }
82
+
83
+ if (!(this.rowGroupIndex >= this.metadata.row_groups.length)) {
84
+ _context.next = 3;
85
+ break;
86
+ }
87
+
88
+ return _context.abrupt("return", null);
89
+
90
+ case 3:
91
+ _context.next = 5;
92
+ return this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
93
+
94
+ case 5:
95
+ rowBuffer = _context.sent;
96
+ this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);
97
+ this.rowGroupIndex++;
98
+
99
+ case 8:
100
+ return _context.abrupt("return", this.rowGroup.shift());
101
+
102
+ case 9:
103
+ case "end":
104
+ return _context.stop();
105
+ }
106
+ }
107
+ }, _callee, this);
108
+ }));
109
+
110
+ function next() {
111
+ return _next.apply(this, arguments);
56
112
  }
57
113
 
58
- const rowBuffer = await this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
59
- this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);
60
- this.rowGroupIndex++;
114
+ return next;
115
+ }()
116
+ }, {
117
+ key: "rewind",
118
+ value: function rewind() {
119
+ this.rowGroup = [];
120
+ this.rowGroupIndex = 0;
61
121
  }
62
-
63
- return this.rowGroup.shift();
64
- }
65
-
66
- rewind() {
67
- this.rowGroup = [];
68
- this.rowGroupIndex = 0;
69
- }
70
-
71
- [_Symbol$asyncIterator]() {
72
- let done = false;
73
- return {
74
- next: async () => {
75
- if (done) {
76
- return {
77
- done,
78
- value: null
79
- };
80
- }
81
-
82
- const value = await this.next();
83
-
84
- if (value === null) {
85
- return {
86
- done: true,
87
- value
88
- };
89
- }
90
-
91
- return {
92
- done: false,
93
- value
94
- };
95
- },
96
- return: async () => {
97
- done = true;
98
- return {
99
- done,
100
- value: null
101
- };
102
- },
103
- throw: async () => {
104
- done = true;
105
- return {
106
- done: true,
107
- value: null
108
- };
109
- }
110
- };
111
- }
112
-
113
- }
122
+ }, {
123
+ key: _Symbol$asyncIterator,
124
+ value: function value() {
125
+ var _this = this;
126
+
127
+ var done = false;
128
+ return {
129
+ next: function () {
130
+ var _next2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2() {
131
+ var value;
132
+ return _regenerator.default.wrap(function _callee2$(_context2) {
133
+ while (1) {
134
+ switch (_context2.prev = _context2.next) {
135
+ case 0:
136
+ if (!done) {
137
+ _context2.next = 2;
138
+ break;
139
+ }
140
+
141
+ return _context2.abrupt("return", {
142
+ done: done,
143
+ value: null
144
+ });
145
+
146
+ case 2:
147
+ _context2.next = 4;
148
+ return _this.next();
149
+
150
+ case 4:
151
+ value = _context2.sent;
152
+
153
+ if (!(value === null)) {
154
+ _context2.next = 7;
155
+ break;
156
+ }
157
+
158
+ return _context2.abrupt("return", {
159
+ done: true,
160
+ value: value
161
+ });
162
+
163
+ case 7:
164
+ return _context2.abrupt("return", {
165
+ done: false,
166
+ value: value
167
+ });
168
+
169
+ case 8:
170
+ case "end":
171
+ return _context2.stop();
172
+ }
173
+ }
174
+ }, _callee2);
175
+ }));
176
+
177
+ function next() {
178
+ return _next2.apply(this, arguments);
179
+ }
180
+
181
+ return next;
182
+ }(),
183
+ return: function () {
184
+ var _return2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3() {
185
+ return _regenerator.default.wrap(function _callee3$(_context3) {
186
+ while (1) {
187
+ switch (_context3.prev = _context3.next) {
188
+ case 0:
189
+ done = true;
190
+ return _context3.abrupt("return", {
191
+ done: done,
192
+ value: null
193
+ });
194
+
195
+ case 2:
196
+ case "end":
197
+ return _context3.stop();
198
+ }
199
+ }
200
+ }, _callee3);
201
+ }));
202
+
203
+ function _return() {
204
+ return _return2.apply(this, arguments);
205
+ }
206
+
207
+ return _return;
208
+ }(),
209
+ throw: function () {
210
+ var _throw2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4() {
211
+ return _regenerator.default.wrap(function _callee4$(_context4) {
212
+ while (1) {
213
+ switch (_context4.prev = _context4.next) {
214
+ case 0:
215
+ done = true;
216
+ return _context4.abrupt("return", {
217
+ done: true,
218
+ value: null
219
+ });
220
+
221
+ case 2:
222
+ case "end":
223
+ return _context4.stop();
224
+ }
225
+ }
226
+ }, _callee4);
227
+ }));
228
+
229
+ function _throw() {
230
+ return _throw2.apply(this, arguments);
231
+ }
232
+
233
+ return _throw;
234
+ }()
235
+ };
236
+ }
237
+ }]);
238
+ return ParquetCursor;
239
+ }();
114
240
 
115
241
  exports.ParquetCursor = ParquetCursor;
116
242
  _Symbol$asyncIterator2 = Symbol.asyncIterator;
117
243
 
118
- class ParquetReader {
119
- static async openFile(filePath) {
120
- const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);
121
-
122
- try {
123
- await envelopeReader.readHeader();
124
- const metadata = await envelopeReader.readFooter();
125
- return new ParquetReader(metadata, envelopeReader);
126
- } catch (err) {
127
- await envelopeReader.close();
128
- throw err;
129
- }
130
- }
131
-
132
- static async openBuffer(buffer) {
133
- const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);
134
-
135
- try {
136
- await envelopeReader.readHeader();
137
- const metadata = await envelopeReader.readFooter();
138
- return new ParquetReader(metadata, envelopeReader);
139
- } catch (err) {
140
- await envelopeReader.close();
141
- throw err;
142
- }
143
- }
144
-
145
- static async openArrayBuffer(arrayBuffer) {
146
- const readFn = async (start, length) => Buffer.from(arrayBuffer, start, length);
147
-
148
- const closeFn = async () => {};
149
-
150
- const size = arrayBuffer.byteLength;
151
- const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
152
-
153
- try {
154
- await envelopeReader.readHeader();
155
- const metadata = await envelopeReader.readFooter();
156
- return new ParquetReader(metadata, envelopeReader);
157
- } catch (err) {
158
- await envelopeReader.close();
159
- throw err;
160
- }
161
- }
162
-
163
- constructor(metadata, envelopeReader) {
244
+ var ParquetReader = function () {
245
+ function ParquetReader(metadata, envelopeReader) {
246
+ (0, _classCallCheck2.default)(this, ParquetReader);
164
247
  (0, _defineProperty2.default)(this, "metadata", void 0);
165
248
  (0, _defineProperty2.default)(this, "envelopeReader", void 0);
166
249
  (0, _defineProperty2.default)(this, "schema", void 0);
@@ -171,70 +254,279 @@ class ParquetReader {
171
254
 
172
255
  this.metadata = metadata;
173
256
  this.envelopeReader = envelopeReader;
174
- const root = this.metadata.schema[0];
175
- const {
176
- schema
177
- } = decodeSchema(this.metadata.schema, 1, root.num_children);
178
- this.schema = new _schema.ParquetSchema(schema);
179
- }
180
-
181
- async close() {
182
- await this.envelopeReader.close();
183
- }
257
+ var root = this.metadata.schema[0];
184
258
 
185
- getCursor(columnList) {
186
- if (!columnList) {
187
- columnList = [];
188
- }
259
+ var _decodeSchema = decodeSchema(this.metadata.schema, 1, root.num_children),
260
+ schema = _decodeSchema.schema;
189
261
 
190
- columnList = columnList.map(x => Array.isArray(x) ? x : [x]);
191
- return new ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList);
262
+ this.schema = new _schema.ParquetSchema(schema);
192
263
  }
193
264
 
194
- getRowCount() {
195
- return Number(this.metadata.num_rows);
196
- }
265
+ (0, _createClass2.default)(ParquetReader, [{
266
+ key: "close",
267
+ value: function () {
268
+ var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5() {
269
+ return _regenerator.default.wrap(function _callee5$(_context5) {
270
+ while (1) {
271
+ switch (_context5.prev = _context5.next) {
272
+ case 0:
273
+ _context5.next = 2;
274
+ return this.envelopeReader.close();
275
+
276
+ case 2:
277
+ case "end":
278
+ return _context5.stop();
279
+ }
280
+ }
281
+ }, _callee5, this);
282
+ }));
283
+
284
+ function close() {
285
+ return _close.apply(this, arguments);
286
+ }
197
287
 
198
- getSchema() {
199
- return this.schema;
200
- }
288
+ return close;
289
+ }()
290
+ }, {
291
+ key: "getCursor",
292
+ value: function getCursor(columnList) {
293
+ if (!columnList) {
294
+ columnList = [];
295
+ }
201
296
 
202
- getMetadata() {
203
- const md = {};
297
+ columnList = columnList.map(function (x) {
298
+ return Array.isArray(x) ? x : [x];
299
+ });
300
+ return new ParquetCursor(this.metadata, this.envelopeReader, this.schema, columnList);
301
+ }
302
+ }, {
303
+ key: "getRowCount",
304
+ value: function getRowCount() {
305
+ return Number(this.metadata.num_rows);
306
+ }
307
+ }, {
308
+ key: "getSchema",
309
+ value: function getSchema() {
310
+ return this.schema;
311
+ }
312
+ }, {
313
+ key: "getMetadata",
314
+ value: function getMetadata() {
315
+ var md = {};
316
+
317
+ var _iterator = _createForOfIteratorHelper(this.metadata.key_value_metadata),
318
+ _step;
319
+
320
+ try {
321
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
322
+ var kv = _step.value;
323
+ md[kv.key] = kv.value;
324
+ }
325
+ } catch (err) {
326
+ _iterator.e(err);
327
+ } finally {
328
+ _iterator.f();
329
+ }
204
330
 
205
- for (const kv of this.metadata.key_value_metadata) {
206
- md[kv.key] = kv.value;
331
+ return md;
207
332
  }
333
+ }, {
334
+ key: _Symbol$asyncIterator2,
335
+ value: function value() {
336
+ return this.getCursor()[Symbol.asyncIterator]();
337
+ }
338
+ }], [{
339
+ key: "openFile",
340
+ value: function () {
341
+ var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(filePath) {
342
+ var envelopeReader, metadata;
343
+ return _regenerator.default.wrap(function _callee6$(_context6) {
344
+ while (1) {
345
+ switch (_context6.prev = _context6.next) {
346
+ case 0:
347
+ _context6.next = 2;
348
+ return ParquetEnvelopeReader.openFile(filePath);
349
+
350
+ case 2:
351
+ envelopeReader = _context6.sent;
352
+ _context6.prev = 3;
353
+ _context6.next = 6;
354
+ return envelopeReader.readHeader();
355
+
356
+ case 6:
357
+ _context6.next = 8;
358
+ return envelopeReader.readFooter();
359
+
360
+ case 8:
361
+ metadata = _context6.sent;
362
+ return _context6.abrupt("return", new ParquetReader(metadata, envelopeReader));
363
+
364
+ case 12:
365
+ _context6.prev = 12;
366
+ _context6.t0 = _context6["catch"](3);
367
+ _context6.next = 16;
368
+ return envelopeReader.close();
369
+
370
+ case 16:
371
+ throw _context6.t0;
372
+
373
+ case 17:
374
+ case "end":
375
+ return _context6.stop();
376
+ }
377
+ }
378
+ }, _callee6, null, [[3, 12]]);
379
+ }));
380
+
381
+ function openFile(_x) {
382
+ return _openFile.apply(this, arguments);
383
+ }
208
384
 
209
- return md;
210
- }
385
+ return openFile;
386
+ }()
387
+ }, {
388
+ key: "openBuffer",
389
+ value: function () {
390
+ var _openBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(buffer) {
391
+ var envelopeReader, metadata;
392
+ return _regenerator.default.wrap(function _callee7$(_context7) {
393
+ while (1) {
394
+ switch (_context7.prev = _context7.next) {
395
+ case 0:
396
+ _context7.next = 2;
397
+ return ParquetEnvelopeReader.openBuffer(buffer);
398
+
399
+ case 2:
400
+ envelopeReader = _context7.sent;
401
+ _context7.prev = 3;
402
+ _context7.next = 6;
403
+ return envelopeReader.readHeader();
404
+
405
+ case 6:
406
+ _context7.next = 8;
407
+ return envelopeReader.readFooter();
408
+
409
+ case 8:
410
+ metadata = _context7.sent;
411
+ return _context7.abrupt("return", new ParquetReader(metadata, envelopeReader));
412
+
413
+ case 12:
414
+ _context7.prev = 12;
415
+ _context7.t0 = _context7["catch"](3);
416
+ _context7.next = 16;
417
+ return envelopeReader.close();
418
+
419
+ case 16:
420
+ throw _context7.t0;
421
+
422
+ case 17:
423
+ case "end":
424
+ return _context7.stop();
425
+ }
426
+ }
427
+ }, _callee7, null, [[3, 12]]);
428
+ }));
429
+
430
+ function openBuffer(_x2) {
431
+ return _openBuffer.apply(this, arguments);
432
+ }
211
433
 
212
- [_Symbol$asyncIterator2]() {
213
- return this.getCursor()[Symbol.asyncIterator]();
214
- }
434
+ return openBuffer;
435
+ }()
436
+ }, {
437
+ key: "openArrayBuffer",
438
+ value: function () {
439
+ var _openArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(arrayBuffer) {
440
+ var readFn, closeFn, size, envelopeReader, metadata;
441
+ return _regenerator.default.wrap(function _callee10$(_context10) {
442
+ while (1) {
443
+ switch (_context10.prev = _context10.next) {
444
+ case 0:
445
+ readFn = function () {
446
+ var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(start, length) {
447
+ return _regenerator.default.wrap(function _callee8$(_context8) {
448
+ while (1) {
449
+ switch (_context8.prev = _context8.next) {
450
+ case 0:
451
+ return _context8.abrupt("return", Buffer.from(arrayBuffer, start, length));
452
+
453
+ case 1:
454
+ case "end":
455
+ return _context8.stop();
456
+ }
457
+ }
458
+ }, _callee8);
459
+ }));
460
+
461
+ return function readFn(_x4, _x5) {
462
+ return _ref.apply(this, arguments);
463
+ };
464
+ }();
465
+
466
+ closeFn = function () {
467
+ var _ref2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
468
+ return _regenerator.default.wrap(function _callee9$(_context9) {
469
+ while (1) {
470
+ switch (_context9.prev = _context9.next) {
471
+ case 0:
472
+ case "end":
473
+ return _context9.stop();
474
+ }
475
+ }
476
+ }, _callee9);
477
+ }));
478
+
479
+ return function closeFn() {
480
+ return _ref2.apply(this, arguments);
481
+ };
482
+ }();
483
+
484
+ size = arrayBuffer.byteLength;
485
+ envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);
486
+ _context10.prev = 4;
487
+ _context10.next = 7;
488
+ return envelopeReader.readHeader();
489
+
490
+ case 7:
491
+ _context10.next = 9;
492
+ return envelopeReader.readFooter();
493
+
494
+ case 9:
495
+ metadata = _context10.sent;
496
+ return _context10.abrupt("return", new ParquetReader(metadata, envelopeReader));
497
+
498
+ case 13:
499
+ _context10.prev = 13;
500
+ _context10.t0 = _context10["catch"](4);
501
+ _context10.next = 17;
502
+ return envelopeReader.close();
503
+
504
+ case 17:
505
+ throw _context10.t0;
506
+
507
+ case 18:
508
+ case "end":
509
+ return _context10.stop();
510
+ }
511
+ }
512
+ }, _callee10, null, [[4, 13]]);
513
+ }));
514
+
515
+ function openArrayBuffer(_x3) {
516
+ return _openArrayBuffer.apply(this, arguments);
517
+ }
215
518
 
216
- }
519
+ return openArrayBuffer;
520
+ }()
521
+ }]);
522
+ return ParquetReader;
523
+ }();
217
524
 
218
525
  exports.ParquetReader = ParquetReader;
219
526
 
220
- class ParquetEnvelopeReader {
221
- static async openFile(filePath) {
222
- const fileStat = await Util.fstat(filePath);
223
- const fileDescriptor = await Util.fopen(filePath);
224
- const readFn = Util.fread.bind(undefined, fileDescriptor);
225
- const closeFn = Util.fclose.bind(undefined, fileDescriptor);
226
- return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);
227
- }
228
-
229
- static async openBuffer(buffer) {
230
- const readFn = (position, length) => Promise.resolve(buffer.slice(position, position + length));
231
-
232
- const closeFn = () => Promise.resolve();
233
-
234
- return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);
235
- }
236
-
237
- constructor(read, close, fileSize) {
527
+ var ParquetEnvelopeReader = function () {
528
+ function ParquetEnvelopeReader(read, close, fileSize) {
529
+ (0, _classCallCheck2.default)(this, ParquetEnvelopeReader);
238
530
  (0, _defineProperty2.default)(this, "read", void 0);
239
531
  (0, _defineProperty2.default)(this, "close", void 0);
240
532
  (0, _defineProperty2.default)(this, "fileSize", void 0);
@@ -243,74 +535,308 @@ class ParquetEnvelopeReader {
243
535
  this.fileSize = fileSize;
244
536
  }
245
537
 
246
- async readHeader() {
247
- const buf = await this.read(0, PARQUET_MAGIC.length);
248
-
249
- if (buf.toString() !== PARQUET_MAGIC) {
250
- throw new Error('not valid parquet file');
251
- }
252
- }
253
-
254
- async readRowGroup(schema, rowGroup, columnList) {
255
- const buffer = {
256
- rowCount: Number(rowGroup.num_rows),
257
- columnData: {}
258
- };
259
-
260
- for (const colChunk of rowGroup.columns) {
261
- const colMetadata = colChunk.meta_data;
262
- const colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
263
-
264
- if (columnList.length > 0 && Util.fieldIndexOf(columnList, colKey) < 0) {
265
- continue;
538
+ (0, _createClass2.default)(ParquetEnvelopeReader, [{
539
+ key: "readHeader",
540
+ value: function () {
541
+ var _readHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11() {
542
+ var buf;
543
+ return _regenerator.default.wrap(function _callee11$(_context11) {
544
+ while (1) {
545
+ switch (_context11.prev = _context11.next) {
546
+ case 0:
547
+ _context11.next = 2;
548
+ return this.read(0, PARQUET_MAGIC.length);
549
+
550
+ case 2:
551
+ buf = _context11.sent;
552
+
553
+ if (!(buf.toString() !== PARQUET_MAGIC)) {
554
+ _context11.next = 5;
555
+ break;
556
+ }
557
+
558
+ throw new Error('not valid parquet file');
559
+
560
+ case 5:
561
+ case "end":
562
+ return _context11.stop();
563
+ }
564
+ }
565
+ }, _callee11, this);
566
+ }));
567
+
568
+ function readHeader() {
569
+ return _readHeader.apply(this, arguments);
266
570
  }
267
571
 
268
- buffer.columnData[colKey.join()] = await this.readColumnChunk(schema, colChunk);
269
- }
270
-
271
- return buffer;
272
- }
273
-
274
- async readColumnChunk(schema, colChunk) {
275
- var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5;
276
-
277
- if (colChunk.file_path !== undefined && colChunk.file_path !== null) {
278
- throw new Error('external references are not supported');
279
- }
280
-
281
- const field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
282
- const type = Util.getThriftEnum(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
283
- if (type !== field.primitiveType) throw new Error("chunk type not matching schema: ".concat(type));
284
- const compression = Util.getThriftEnum(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
285
- const pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
286
- const pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
287
- const pagesBuf = await this.read(pagesOffset, pagesSize);
288
- return decodeDataPages(pagesBuf, field, compression);
289
- }
290
-
291
- async readFooter() {
292
- const trailerLen = PARQUET_MAGIC.length + 4;
293
- const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);
572
+ return readHeader;
573
+ }()
574
+ }, {
575
+ key: "readRowGroup",
576
+ value: function () {
577
+ var _readRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(schema, rowGroup, columnList) {
578
+ var buffer, _iterator2, _step2, colChunk, colMetadata, colKey;
579
+
580
+ return _regenerator.default.wrap(function _callee12$(_context12) {
581
+ while (1) {
582
+ switch (_context12.prev = _context12.next) {
583
+ case 0:
584
+ buffer = {
585
+ rowCount: Number(rowGroup.num_rows),
586
+ columnData: {}
587
+ };
588
+ _iterator2 = _createForOfIteratorHelper(rowGroup.columns);
589
+ _context12.prev = 2;
590
+
591
+ _iterator2.s();
592
+
593
+ case 4:
594
+ if ((_step2 = _iterator2.n()).done) {
595
+ _context12.next = 15;
596
+ break;
597
+ }
598
+
599
+ colChunk = _step2.value;
600
+ colMetadata = colChunk.meta_data;
601
+ colKey = colMetadata === null || colMetadata === void 0 ? void 0 : colMetadata.path_in_schema;
602
+
603
+ if (!(columnList.length > 0 && Util.fieldIndexOf(columnList, colKey) < 0)) {
604
+ _context12.next = 10;
605
+ break;
606
+ }
607
+
608
+ return _context12.abrupt("continue", 13);
609
+
610
+ case 10:
611
+ _context12.next = 12;
612
+ return this.readColumnChunk(schema, colChunk);
613
+
614
+ case 12:
615
+ buffer.columnData[colKey.join()] = _context12.sent;
616
+
617
+ case 13:
618
+ _context12.next = 4;
619
+ break;
620
+
621
+ case 15:
622
+ _context12.next = 20;
623
+ break;
624
+
625
+ case 17:
626
+ _context12.prev = 17;
627
+ _context12.t0 = _context12["catch"](2);
628
+
629
+ _iterator2.e(_context12.t0);
630
+
631
+ case 20:
632
+ _context12.prev = 20;
633
+
634
+ _iterator2.f();
635
+
636
+ return _context12.finish(20);
637
+
638
+ case 23:
639
+ return _context12.abrupt("return", buffer);
640
+
641
+ case 24:
642
+ case "end":
643
+ return _context12.stop();
644
+ }
645
+ }
646
+ }, _callee12, this, [[2, 17, 20, 23]]);
647
+ }));
648
+
649
+ function readRowGroup(_x6, _x7, _x8) {
650
+ return _readRowGroup.apply(this, arguments);
651
+ }
294
652
 
295
- if (trailerBuf.slice(4).toString() !== PARQUET_MAGIC) {
296
- throw new Error('not a valid parquet file');
297
- }
653
+ return readRowGroup;
654
+ }()
655
+ }, {
656
+ key: "readColumnChunk",
657
+ value: function () {
658
+ var _readColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(schema, colChunk) {
659
+ var _colChunk$meta_data, _colChunk$meta_data2, _colChunk$meta_data3, _colChunk$meta_data4, _colChunk$meta_data5;
660
+
661
+ var field, type, compression, pagesOffset, pagesSize, pagesBuf;
662
+ return _regenerator.default.wrap(function _callee13$(_context13) {
663
+ while (1) {
664
+ switch (_context13.prev = _context13.next) {
665
+ case 0:
666
+ if (!(colChunk.file_path !== undefined && colChunk.file_path !== null)) {
667
+ _context13.next = 2;
668
+ break;
669
+ }
670
+
671
+ throw new Error('external references are not supported');
672
+
673
+ case 2:
674
+ field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
675
+ type = Util.getThriftEnum(_parquetThrift.Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
676
+
677
+ if (!(type !== field.primitiveType)) {
678
+ _context13.next = 6;
679
+ break;
680
+ }
681
+
682
+ throw new Error("chunk type not matching schema: ".concat(type));
683
+
684
+ case 6:
685
+ compression = Util.getThriftEnum(_parquetThrift.CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
686
+ pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
687
+ pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
688
+ _context13.next = 11;
689
+ return this.read(pagesOffset, pagesSize);
690
+
691
+ case 11:
692
+ pagesBuf = _context13.sent;
693
+ return _context13.abrupt("return", decodeDataPages(pagesBuf, field, compression));
694
+
695
+ case 13:
696
+ case "end":
697
+ return _context13.stop();
698
+ }
699
+ }
700
+ }, _callee13, this);
701
+ }));
702
+
703
+ function readColumnChunk(_x9, _x10) {
704
+ return _readColumnChunk.apply(this, arguments);
705
+ }
298
706
 
299
- const metadataSize = trailerBuf.readUInt32LE(0);
300
- const metadataOffset = this.fileSize - metadataSize - trailerLen;
707
+ return readColumnChunk;
708
+ }()
709
+ }, {
710
+ key: "readFooter",
711
+ value: function () {
712
+ var _readFooter = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14() {
713
+ var trailerLen, trailerBuf, metadataSize, metadataOffset, metadataBuf, _Util$decodeFileMetad, metadata;
714
+
715
+ return _regenerator.default.wrap(function _callee14$(_context14) {
716
+ while (1) {
717
+ switch (_context14.prev = _context14.next) {
718
+ case 0:
719
+ trailerLen = PARQUET_MAGIC.length + 4;
720
+ _context14.next = 3;
721
+ return this.read(this.fileSize - trailerLen, trailerLen);
722
+
723
+ case 3:
724
+ trailerBuf = _context14.sent;
725
+
726
+ if (!(trailerBuf.slice(4).toString() !== PARQUET_MAGIC)) {
727
+ _context14.next = 6;
728
+ break;
729
+ }
730
+
731
+ throw new Error('not a valid parquet file');
732
+
733
+ case 6:
734
+ metadataSize = trailerBuf.readUInt32LE(0);
735
+ metadataOffset = this.fileSize - metadataSize - trailerLen;
736
+
737
+ if (!(metadataOffset < PARQUET_MAGIC.length)) {
738
+ _context14.next = 10;
739
+ break;
740
+ }
741
+
742
+ throw new Error('invalid metadata size');
743
+
744
+ case 10:
745
+ _context14.next = 12;
746
+ return this.read(metadataOffset, metadataSize);
747
+
748
+ case 12:
749
+ metadataBuf = _context14.sent;
750
+ _Util$decodeFileMetad = Util.decodeFileMetadata(metadataBuf), metadata = _Util$decodeFileMetad.metadata;
751
+ return _context14.abrupt("return", metadata);
752
+
753
+ case 15:
754
+ case "end":
755
+ return _context14.stop();
756
+ }
757
+ }
758
+ }, _callee14, this);
759
+ }));
760
+
761
+ function readFooter() {
762
+ return _readFooter.apply(this, arguments);
763
+ }
301
764
 
302
- if (metadataOffset < PARQUET_MAGIC.length) {
303
- throw new Error('invalid metadata size');
304
- }
765
+ return readFooter;
766
+ }()
767
+ }], [{
768
+ key: "openFile",
769
+ value: function () {
770
+ var _openFile2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee15(filePath) {
771
+ var fileStat, fileDescriptor, readFn, closeFn;
772
+ return _regenerator.default.wrap(function _callee15$(_context15) {
773
+ while (1) {
774
+ switch (_context15.prev = _context15.next) {
775
+ case 0:
776
+ _context15.next = 2;
777
+ return Util.fstat(filePath);
778
+
779
+ case 2:
780
+ fileStat = _context15.sent;
781
+ _context15.next = 5;
782
+ return Util.fopen(filePath);
783
+
784
+ case 5:
785
+ fileDescriptor = _context15.sent;
786
+ readFn = Util.fread.bind(undefined, fileDescriptor);
787
+ closeFn = Util.fclose.bind(undefined, fileDescriptor);
788
+ return _context15.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, fileStat.size));
789
+
790
+ case 9:
791
+ case "end":
792
+ return _context15.stop();
793
+ }
794
+ }
795
+ }, _callee15);
796
+ }));
797
+
798
+ function openFile(_x11) {
799
+ return _openFile2.apply(this, arguments);
800
+ }
305
801
 
306
- const metadataBuf = await this.read(metadataOffset, metadataSize);
307
- const {
308
- metadata
309
- } = Util.decodeFileMetadata(metadataBuf);
310
- return metadata;
311
- }
802
+ return openFile;
803
+ }()
804
+ }, {
805
+ key: "openBuffer",
806
+ value: function () {
807
+ var _openBuffer2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee16(buffer) {
808
+ var readFn, closeFn;
809
+ return _regenerator.default.wrap(function _callee16$(_context16) {
810
+ while (1) {
811
+ switch (_context16.prev = _context16.next) {
812
+ case 0:
813
+ readFn = function readFn(position, length) {
814
+ return Promise.resolve(buffer.slice(position, position + length));
815
+ };
816
+
817
+ closeFn = function closeFn() {
818
+ return Promise.resolve();
819
+ };
820
+
821
+ return _context16.abrupt("return", new ParquetEnvelopeReader(readFn, closeFn, buffer.length));
822
+
823
+ case 3:
824
+ case "end":
825
+ return _context16.stop();
826
+ }
827
+ }
828
+ }, _callee16);
829
+ }));
830
+
831
+ function openBuffer(_x12) {
832
+ return _openBuffer2.apply(this, arguments);
833
+ }
312
834
 
313
- }
835
+ return openBuffer;
836
+ }()
837
+ }]);
838
+ return ParquetEnvelopeReader;
839
+ }();
314
840
 
315
841
  exports.ParquetEnvelopeReader = ParquetEnvelopeReader;
316
842
 
@@ -323,12 +849,12 @@ function decodeValues(type, encoding, cursor, count, opts) {
323
849
  }
324
850
 
325
851
  function decodeDataPages(buffer, column, compression) {
326
- const cursor = {
327
- buffer,
852
+ var cursor = {
853
+ buffer: buffer,
328
854
  offset: 0,
329
855
  size: buffer.length
330
856
  };
331
- const data = {
857
+ var data = {
332
858
  rlevels: [],
333
859
  dlevels: [],
334
860
  values: [],
@@ -336,13 +862,13 @@ function decodeDataPages(buffer, column, compression) {
336
862
  };
337
863
 
338
864
  while (cursor.offset < cursor.size) {
339
- const {
340
- pageHeader,
341
- length
342
- } = Util.decodePageHeader(cursor.buffer);
343
- cursor.offset += length;
344
- const pageType = Util.getThriftEnum(_parquetThrift.PageType, pageHeader.type);
345
- let pageData = null;
865
+ var _Util$decodePageHeade = Util.decodePageHeader(cursor.buffer),
866
+ pageHeader = _Util$decodePageHeade.pageHeader,
867
+ _length = _Util$decodePageHeade.length;
868
+
869
+ cursor.offset += _length;
870
+ var pageType = Util.getThriftEnum(_parquetThrift.PageType, pageHeader.type);
871
+ var pageData = null;
346
872
 
347
873
  switch (pageType) {
348
874
  case 'DATA_PAGE':
@@ -369,12 +895,12 @@ function decodeDataPages(buffer, column, compression) {
369
895
  function decodeDataPage(cursor, header, column, compression) {
370
896
  var _header$data_page_hea, _header$data_page_hea2, _header$data_page_hea3, _header$data_page_hea4;
371
897
 
372
- const cursorEnd = cursor.offset + header.compressed_page_size;
373
- const valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
374
- let dataCursor = cursor;
898
+ var cursorEnd = cursor.offset + header.compressed_page_size;
899
+ var valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
900
+ var dataCursor = cursor;
375
901
 
376
902
  if (compression !== 'UNCOMPRESSED') {
377
- const valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
903
+ var valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
378
904
  dataCursor = {
379
905
  buffer: valuesBuf,
380
906
  offset: 0,
@@ -383,8 +909,8 @@ function decodeDataPage(cursor, header, column, compression) {
383
909
  cursor.offset = cursorEnd;
384
910
  }
385
911
 
386
- const rLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
387
- let rLevels = new Array(valueCount);
912
+ var rLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
913
+ var rLevels = new Array(valueCount);
388
914
 
389
915
  if (column.rLevelMax > 0) {
390
916
  rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount, {
@@ -395,8 +921,8 @@ function decodeDataPage(cursor, header, column, compression) {
395
921
  rLevels.fill(0);
396
922
  }
397
923
 
398
- const dLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
399
- let dLevels = new Array(valueCount);
924
+ var dLevelEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
925
+ var dLevels = new Array(valueCount);
400
926
 
401
927
  if (column.dLevelMax > 0) {
402
928
  dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount, {
@@ -407,23 +933,34 @@ function decodeDataPage(cursor, header, column, compression) {
407
933
  dLevels.fill(0);
408
934
  }
409
935
 
410
- let valueCountNonNull = 0;
936
+ var valueCountNonNull = 0;
411
937
 
412
- for (const dlvl of dLevels) {
413
- if (dlvl === column.dLevelMax) {
414
- valueCountNonNull++;
938
+ var _iterator3 = _createForOfIteratorHelper(dLevels),
939
+ _step3;
940
+
941
+ try {
942
+ for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) {
943
+ var dlvl = _step3.value;
944
+
945
+ if (dlvl === column.dLevelMax) {
946
+ valueCountNonNull++;
947
+ }
415
948
  }
949
+ } catch (err) {
950
+ _iterator3.e(err);
951
+ } finally {
952
+ _iterator3.f();
416
953
  }
417
954
 
418
- const valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
419
- const values = decodeValues(column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, {
955
+ var valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
956
+ var values = decodeValues(column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, {
420
957
  typeLength: column.typeLength,
421
958
  bitWidth: column.typeLength
422
959
  });
423
960
  return {
424
961
  dlevels: dLevels,
425
962
  rlevels: rLevels,
426
- values,
963
+ values: values,
427
964
  count: valueCount
428
965
  };
429
966
  }
@@ -431,11 +968,11 @@ function decodeDataPage(cursor, header, column, compression) {
431
968
  function decodeDataPageV2(cursor, header, column, compression) {
432
969
  var _header$data_page_hea5, _header$data_page_hea6, _header$data_page_hea7, _header$data_page_hea8;
433
970
 
434
- const cursorEnd = cursor.offset + header.compressed_page_size;
435
- const valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
436
- const valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
437
- const valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
438
- let rLevels = new Array(valueCount);
971
+ var cursorEnd = cursor.offset + header.compressed_page_size;
972
+ var valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
973
+ var valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
974
+ var valueEncoding = Util.getThriftEnum(_parquetThrift.Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
975
+ var rLevels = new Array(valueCount);
439
976
 
440
977
  if (column.rLevelMax > 0) {
441
978
  rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
@@ -446,7 +983,7 @@ function decodeDataPageV2(cursor, header, column, compression) {
446
983
  rLevels.fill(0);
447
984
  }
448
985
 
449
- let dLevels = new Array(valueCount);
986
+ var dLevels = new Array(valueCount);
450
987
 
451
988
  if (column.dLevelMax > 0) {
452
989
  dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, {
@@ -457,10 +994,10 @@ function decodeDataPageV2(cursor, header, column, compression) {
457
994
  dLevels.fill(0);
458
995
  }
459
996
 
460
- let valuesBufCursor = cursor;
997
+ var valuesBufCursor = cursor;
461
998
 
462
999
  if ((_header$data_page_hea8 = header.data_page_header_v2) !== null && _header$data_page_hea8 !== void 0 && _header$data_page_hea8.is_compressed) {
463
- const valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
1000
+ var valuesBuf = Compression.inflate(compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
464
1001
  valuesBufCursor = {
465
1002
  buffer: valuesBuf,
466
1003
  offset: 0,
@@ -469,27 +1006,27 @@ function decodeDataPageV2(cursor, header, column, compression) {
469
1006
  cursor.offset = cursorEnd;
470
1007
  }
471
1008
 
472
- const values = decodeValues(column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, {
1009
+ var values = decodeValues(column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, {
473
1010
  typeLength: column.typeLength,
474
1011
  bitWidth: column.typeLength
475
1012
  });
476
1013
  return {
477
1014
  dlevels: dLevels,
478
1015
  rlevels: rLevels,
479
- values,
1016
+ values: values,
480
1017
  count: valueCount
481
1018
  };
482
1019
  }
483
1020
 
484
1021
  function decodeSchema(schemaElements, offset, len) {
485
- const schema = {};
486
- let next = offset;
1022
+ var schema = {};
1023
+ var next = offset;
487
1024
 
488
- for (let i = 0; i < len; i++) {
489
- const schemaElement = schemaElements[next];
490
- const repetitionType = next > 0 ? Util.getThriftEnum(_parquetThrift.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
491
- let optional = false;
492
- let repeated = false;
1025
+ for (var i = 0; i < len; i++) {
1026
+ var schemaElement = schemaElements[next];
1027
+ var repetitionType = next > 0 ? Util.getThriftEnum(_parquetThrift.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
1028
+ var optional = false;
1029
+ var repeated = false;
493
1030
 
494
1031
  switch (repetitionType) {
495
1032
  case 'REQUIRED':
@@ -508,15 +1045,15 @@ function decodeSchema(schemaElements, offset, len) {
508
1045
  }
509
1046
 
510
1047
  if (schemaElement.num_children > 0) {
511
- const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children);
1048
+ var res = decodeSchema(schemaElements, next + 1, schemaElement.num_children);
512
1049
  next = res.next;
513
1050
  schema[schemaElement.name] = {
514
- optional,
515
- repeated,
1051
+ optional: optional,
1052
+ repeated: repeated,
516
1053
  fields: res.schema
517
1054
  };
518
1055
  } else {
519
- let logicalType = Util.getThriftEnum(_parquetThrift.Type, schemaElement.type);
1056
+ var logicalType = Util.getThriftEnum(_parquetThrift.Type, schemaElement.type);
520
1057
 
521
1058
  if (schemaElement.converted_type) {
522
1059
  logicalType = Util.getThriftEnum(_parquetThrift.ConvertedType, schemaElement.converted_type);
@@ -525,17 +1062,17 @@ function decodeSchema(schemaElements, offset, len) {
525
1062
  schema[schemaElement.name] = {
526
1063
  type: logicalType,
527
1064
  typeLength: schemaElement.type_length,
528
- optional,
529
- repeated
1065
+ optional: optional,
1066
+ repeated: repeated
530
1067
  };
531
1068
  next++;
532
1069
  }
533
1070
  }
534
1071
 
535
1072
  return {
536
- schema,
537
- offset,
538
- next
1073
+ schema: schema,
1074
+ offset: offset,
1075
+ next: next
539
1076
  };
540
1077
  }
541
1078
  //# sourceMappingURL=reader.js.map