@loaders.gl/parquet 3.0.13 → 3.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/dist/dist.es5.min.js +1 -1
  2. package/dist/dist.es5.min.js.map +1 -1
  3. package/dist/dist.min.js +1 -1
  4. package/dist/dist.min.js.map +1 -1
  5. package/dist/es5/bundle.js +2 -2
  6. package/dist/es5/bundle.js.map +1 -1
  7. package/dist/es5/index.js +9 -9
  8. package/dist/es5/parquet-loader.js +70 -19
  9. package/dist/es5/parquet-loader.js.map +1 -1
  10. package/dist/es5/parquet-writer.js +4 -4
  11. package/dist/es5/parquet-writer.js.map +1 -1
  12. package/dist/es5/parquetjs/codecs/index.js +6 -4
  13. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  14. package/dist/es5/parquetjs/codecs/plain.js +43 -41
  15. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  16. package/dist/es5/parquetjs/codecs/rle.js +35 -25
  17. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  18. package/dist/es5/parquetjs/compression.js +9 -7
  19. package/dist/es5/parquetjs/compression.js.map +1 -1
  20. package/dist/es5/parquetjs/file.js +15 -15
  21. package/dist/es5/parquetjs/file.js.map +1 -1
  22. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  23. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
  24. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  25. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
  26. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  27. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
  28. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  29. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
  30. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  31. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
  32. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  33. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  34. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  35. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
  36. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  37. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
  38. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  39. package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
  40. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  41. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
  42. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  43. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
  44. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  45. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
  47. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  49. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
  50. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  51. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
  52. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  53. package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
  54. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
  56. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
  58. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
  60. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  61. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
  62. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  63. package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
  64. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
  66. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
  68. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
  70. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
  72. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  73. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
  74. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  75. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
  76. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
  78. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
  81. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
  83. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
  85. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
  87. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
  89. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
  91. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  92. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
  93. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  94. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
  95. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  96. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
  98. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
  100. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  102. package/dist/es5/parquetjs/reader.js +813 -276
  103. package/dist/es5/parquetjs/reader.js.map +1 -1
  104. package/dist/es5/parquetjs/schema/declare.js +11 -9
  105. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  106. package/dist/es5/parquetjs/schema/schema.js +87 -73
  107. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  108. package/dist/es5/parquetjs/schema/shred.js +95 -55
  109. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  110. package/dist/es5/parquetjs/schema/types.js +25 -25
  111. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  112. package/dist/es5/parquetjs/util.js +71 -39
  113. package/dist/es5/parquetjs/util.js.map +1 -1
  114. package/dist/es5/parquetjs/writer.js +467 -200
  115. package/dist/es5/parquetjs/writer.js.map +1 -1
  116. package/dist/esm/parquet-loader.js +1 -1
  117. package/dist/esm/parquet-writer.js +1 -1
  118. package/dist/parquet-worker.js +1 -1
  119. package/dist/parquet-worker.js.map +1 -1
  120. package/package.json +4 -4
@@ -2,11 +2,29 @@
2
2
 
3
3
  var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
4
 
5
+ var _typeof = require("@babel/runtime/helpers/typeof");
6
+
5
7
  Object.defineProperty(exports, "__esModule", {
6
8
  value: true
7
9
  });
8
10
  exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
9
11
 
12
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
13
+
14
+ var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
15
+
16
+ var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
17
+
18
+ var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
19
+
20
+ var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
21
+
22
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
23
+
24
+ var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
25
+
26
+ var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
27
+
10
28
  var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
11
29
 
12
30
  var _stream = require("stream");
@@ -23,33 +41,30 @@ var Util = _interopRequireWildcard(require("./util"));
23
41
 
24
42
  var _nodeInt = _interopRequireDefault(require("node-int64"));
25
43
 
26
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
44
+ function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
27
45
 
28
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
46
+ function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
29
47
 
30
- const PARQUET_MAGIC = 'PAR1';
31
- const PARQUET_VERSION = 1;
32
- const PARQUET_DEFAULT_PAGE_SIZE = 8192;
33
- const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
34
- const PARQUET_RDLVL_TYPE = 'INT32';
35
- const PARQUET_RDLVL_ENCODING = 'RLE';
48
+ function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
36
49
 
37
- class ParquetWriter {
38
- static async openFile(schema, path, opts) {
39
- const outputStream = await Util.osopen(path, opts);
40
- return ParquetWriter.openStream(schema, outputStream, opts);
41
- }
50
+ function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
42
51
 
43
- static async openStream(schema, outputStream, opts) {
44
- if (!opts) {
45
- opts = {};
46
- }
52
+ function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
47
53
 
48
- const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
49
- return new ParquetWriter(schema, envelopeWriter, opts);
50
- }
54
+ function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
55
+
56
+ function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
57
+
58
+ var PARQUET_MAGIC = 'PAR1';
59
+ var PARQUET_VERSION = 1;
60
+ var PARQUET_DEFAULT_PAGE_SIZE = 8192;
61
+ var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
62
+ var PARQUET_RDLVL_TYPE = 'INT32';
63
+ var PARQUET_RDLVL_ENCODING = 'RLE';
51
64
 
52
- constructor(schema, envelopeWriter, opts) {
65
+ var ParquetWriter = function () {
66
+ function ParquetWriter(schema, envelopeWriter, opts) {
67
+ (0, _classCallCheck2.default)(this, ParquetWriter);
53
68
  (0, _defineProperty2.default)(this, "schema", void 0);
54
69
  (0, _defineProperty2.default)(this, "envelopeWriter", void 0);
55
70
  (0, _defineProperty2.default)(this, "rowBuffer", void 0);
@@ -71,61 +86,178 @@ class ParquetWriter {
71
86
  }
72
87
  }
73
88
 
74
- async appendRow(row) {
75
- if (this.closed) {
76
- throw new Error('writer was closed');
89
+ (0, _createClass2.default)(ParquetWriter, [{
90
+ key: "appendRow",
91
+ value: function () {
92
+ var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(row) {
93
+ return _regenerator.default.wrap(function _callee$(_context) {
94
+ while (1) {
95
+ switch (_context.prev = _context.next) {
96
+ case 0:
97
+ if (!this.closed) {
98
+ _context.next = 2;
99
+ break;
100
+ }
101
+
102
+ throw new Error('writer was closed');
103
+
104
+ case 2:
105
+ Shred.shredRecord(this.schema, row, this.rowBuffer);
106
+
107
+ if (this.rowBuffer.rowCount >= this.rowGroupSize) {
108
+ this.rowBuffer = {};
109
+ }
110
+
111
+ case 4:
112
+ case "end":
113
+ return _context.stop();
114
+ }
115
+ }
116
+ }, _callee, this);
117
+ }));
118
+
119
+ function appendRow(_x) {
120
+ return _appendRow.apply(this, arguments);
121
+ }
122
+
123
+ return appendRow;
124
+ }()
125
+ }, {
126
+ key: "close",
127
+ value: function () {
128
+ var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(callback) {
129
+ return _regenerator.default.wrap(function _callee2$(_context2) {
130
+ while (1) {
131
+ switch (_context2.prev = _context2.next) {
132
+ case 0:
133
+ if (!this.closed) {
134
+ _context2.next = 2;
135
+ break;
136
+ }
137
+
138
+ throw new Error('writer was closed');
139
+
140
+ case 2:
141
+ this.closed = true;
142
+
143
+ if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
144
+ this.rowBuffer = {};
145
+ }
146
+
147
+ _context2.next = 6;
148
+ return this.envelopeWriter.writeFooter(this.userMetadata);
149
+
150
+ case 6:
151
+ _context2.next = 8;
152
+ return this.envelopeWriter.close();
153
+
154
+ case 8:
155
+ if (callback) {
156
+ callback();
157
+ }
158
+
159
+ case 9:
160
+ case "end":
161
+ return _context2.stop();
162
+ }
163
+ }
164
+ }, _callee2, this);
165
+ }));
166
+
167
+ function close(_x2) {
168
+ return _close.apply(this, arguments);
169
+ }
170
+
171
+ return close;
172
+ }()
173
+ }, {
174
+ key: "setMetadata",
175
+ value: function setMetadata(key, value) {
176
+ this.userMetadata[String(key)] = String(value);
77
177
  }
78
-
79
- Shred.shredRecord(this.schema, row, this.rowBuffer);
80
-
81
- if (this.rowBuffer.rowCount >= this.rowGroupSize) {
82
- this.rowBuffer = {};
178
+ }, {
179
+ key: "setRowGroupSize",
180
+ value: function setRowGroupSize(cnt) {
181
+ this.rowGroupSize = cnt;
83
182
  }
84
- }
85
-
86
- async close(callback) {
87
- if (this.closed) {
88
- throw new Error('writer was closed');
183
+ }, {
184
+ key: "setPageSize",
185
+ value: function setPageSize(cnt) {
186
+ this.envelopeWriter.setPageSize(cnt);
89
187
  }
90
-
91
- this.closed = true;
92
-
93
- if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
94
- this.rowBuffer = {};
95
- }
96
-
97
- await this.envelopeWriter.writeFooter(this.userMetadata);
98
- await this.envelopeWriter.close();
99
-
100
- if (callback) {
101
- callback();
102
- }
103
- }
104
-
105
- setMetadata(key, value) {
106
- this.userMetadata[String(key)] = String(value);
107
- }
108
-
109
- setRowGroupSize(cnt) {
110
- this.rowGroupSize = cnt;
111
- }
112
-
113
- setPageSize(cnt) {
114
- this.envelopeWriter.setPageSize(cnt);
115
- }
116
-
117
- }
188
+ }], [{
189
+ key: "openFile",
190
+ value: function () {
191
+ var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(schema, path, opts) {
192
+ var outputStream;
193
+ return _regenerator.default.wrap(function _callee3$(_context3) {
194
+ while (1) {
195
+ switch (_context3.prev = _context3.next) {
196
+ case 0:
197
+ _context3.next = 2;
198
+ return Util.osopen(path, opts);
199
+
200
+ case 2:
201
+ outputStream = _context3.sent;
202
+ return _context3.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
203
+
204
+ case 4:
205
+ case "end":
206
+ return _context3.stop();
207
+ }
208
+ }
209
+ }, _callee3);
210
+ }));
211
+
212
+ function openFile(_x3, _x4, _x5) {
213
+ return _openFile.apply(this, arguments);
214
+ }
215
+
216
+ return openFile;
217
+ }()
218
+ }, {
219
+ key: "openStream",
220
+ value: function () {
221
+ var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, outputStream, opts) {
222
+ var envelopeWriter;
223
+ return _regenerator.default.wrap(function _callee4$(_context4) {
224
+ while (1) {
225
+ switch (_context4.prev = _context4.next) {
226
+ case 0:
227
+ if (!opts) {
228
+ opts = {};
229
+ }
230
+
231
+ _context4.next = 3;
232
+ return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
233
+
234
+ case 3:
235
+ envelopeWriter = _context4.sent;
236
+ return _context4.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
237
+
238
+ case 5:
239
+ case "end":
240
+ return _context4.stop();
241
+ }
242
+ }
243
+ }, _callee4);
244
+ }));
245
+
246
+ function openStream(_x6, _x7, _x8) {
247
+ return _openStream.apply(this, arguments);
248
+ }
249
+
250
+ return openStream;
251
+ }()
252
+ }]);
253
+ return ParquetWriter;
254
+ }();
118
255
 
119
256
  exports.ParquetWriter = ParquetWriter;
120
257
 
121
- class ParquetEnvelopeWriter {
122
- static async openStream(schema, outputStream, opts) {
123
- const writeFn = Util.oswrite.bind(undefined, outputStream);
124
- const closeFn = Util.osclose.bind(undefined, outputStream);
125
- return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
126
- }
127
-
128
- constructor(schema, writeFn, closeFn, fileOffset, opts) {
258
+ var ParquetEnvelopeWriter = function () {
259
+ function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
260
+ (0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
129
261
  (0, _defineProperty2.default)(this, "schema", void 0);
130
262
  (0, _defineProperty2.default)(this, "write", void 0);
131
263
  (0, _defineProperty2.default)(this, "close", void 0);
@@ -144,72 +276,186 @@ class ParquetEnvelopeWriter {
144
276
  this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
145
277
  }
146
278
 
147
- writeSection(buf) {
148
- this.offset += buf.length;
149
- return this.write(buf);
150
- }
151
-
152
- writeHeader() {
153
- return this.writeSection(Buffer.from(PARQUET_MAGIC));
154
- }
155
-
156
- async writeRowGroup(records) {
157
- const rgroup = encodeRowGroup(this.schema, records, {
158
- baseOffset: this.offset,
159
- pageSize: this.pageSize,
160
- useDataPageV2: this.useDataPageV2
161
- });
162
- this.rowCount += records.rowCount;
163
- this.rowGroups.push(rgroup.metadata);
164
- return this.writeSection(rgroup.body);
165
- }
166
-
167
- writeFooter(userMetadata) {
168
- if (!userMetadata) {
169
- userMetadata = {};
279
+ (0, _createClass2.default)(ParquetEnvelopeWriter, [{
280
+ key: "writeSection",
281
+ value: function writeSection(buf) {
282
+ this.offset += buf.length;
283
+ return this.write(buf);
284
+ }
285
+ }, {
286
+ key: "writeHeader",
287
+ value: function writeHeader() {
288
+ return this.writeSection(Buffer.from(PARQUET_MAGIC));
170
289
  }
290
+ }, {
291
+ key: "writeRowGroup",
292
+ value: function () {
293
+ var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(records) {
294
+ var rgroup;
295
+ return _regenerator.default.wrap(function _callee5$(_context5) {
296
+ while (1) {
297
+ switch (_context5.prev = _context5.next) {
298
+ case 0:
299
+ rgroup = encodeRowGroup(this.schema, records, {
300
+ baseOffset: this.offset,
301
+ pageSize: this.pageSize,
302
+ useDataPageV2: this.useDataPageV2
303
+ });
304
+ this.rowCount += records.rowCount;
305
+ this.rowGroups.push(rgroup.metadata);
306
+ return _context5.abrupt("return", this.writeSection(rgroup.body));
307
+
308
+ case 4:
309
+ case "end":
310
+ return _context5.stop();
311
+ }
312
+ }
313
+ }, _callee5, this);
314
+ }));
315
+
316
+ function writeRowGroup(_x9) {
317
+ return _writeRowGroup.apply(this, arguments);
318
+ }
319
+
320
+ return writeRowGroup;
321
+ }()
322
+ }, {
323
+ key: "writeFooter",
324
+ value: function writeFooter(userMetadata) {
325
+ if (!userMetadata) {
326
+ userMetadata = {};
327
+ }
328
+
329
+ return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
330
+ }
331
+ }, {
332
+ key: "setPageSize",
333
+ value: function setPageSize(cnt) {
334
+ this.pageSize = cnt;
335
+ }
336
+ }], [{
337
+ key: "openStream",
338
+ value: function () {
339
+ var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(schema, outputStream, opts) {
340
+ var writeFn, closeFn;
341
+ return _regenerator.default.wrap(function _callee6$(_context6) {
342
+ while (1) {
343
+ switch (_context6.prev = _context6.next) {
344
+ case 0:
345
+ writeFn = Util.oswrite.bind(undefined, outputStream);
346
+ closeFn = Util.osclose.bind(undefined, outputStream);
347
+ return _context6.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
348
+
349
+ case 3:
350
+ case "end":
351
+ return _context6.stop();
352
+ }
353
+ }
354
+ }, _callee6);
355
+ }));
356
+
357
+ function openStream(_x10, _x11, _x12) {
358
+ return _openStream2.apply(this, arguments);
359
+ }
360
+
361
+ return openStream;
362
+ }()
363
+ }]);
364
+ return ParquetEnvelopeWriter;
365
+ }();
171
366
 
172
- return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
173
- }
367
+ exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
174
368
 
175
- setPageSize(cnt) {
176
- this.pageSize = cnt;
177
- }
369
+ var ParquetTransformer = function (_Transform) {
370
+ (0, _inherits2.default)(ParquetTransformer, _Transform);
178
371
 
179
- }
372
+ var _super = _createSuper(ParquetTransformer);
180
373
 
181
- exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
374
+ function ParquetTransformer(schema) {
375
+ var _this;
182
376
 
183
- class ParquetTransformer extends _stream.Transform {
184
- constructor(schema, opts = {}) {
185
- super({
377
+ var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
378
+ (0, _classCallCheck2.default)(this, ParquetTransformer);
379
+ _this = _super.call(this, {
186
380
  objectMode: true
187
381
  });
188
- (0, _defineProperty2.default)(this, "writer", void 0);
189
-
190
- const writeProxy = function (t) {
191
- return async function (b) {
192
- t.push(b);
193
- };
194
- }(this);
195
-
196
- this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts), opts);
382
+ (0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
383
+
384
+ var writeProxy = function (t) {
385
+ return function () {
386
+ var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(b) {
387
+ return _regenerator.default.wrap(function _callee7$(_context7) {
388
+ while (1) {
389
+ switch (_context7.prev = _context7.next) {
390
+ case 0:
391
+ t.push(b);
392
+
393
+ case 1:
394
+ case "end":
395
+ return _context7.stop();
396
+ }
397
+ }
398
+ }, _callee7);
399
+ }));
400
+
401
+ return function (_x13) {
402
+ return _ref.apply(this, arguments);
403
+ };
404
+ }();
405
+ }((0, _assertThisInitialized2.default)(_this));
406
+
407
+ _this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8() {
408
+ return _regenerator.default.wrap(function _callee8$(_context8) {
409
+ while (1) {
410
+ switch (_context8.prev = _context8.next) {
411
+ case 0:
412
+ case "end":
413
+ return _context8.stop();
414
+ }
415
+ }
416
+ }, _callee8);
417
+ })), 0, opts), opts);
418
+ return _this;
197
419
  }
198
420
 
199
- _transform(row, encoding, callback) {
200
- if (row) {
201
- return this.writer.appendRow(row).then(callback);
202
- }
203
-
204
- callback();
205
- return Promise.resolve();
206
- }
207
-
208
- async _flush(callback) {
209
- await this.writer.close(callback);
210
- }
421
+ (0, _createClass2.default)(ParquetTransformer, [{
422
+ key: "_transform",
423
+ value: function _transform(row, encoding, callback) {
424
+ if (row) {
425
+ return this.writer.appendRow(row).then(callback);
426
+ }
211
427
 
212
- }
428
+ callback();
429
+ return Promise.resolve();
430
+ }
431
+ }, {
432
+ key: "_flush",
433
+ value: function () {
434
+ var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(callback) {
435
+ return _regenerator.default.wrap(function _callee9$(_context9) {
436
+ while (1) {
437
+ switch (_context9.prev = _context9.next) {
438
+ case 0:
439
+ _context9.next = 2;
440
+ return this.writer.close(callback);
441
+
442
+ case 2:
443
+ case "end":
444
+ return _context9.stop();
445
+ }
446
+ }
447
+ }, _callee9, this);
448
+ }));
449
+
450
+ function _flush(_x14) {
451
+ return _flush2.apply(this, arguments);
452
+ }
453
+
454
+ return _flush;
455
+ }()
456
+ }]);
457
+ return ParquetTransformer;
458
+ }(_stream.Transform);
213
459
 
214
460
  exports.ParquetTransformer = ParquetTransformer;
215
461
 
@@ -222,7 +468,7 @@ function encodeValues(type, encoding, values, opts) {
222
468
  }
223
469
 
224
470
  function encodeDataPage(column, data) {
225
- let rLevelsBuf = Buffer.alloc(0);
471
+ var rLevelsBuf = Buffer.alloc(0);
226
472
 
227
473
  if (column.rLevelMax > 0) {
228
474
  rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
@@ -230,7 +476,7 @@ function encodeDataPage(column, data) {
230
476
  });
231
477
  }
232
478
 
233
- let dLevelsBuf = Buffer.alloc(0);
479
+ var dLevelsBuf = Buffer.alloc(0);
234
480
 
235
481
  if (column.dLevelMax > 0) {
236
482
  dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
@@ -238,13 +484,13 @@ function encodeDataPage(column, data) {
238
484
  });
239
485
  }
240
486
 
241
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
487
+ var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
242
488
  typeLength: column.typeLength,
243
489
  bitWidth: column.typeLength
244
490
  });
245
- const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
246
- const compressedBuf = Compression.deflate(column.compression, dataBuf);
247
- const header = new _parquetThrift.PageHeader({
491
+ var dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
492
+ var compressedBuf = Compression.deflate(column.compression, dataBuf);
493
+ var header = new _parquetThrift.PageHeader({
248
494
  type: _parquetThrift.PageType.DATA_PAGE,
249
495
  data_page_header: new _parquetThrift.DataPageHeader({
250
496
  num_values: data.count,
@@ -255,22 +501,22 @@ function encodeDataPage(column, data) {
255
501
  uncompressed_page_size: dataBuf.length,
256
502
  compressed_page_size: compressedBuf.length
257
503
  });
258
- const headerBuf = Util.serializeThrift(header);
259
- const page = Buffer.concat([headerBuf, compressedBuf]);
504
+ var headerBuf = Util.serializeThrift(header);
505
+ var page = Buffer.concat([headerBuf, compressedBuf]);
260
506
  return {
261
- header,
507
+ header: header,
262
508
  headerSize: headerBuf.length,
263
- page
509
+ page: page
264
510
  };
265
511
  }
266
512
 
267
513
  function encodeDataPageV2(column, data, rowCount) {
268
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
514
+ var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
269
515
  typeLength: column.typeLength,
270
516
  bitWidth: column.typeLength
271
517
  });
272
- const compressedBuf = Compression.deflate(column.compression, valuesBuf);
273
- let rLevelsBuf = Buffer.alloc(0);
518
+ var compressedBuf = Compression.deflate(column.compression, valuesBuf);
519
+ var rLevelsBuf = Buffer.alloc(0);
274
520
 
275
521
  if (column.rLevelMax > 0) {
276
522
  rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
@@ -279,7 +525,7 @@ function encodeDataPageV2(column, data, rowCount) {
279
525
  });
280
526
  }
281
527
 
282
- let dLevelsBuf = Buffer.alloc(0);
528
+ var dLevelsBuf = Buffer.alloc(0);
283
529
 
284
530
  if (column.dLevelMax > 0) {
285
531
  dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
@@ -288,7 +534,7 @@ function encodeDataPageV2(column, data, rowCount) {
288
534
  });
289
535
  }
290
536
 
291
- const header = new _parquetThrift.PageHeader({
537
+ var header = new _parquetThrift.PageHeader({
292
538
  type: _parquetThrift.PageType.DATA_PAGE_V2,
293
539
  data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
294
540
  num_values: data.count,
@@ -302,23 +548,23 @@ function encodeDataPageV2(column, data, rowCount) {
302
548
  uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
303
549
  compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
304
550
  });
305
- const headerBuf = Util.serializeThrift(header);
306
- const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
551
+ var headerBuf = Util.serializeThrift(header);
552
+ var page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
307
553
  return {
308
- header,
554
+ header: header,
309
555
  headerSize: headerBuf.length,
310
- page
556
+ page: page
311
557
  };
312
558
  }
313
559
 
314
560
  function encodeColumnChunk(column, buffer, offset, opts) {
315
- const data = buffer.columnData[column.path.join()];
316
- const baseOffset = (opts.baseOffset || 0) + offset;
317
- let pageBuf;
318
- let total_uncompressed_size = 0;
319
- let total_compressed_size = 0;
561
+ var data = buffer.columnData[column.path.join()];
562
+ var baseOffset = (opts.baseOffset || 0) + offset;
563
+ var pageBuf;
564
+ var total_uncompressed_size = 0;
565
+ var total_compressed_size = 0;
320
566
  {
321
- let result;
567
+ var result;
322
568
 
323
569
  if (opts.useDataPageV2) {
324
570
  result = encodeDataPageV2(column, data, buffer.rowCount);
@@ -330,58 +576,69 @@ function encodeColumnChunk(column, buffer, offset, opts) {
330
576
  total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
331
577
  total_compressed_size += result.header.compressed_page_size + result.headerSize;
332
578
  }
333
- const metadata = new _parquetThrift.ColumnMetaData({
579
+ var metadata = new _parquetThrift.ColumnMetaData({
334
580
  path_in_schema: column.path,
335
581
  num_values: data.count,
336
582
  data_page_offset: baseOffset,
337
583
  encodings: [],
338
- total_uncompressed_size,
339
- total_compressed_size,
584
+ total_uncompressed_size: total_uncompressed_size,
585
+ total_compressed_size: total_compressed_size,
340
586
  type: _parquetThrift.Type[column.primitiveType],
341
587
  codec: _parquetThrift.CompressionCodec[column.compression]
342
588
  });
343
589
  metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
344
590
  metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
345
- const metadataOffset = baseOffset + pageBuf.length;
346
- const body = Buffer.concat([pageBuf, Util.serializeThrift(metadata)]);
591
+ var metadataOffset = baseOffset + pageBuf.length;
592
+ var body = Buffer.concat([pageBuf, Util.serializeThrift(metadata)]);
347
593
  return {
348
- body,
349
- metadata,
350
- metadataOffset
594
+ body: body,
595
+ metadata: metadata,
596
+ metadataOffset: metadataOffset
351
597
  };
352
598
  }
353
599
 
354
600
  function encodeRowGroup(schema, data, opts) {
355
- const metadata = new _parquetThrift.RowGroup({
601
+ var metadata = new _parquetThrift.RowGroup({
356
602
  num_rows: data.rowCount,
357
603
  columns: [],
358
604
  total_byte_size: 0
359
605
  });
360
- let body = Buffer.alloc(0);
361
-
362
- for (const field of schema.fieldList) {
363
- if (field.isNested) {
364
- continue;
606
+ var body = Buffer.alloc(0);
607
+
608
+ var _iterator = _createForOfIteratorHelper(schema.fieldList),
609
+ _step;
610
+
611
+ try {
612
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
613
+ var field = _step.value;
614
+
615
+ if (field.isNested) {
616
+ continue;
617
+ }
618
+
619
+ var cchunkData = encodeColumnChunk(field, data, body.length, opts);
620
+ var cchunk = new _parquetThrift.ColumnChunk({
621
+ file_offset: cchunkData.metadataOffset,
622
+ meta_data: cchunkData.metadata
623
+ });
624
+ metadata.columns.push(cchunk);
625
+ metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
626
+ body = Buffer.concat([body, cchunkData.body]);
365
627
  }
366
-
367
- const cchunkData = encodeColumnChunk(field, data, body.length, opts);
368
- const cchunk = new _parquetThrift.ColumnChunk({
369
- file_offset: cchunkData.metadataOffset,
370
- meta_data: cchunkData.metadata
371
- });
372
- metadata.columns.push(cchunk);
373
- metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
374
- body = Buffer.concat([body, cchunkData.body]);
628
+ } catch (err) {
629
+ _iterator.e(err);
630
+ } finally {
631
+ _iterator.f();
375
632
  }
376
633
 
377
634
  return {
378
- body,
379
- metadata
635
+ body: body,
636
+ metadata: metadata
380
637
  };
381
638
  }
382
639
 
383
640
  function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
384
- const metadata = new _parquetThrift.FileMetaData({
641
+ var metadata = new _parquetThrift.FileMetaData({
385
642
  version: PARQUET_VERSION,
386
643
  created_by: 'parquets',
387
644
  num_rows: rowCount,
@@ -390,47 +647,57 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
390
647
  key_value_metadata: []
391
648
  });
392
649
 
393
- for (const key in userMetadata) {
650
+ for (var key in userMetadata) {
394
651
  var _metadata$key_value_m, _metadata$key_value_m2;
395
652
 
396
- const kv = new _parquetThrift.KeyValue({
397
- key,
653
+ var kv = new _parquetThrift.KeyValue({
654
+ key: key,
398
655
  value: userMetadata[key]
399
656
  });
400
657
  (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
401
658
  }
402
659
 
403
660
  {
404
- const schemaRoot = new _parquetThrift.SchemaElement({
661
+ var schemaRoot = new _parquetThrift.SchemaElement({
405
662
  name: 'root',
406
663
  num_children: Object.keys(schema.fields).length
407
664
  });
408
665
  metadata.schema.push(schemaRoot);
409
666
  }
410
667
 
411
- for (const field of schema.fieldList) {
412
- const relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
413
- const schemaElem = new _parquetThrift.SchemaElement({
414
- name: field.name,
415
- repetition_type: relt
416
- });
417
-
418
- if (field.isNested) {
419
- schemaElem.num_children = field.fieldCount;
420
- } else {
421
- schemaElem.type = _parquetThrift.Type[field.primitiveType];
422
- }
423
-
424
- if (field.originalType) {
425
- schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
668
+ var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
669
+ _step2;
670
+
671
+ try {
672
+ for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
673
+ var field = _step2.value;
674
+ var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
675
+ var schemaElem = new _parquetThrift.SchemaElement({
676
+ name: field.name,
677
+ repetition_type: relt
678
+ });
679
+
680
+ if (field.isNested) {
681
+ schemaElem.num_children = field.fieldCount;
682
+ } else {
683
+ schemaElem.type = _parquetThrift.Type[field.primitiveType];
684
+ }
685
+
686
+ if (field.originalType) {
687
+ schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
688
+ }
689
+
690
+ schemaElem.type_length = field.typeLength;
691
+ metadata.schema.push(schemaElem);
426
692
  }
427
-
428
- schemaElem.type_length = field.typeLength;
429
- metadata.schema.push(schemaElem);
693
+ } catch (err) {
694
+ _iterator2.e(err);
695
+ } finally {
696
+ _iterator2.f();
430
697
  }
431
698
 
432
- const metadataEncoded = Util.serializeThrift(metadata);
433
- const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
699
+ var metadataEncoded = Util.serializeThrift(metadata);
700
+ var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
434
701
  metadataEncoded.copy(footerEncoded);
435
702
  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
436
703
  footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);