@loaders.gl/parquet 3.1.0-beta.7 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/dist/es5/bundle.js +1 -1
  2. package/dist/es5/bundle.js.map +1 -1
  3. package/dist/es5/constants.js +5 -5
  4. package/dist/es5/constants.js.map +1 -1
  5. package/dist/es5/index.js +19 -10
  6. package/dist/es5/index.js.map +1 -1
  7. package/dist/es5/lib/convert-schema.js +13 -13
  8. package/dist/es5/lib/convert-schema.js.map +1 -1
  9. package/dist/es5/lib/parse-parquet.js +154 -19
  10. package/dist/es5/lib/parse-parquet.js.map +1 -1
  11. package/dist/es5/lib/read-array-buffer.js +43 -6
  12. package/dist/es5/lib/read-array-buffer.js.map +1 -1
  13. package/dist/es5/parquet-loader.js +4 -4
  14. package/dist/es5/parquet-loader.js.map +1 -1
  15. package/dist/es5/parquet-writer.js +4 -4
  16. package/dist/es5/parquet-writer.js.map +1 -1
  17. package/dist/es5/parquetjs/codecs/dictionary.js +10 -2
  18. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/index.js +6 -4
  20. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  21. package/dist/es5/parquetjs/codecs/plain.js +43 -41
  22. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +35 -25
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +110 -27
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/encoder/writer.js +737 -301
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
  29. package/dist/es5/parquetjs/file.js +15 -15
  30. package/dist/es5/parquetjs/file.js.map +1 -1
  31. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  32. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
  33. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  34. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
  35. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  36. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
  37. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  38. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
  39. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  40. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
  41. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  42. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  43. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  44. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
  45. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
  47. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
  49. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
  51. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  52. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
  53. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  54. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
  56. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  58. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
  59. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
  61. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
  63. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
  65. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
  67. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
  69. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  70. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
  71. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
  73. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
  75. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
  77. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
  79. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
  81. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
  83. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
  85. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
  87. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  89. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
  90. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
  92. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
  94. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
  96. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
  98. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
  100. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
  102. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
  104. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  105. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  106. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
  107. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  108. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
  109. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  110. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  111. package/dist/es5/parquetjs/parser/decoders.js +391 -218
  112. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  113. package/dist/es5/parquetjs/parser/parquet-cursor.js +180 -62
  114. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
  115. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +370 -125
  116. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  117. package/dist/es5/parquetjs/parser/parquet-reader.js +320 -91
  118. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  119. package/dist/es5/parquetjs/schema/declare.js +11 -9
  120. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  121. package/dist/es5/parquetjs/schema/schema.js +87 -73
  122. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  123. package/dist/es5/parquetjs/schema/shred.js +96 -56
  124. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  125. package/dist/es5/parquetjs/schema/types.js +40 -39
  126. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  127. package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
  128. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
  129. package/dist/es5/parquetjs/utils/file-utils.js +12 -8
  130. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  131. package/dist/es5/parquetjs/utils/read-utils.js +50 -22
  132. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  133. package/dist/esm/parquet-loader.js +1 -1
  134. package/dist/esm/parquet-loader.js.map +1 -1
  135. package/dist/esm/parquet-writer.js +1 -1
  136. package/dist/esm/parquet-writer.js.map +1 -1
  137. package/package.json +5 -5
@@ -2,11 +2,29 @@
2
2
 
3
3
  var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
4
 
5
+ var _typeof = require("@babel/runtime/helpers/typeof");
6
+
5
7
  Object.defineProperty(exports, "__esModule", {
6
8
  value: true
7
9
  });
8
10
  exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
9
11
 
12
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
13
+
14
+ var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
15
+
16
+ var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
17
+
18
+ var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
19
+
20
+ var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
21
+
22
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
23
+
24
+ var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
25
+
26
+ var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
27
+
10
28
  var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
11
29
 
12
30
  var _stream = require("stream");
@@ -25,33 +43,30 @@ var _readUtils = require("../utils/read-utils");
25
43
 
26
44
  var _nodeInt = _interopRequireDefault(require("node-int64"));
27
45
 
28
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
46
+ function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
29
47
 
30
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
48
+ function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
31
49
 
32
- const PARQUET_MAGIC = 'PAR1';
33
- const PARQUET_VERSION = 1;
34
- const PARQUET_DEFAULT_PAGE_SIZE = 8192;
35
- const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
36
- const PARQUET_RDLVL_TYPE = 'INT32';
37
- const PARQUET_RDLVL_ENCODING = 'RLE';
50
+ function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
38
51
 
39
- class ParquetWriter {
40
- static async openFile(schema, path, opts) {
41
- const outputStream = await (0, _fileUtils.osopen)(path, opts);
42
- return ParquetWriter.openStream(schema, outputStream, opts);
43
- }
52
+ function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
44
53
 
45
- static async openStream(schema, outputStream, opts) {
46
- if (!opts) {
47
- opts = {};
48
- }
54
+ function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
49
55
 
50
- const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
51
- return new ParquetWriter(schema, envelopeWriter, opts);
52
- }
56
+ function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
57
+
58
+ function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
53
59
 
54
- constructor(schema, envelopeWriter, opts) {
60
+ var PARQUET_MAGIC = 'PAR1';
61
+ var PARQUET_VERSION = 1;
62
+ var PARQUET_DEFAULT_PAGE_SIZE = 8192;
63
+ var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
64
+ var PARQUET_RDLVL_TYPE = 'INT32';
65
+ var PARQUET_RDLVL_ENCODING = 'RLE';
66
+
67
+ var ParquetWriter = function () {
68
+ function ParquetWriter(schema, envelopeWriter, opts) {
69
+ (0, _classCallCheck2.default)(this, ParquetWriter);
55
70
  (0, _defineProperty2.default)(this, "schema", void 0);
56
71
  (0, _defineProperty2.default)(this, "envelopeWriter", void 0);
57
72
  (0, _defineProperty2.default)(this, "rowBuffer", void 0);
@@ -67,72 +82,217 @@ class ParquetWriter {
67
82
  this.writeHeader();
68
83
  }
69
84
 
70
- async writeHeader() {
71
- try {
72
- await this.envelopeWriter.writeHeader();
73
- } catch (err) {
74
- await this.envelopeWriter.close();
75
- throw err;
85
+ (0, _createClass2.default)(ParquetWriter, [{
86
+ key: "writeHeader",
87
+ value: function () {
88
+ var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
89
+ return _regenerator.default.wrap(function _callee$(_context) {
90
+ while (1) {
91
+ switch (_context.prev = _context.next) {
92
+ case 0:
93
+ _context.prev = 0;
94
+ _context.next = 3;
95
+ return this.envelopeWriter.writeHeader();
96
+
97
+ case 3:
98
+ _context.next = 10;
99
+ break;
100
+
101
+ case 5:
102
+ _context.prev = 5;
103
+ _context.t0 = _context["catch"](0);
104
+ _context.next = 9;
105
+ return this.envelopeWriter.close();
106
+
107
+ case 9:
108
+ throw _context.t0;
109
+
110
+ case 10:
111
+ case "end":
112
+ return _context.stop();
113
+ }
114
+ }
115
+ }, _callee, this, [[0, 5]]);
116
+ }));
117
+
118
+ function writeHeader() {
119
+ return _writeHeader.apply(this, arguments);
120
+ }
121
+
122
+ return writeHeader;
123
+ }()
124
+ }, {
125
+ key: "appendRow",
126
+ value: function () {
127
+ var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
128
+ return _regenerator.default.wrap(function _callee2$(_context2) {
129
+ while (1) {
130
+ switch (_context2.prev = _context2.next) {
131
+ case 0:
132
+ if (!this.closed) {
133
+ _context2.next = 2;
134
+ break;
135
+ }
136
+
137
+ throw new Error('writer was closed');
138
+
139
+ case 2:
140
+ Shred.shredRecord(this.schema, row, this.rowBuffer);
141
+
142
+ if (this.rowBuffer.rowCount >= this.rowGroupSize) {
143
+ this.rowBuffer = {};
144
+ }
145
+
146
+ case 4:
147
+ case "end":
148
+ return _context2.stop();
149
+ }
150
+ }
151
+ }, _callee2, this);
152
+ }));
153
+
154
+ function appendRow(_x) {
155
+ return _appendRow.apply(this, arguments);
156
+ }
157
+
158
+ return appendRow;
159
+ }()
160
+ }, {
161
+ key: "close",
162
+ value: function () {
163
+ var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
164
+ return _regenerator.default.wrap(function _callee3$(_context3) {
165
+ while (1) {
166
+ switch (_context3.prev = _context3.next) {
167
+ case 0:
168
+ if (!this.closed) {
169
+ _context3.next = 2;
170
+ break;
171
+ }
172
+
173
+ throw new Error('writer was closed');
174
+
175
+ case 2:
176
+ this.closed = true;
177
+
178
+ if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
179
+ this.rowBuffer = {};
180
+ }
181
+
182
+ _context3.next = 6;
183
+ return this.envelopeWriter.writeFooter(this.userMetadata);
184
+
185
+ case 6:
186
+ _context3.next = 8;
187
+ return this.envelopeWriter.close();
188
+
189
+ case 8:
190
+ if (callback) {
191
+ callback();
192
+ }
193
+
194
+ case 9:
195
+ case "end":
196
+ return _context3.stop();
197
+ }
198
+ }
199
+ }, _callee3, this);
200
+ }));
201
+
202
+ function close(_x2) {
203
+ return _close.apply(this, arguments);
204
+ }
205
+
206
+ return close;
207
+ }()
208
+ }, {
209
+ key: "setMetadata",
210
+ value: function setMetadata(key, value) {
211
+ this.userMetadata[String(key)] = String(value);
76
212
  }
77
- }
78
-
79
- async appendRow(row) {
80
- if (this.closed) {
81
- throw new Error('writer was closed');
213
+ }, {
214
+ key: "setRowGroupSize",
215
+ value: function setRowGroupSize(cnt) {
216
+ this.rowGroupSize = cnt;
82
217
  }
83
-
84
- Shred.shredRecord(this.schema, row, this.rowBuffer);
85
-
86
- if (this.rowBuffer.rowCount >= this.rowGroupSize) {
87
- this.rowBuffer = {};
218
+ }, {
219
+ key: "setPageSize",
220
+ value: function setPageSize(cnt) {
221
+ this.envelopeWriter.setPageSize(cnt);
88
222
  }
89
- }
90
-
91
- async close(callback) {
92
- if (this.closed) {
93
- throw new Error('writer was closed');
94
- }
95
-
96
- this.closed = true;
97
-
98
- if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
99
- this.rowBuffer = {};
100
- }
101
-
102
- await this.envelopeWriter.writeFooter(this.userMetadata);
103
- await this.envelopeWriter.close();
104
-
105
- if (callback) {
106
- callback();
107
- }
108
- }
109
-
110
- setMetadata(key, value) {
111
- this.userMetadata[String(key)] = String(value);
112
- }
113
-
114
- setRowGroupSize(cnt) {
115
- this.rowGroupSize = cnt;
116
- }
117
-
118
- setPageSize(cnt) {
119
- this.envelopeWriter.setPageSize(cnt);
120
- }
121
-
122
- }
223
+ }], [{
224
+ key: "openFile",
225
+ value: function () {
226
+ var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, path, opts) {
227
+ var outputStream;
228
+ return _regenerator.default.wrap(function _callee4$(_context4) {
229
+ while (1) {
230
+ switch (_context4.prev = _context4.next) {
231
+ case 0:
232
+ _context4.next = 2;
233
+ return (0, _fileUtils.osopen)(path, opts);
234
+
235
+ case 2:
236
+ outputStream = _context4.sent;
237
+ return _context4.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
238
+
239
+ case 4:
240
+ case "end":
241
+ return _context4.stop();
242
+ }
243
+ }
244
+ }, _callee4);
245
+ }));
246
+
247
+ function openFile(_x3, _x4, _x5) {
248
+ return _openFile.apply(this, arguments);
249
+ }
250
+
251
+ return openFile;
252
+ }()
253
+ }, {
254
+ key: "openStream",
255
+ value: function () {
256
+ var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream, opts) {
257
+ var envelopeWriter;
258
+ return _regenerator.default.wrap(function _callee5$(_context5) {
259
+ while (1) {
260
+ switch (_context5.prev = _context5.next) {
261
+ case 0:
262
+ if (!opts) {
263
+ opts = {};
264
+ }
265
+
266
+ _context5.next = 3;
267
+ return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
268
+
269
+ case 3:
270
+ envelopeWriter = _context5.sent;
271
+ return _context5.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
272
+
273
+ case 5:
274
+ case "end":
275
+ return _context5.stop();
276
+ }
277
+ }
278
+ }, _callee5);
279
+ }));
280
+
281
+ function openStream(_x6, _x7, _x8) {
282
+ return _openStream.apply(this, arguments);
283
+ }
284
+
285
+ return openStream;
286
+ }()
287
+ }]);
288
+ return ParquetWriter;
289
+ }();
123
290
 
124
291
  exports.ParquetWriter = ParquetWriter;
125
292
 
126
- class ParquetEnvelopeWriter {
127
- static async openStream(schema, outputStream, opts) {
128
- const writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
129
-
130
- const closeFn = _fileUtils.osclose.bind(undefined, outputStream);
131
-
132
- return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
133
- }
134
-
135
- constructor(schema, writeFn, closeFn, fileOffset, opts) {
293
+ var ParquetEnvelopeWriter = function () {
294
+ function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
295
+ (0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
136
296
  (0, _defineProperty2.default)(this, "schema", void 0);
137
297
  (0, _defineProperty2.default)(this, "write", void 0);
138
298
  (0, _defineProperty2.default)(this, "close", void 0);
@@ -151,72 +311,194 @@ class ParquetEnvelopeWriter {
151
311
  this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
152
312
  }
153
313
 
154
- writeSection(buf) {
155
- this.offset += buf.length;
156
- return this.write(buf);
157
- }
158
-
159
- writeHeader() {
160
- return this.writeSection(Buffer.from(PARQUET_MAGIC));
161
- }
162
-
163
- async writeRowGroup(records) {
164
- const rgroup = await encodeRowGroup(this.schema, records, {
165
- baseOffset: this.offset,
166
- pageSize: this.pageSize,
167
- useDataPageV2: this.useDataPageV2
168
- });
169
- this.rowCount += records.rowCount;
170
- this.rowGroups.push(rgroup.metadata);
171
- return await this.writeSection(rgroup.body);
172
- }
173
-
174
- writeFooter(userMetadata) {
175
- if (!userMetadata) {
176
- userMetadata = {};
314
+ (0, _createClass2.default)(ParquetEnvelopeWriter, [{
315
+ key: "writeSection",
316
+ value: function writeSection(buf) {
317
+ this.offset += buf.length;
318
+ return this.write(buf);
319
+ }
320
+ }, {
321
+ key: "writeHeader",
322
+ value: function writeHeader() {
323
+ return this.writeSection(Buffer.from(PARQUET_MAGIC));
324
+ }
325
+ }, {
326
+ key: "writeRowGroup",
327
+ value: function () {
328
+ var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(records) {
329
+ var rgroup;
330
+ return _regenerator.default.wrap(function _callee6$(_context6) {
331
+ while (1) {
332
+ switch (_context6.prev = _context6.next) {
333
+ case 0:
334
+ _context6.next = 2;
335
+ return encodeRowGroup(this.schema, records, {
336
+ baseOffset: this.offset,
337
+ pageSize: this.pageSize,
338
+ useDataPageV2: this.useDataPageV2
339
+ });
340
+
341
+ case 2:
342
+ rgroup = _context6.sent;
343
+ this.rowCount += records.rowCount;
344
+ this.rowGroups.push(rgroup.metadata);
345
+ _context6.next = 7;
346
+ return this.writeSection(rgroup.body);
347
+
348
+ case 7:
349
+ return _context6.abrupt("return", _context6.sent);
350
+
351
+ case 8:
352
+ case "end":
353
+ return _context6.stop();
354
+ }
355
+ }
356
+ }, _callee6, this);
357
+ }));
358
+
359
+ function writeRowGroup(_x9) {
360
+ return _writeRowGroup.apply(this, arguments);
361
+ }
362
+
363
+ return writeRowGroup;
364
+ }()
365
+ }, {
366
+ key: "writeFooter",
367
+ value: function writeFooter(userMetadata) {
368
+ if (!userMetadata) {
369
+ userMetadata = {};
370
+ }
371
+
372
+ return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
373
+ }
374
+ }, {
375
+ key: "setPageSize",
376
+ value: function setPageSize(cnt) {
377
+ this.pageSize = cnt;
177
378
  }
379
+ }], [{
380
+ key: "openStream",
381
+ value: function () {
382
+ var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(schema, outputStream, opts) {
383
+ var writeFn, closeFn;
384
+ return _regenerator.default.wrap(function _callee7$(_context7) {
385
+ while (1) {
386
+ switch (_context7.prev = _context7.next) {
387
+ case 0:
388
+ writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
389
+ closeFn = _fileUtils.osclose.bind(undefined, outputStream);
390
+ return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
391
+
392
+ case 3:
393
+ case "end":
394
+ return _context7.stop();
395
+ }
396
+ }
397
+ }, _callee7);
398
+ }));
399
+
400
+ function openStream(_x10, _x11, _x12) {
401
+ return _openStream2.apply(this, arguments);
402
+ }
403
+
404
+ return openStream;
405
+ }()
406
+ }]);
407
+ return ParquetEnvelopeWriter;
408
+ }();
178
409
 
179
- return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
180
- }
410
+ exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
181
411
 
182
- setPageSize(cnt) {
183
- this.pageSize = cnt;
184
- }
412
+ var ParquetTransformer = function (_Transform) {
413
+ (0, _inherits2.default)(ParquetTransformer, _Transform);
185
414
 
186
- }
415
+ var _super = _createSuper(ParquetTransformer);
187
416
 
188
- exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
417
+ function ParquetTransformer(schema) {
418
+ var _this;
189
419
 
190
- class ParquetTransformer extends _stream.Transform {
191
- constructor(schema, opts = {}) {
192
- super({
420
+ var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
421
+ (0, _classCallCheck2.default)(this, ParquetTransformer);
422
+ _this = _super.call(this, {
193
423
  objectMode: true
194
424
  });
195
- (0, _defineProperty2.default)(this, "writer", void 0);
196
-
197
- const writeProxy = function (t) {
198
- return async function (b) {
199
- t.push(b);
200
- };
201
- }(this);
202
-
203
- this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts), opts);
425
+ (0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
426
+
427
+ var writeProxy = function (t) {
428
+ return function () {
429
+ var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(b) {
430
+ return _regenerator.default.wrap(function _callee8$(_context8) {
431
+ while (1) {
432
+ switch (_context8.prev = _context8.next) {
433
+ case 0:
434
+ t.push(b);
435
+
436
+ case 1:
437
+ case "end":
438
+ return _context8.stop();
439
+ }
440
+ }
441
+ }, _callee8);
442
+ }));
443
+
444
+ return function (_x13) {
445
+ return _ref.apply(this, arguments);
446
+ };
447
+ }();
448
+ }((0, _assertThisInitialized2.default)(_this));
449
+
450
+ _this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
451
+ return _regenerator.default.wrap(function _callee9$(_context9) {
452
+ while (1) {
453
+ switch (_context9.prev = _context9.next) {
454
+ case 0:
455
+ case "end":
456
+ return _context9.stop();
457
+ }
458
+ }
459
+ }, _callee9);
460
+ })), 0, opts), opts);
461
+ return _this;
204
462
  }
205
463
 
206
- _transform(row, encoding, callback) {
207
- if (row) {
208
- return this.writer.appendRow(row).then(callback);
209
- }
210
-
211
- callback();
212
- return Promise.resolve();
213
- }
464
+ (0, _createClass2.default)(ParquetTransformer, [{
465
+ key: "_transform",
466
+ value: function _transform(row, encoding, callback) {
467
+ if (row) {
468
+ return this.writer.appendRow(row).then(callback);
469
+ }
214
470
 
215
- async _flush(callback) {
216
- await this.writer.close(callback);
217
- }
218
-
219
- }
471
+ callback();
472
+ return Promise.resolve();
473
+ }
474
+ }, {
475
+ key: "_flush",
476
+ value: function () {
477
+ var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(callback) {
478
+ return _regenerator.default.wrap(function _callee10$(_context10) {
479
+ while (1) {
480
+ switch (_context10.prev = _context10.next) {
481
+ case 0:
482
+ _context10.next = 2;
483
+ return this.writer.close(callback);
484
+
485
+ case 2:
486
+ case "end":
487
+ return _context10.stop();
488
+ }
489
+ }
490
+ }, _callee10, this);
491
+ }));
492
+
493
+ function _flush(_x14) {
494
+ return _flush2.apply(this, arguments);
495
+ }
496
+
497
+ return _flush;
498
+ }()
499
+ }]);
500
+ return ParquetTransformer;
501
+ }(_stream.Transform);
220
502
 
221
503
  exports.ParquetTransformer = ParquetTransformer;
222
504
 
@@ -228,160 +510,304 @@ function encodeValues(type, encoding, values, opts) {
228
510
  return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
229
511
  }
230
512
 
231
- async function encodeDataPage(column, data) {
232
- let rLevelsBuf = Buffer.alloc(0);
233
-
234
- if (column.rLevelMax > 0) {
235
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
236
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
237
- });
238
- }
239
-
240
- let dLevelsBuf = Buffer.alloc(0);
241
-
242
- if (column.dLevelMax > 0) {
243
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
244
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
245
- });
246
- }
247
-
248
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
249
- typeLength: column.typeLength,
250
- bitWidth: column.typeLength
251
- });
252
- const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
253
- const compressedBuf = await Compression.deflate(column.compression, dataBuf);
254
- const header = new _parquetThrift.PageHeader({
255
- type: _parquetThrift.PageType.DATA_PAGE,
256
- data_page_header: new _parquetThrift.DataPageHeader({
257
- num_values: data.count,
258
- encoding: _parquetThrift.Encoding[column.encoding],
259
- definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
260
- repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
261
- }),
262
- uncompressed_page_size: dataBuf.length,
263
- compressed_page_size: compressedBuf.length
264
- });
265
- const headerBuf = (0, _readUtils.serializeThrift)(header);
266
- const page = Buffer.concat([headerBuf, compressedBuf]);
267
- return {
268
- header,
269
- headerSize: headerBuf.length,
270
- page
271
- };
513
+ function encodeDataPage(_x15, _x16) {
514
+ return _encodeDataPage.apply(this, arguments);
272
515
  }
273
516
 
274
- async function encodeDataPageV2(column, data, rowCount) {
275
- const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
276
- typeLength: column.typeLength,
277
- bitWidth: column.typeLength
278
- });
279
- const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
280
- let rLevelsBuf = Buffer.alloc(0);
281
-
282
- if (column.rLevelMax > 0) {
283
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
284
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
285
- disableEnvelope: true
286
- });
287
- }
288
-
289
- let dLevelsBuf = Buffer.alloc(0);
290
-
291
- if (column.dLevelMax > 0) {
292
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
293
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
294
- disableEnvelope: true
295
- });
296
- }
517
+ function _encodeDataPage() {
518
+ _encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(column, data) {
519
+ var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
520
+ return _regenerator.default.wrap(function _callee11$(_context11) {
521
+ while (1) {
522
+ switch (_context11.prev = _context11.next) {
523
+ case 0:
524
+ rLevelsBuf = Buffer.alloc(0);
525
+
526
+ if (column.rLevelMax > 0) {
527
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
528
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
529
+ });
530
+ }
531
+
532
+ dLevelsBuf = Buffer.alloc(0);
533
+
534
+ if (column.dLevelMax > 0) {
535
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
536
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
537
+ });
538
+ }
539
+
540
+ valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
541
+ typeLength: column.typeLength,
542
+ bitWidth: column.typeLength
543
+ });
544
+ dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
545
+ _context11.next = 8;
546
+ return Compression.deflate(column.compression, dataBuf);
547
+
548
+ case 8:
549
+ compressedBuf = _context11.sent;
550
+ header = new _parquetThrift.PageHeader({
551
+ type: _parquetThrift.PageType.DATA_PAGE,
552
+ data_page_header: new _parquetThrift.DataPageHeader({
553
+ num_values: data.count,
554
+ encoding: _parquetThrift.Encoding[column.encoding],
555
+ definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
556
+ repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
557
+ }),
558
+ uncompressed_page_size: dataBuf.length,
559
+ compressed_page_size: compressedBuf.length
560
+ });
561
+ headerBuf = (0, _readUtils.serializeThrift)(header);
562
+ page = Buffer.concat([headerBuf, compressedBuf]);
563
+ return _context11.abrupt("return", {
564
+ header: header,
565
+ headerSize: headerBuf.length,
566
+ page: page
567
+ });
568
+
569
+ case 13:
570
+ case "end":
571
+ return _context11.stop();
572
+ }
573
+ }
574
+ }, _callee11);
575
+ }));
576
+ return _encodeDataPage.apply(this, arguments);
577
+ }
297
578
 
298
- const header = new _parquetThrift.PageHeader({
299
- type: _parquetThrift.PageType.DATA_PAGE_V2,
300
- data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
301
- num_values: data.count,
302
- num_nulls: data.count - data.values.length,
303
- num_rows: rowCount,
304
- encoding: _parquetThrift.Encoding[column.encoding],
305
- definition_levels_byte_length: dLevelsBuf.length,
306
- repetition_levels_byte_length: rLevelsBuf.length,
307
- is_compressed: column.compression !== 'UNCOMPRESSED'
308
- }),
309
- uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
310
- compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
311
- });
312
- const headerBuf = (0, _readUtils.serializeThrift)(header);
313
- const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
314
- return {
315
- header,
316
- headerSize: headerBuf.length,
317
- page
318
- };
579
+ function encodeDataPageV2(_x17, _x18, _x19) {
580
+ return _encodeDataPageV.apply(this, arguments);
319
581
  }
320
582
 
321
- async function encodeColumnChunk(column, buffer, offset, opts) {
322
- const data = buffer.columnData[column.path.join()];
323
- const baseOffset = (opts.baseOffset || 0) + offset;
324
- let pageBuf;
325
- let total_uncompressed_size = 0;
326
- let total_compressed_size = 0;
327
- {
328
- const result = opts.useDataPageV2 ? await encodeDataPageV2(column, data, buffer.rowCount) : await encodeDataPage(column, data);
329
- pageBuf = result.page;
330
- total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
331
- total_compressed_size += result.header.compressed_page_size + result.headerSize;
332
- }
333
- const metadata = new _parquetThrift.ColumnMetaData({
334
- path_in_schema: column.path,
335
- num_values: data.count,
336
- data_page_offset: baseOffset,
337
- encodings: [],
338
- total_uncompressed_size,
339
- total_compressed_size,
340
- type: _parquetThrift.Type[column.primitiveType],
341
- codec: _parquetThrift.CompressionCodec[column.compression]
342
- });
343
- metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
344
- metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
345
- const metadataOffset = baseOffset + pageBuf.length;
346
- const body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
347
- return {
348
- body,
349
- metadata,
350
- metadataOffset
351
- };
583
+ function _encodeDataPageV() {
584
+ _encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(column, data, rowCount) {
585
+ var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
586
+ return _regenerator.default.wrap(function _callee12$(_context12) {
587
+ while (1) {
588
+ switch (_context12.prev = _context12.next) {
589
+ case 0:
590
+ valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
591
+ typeLength: column.typeLength,
592
+ bitWidth: column.typeLength
593
+ });
594
+ _context12.next = 3;
595
+ return Compression.deflate(column.compression, valuesBuf);
596
+
597
+ case 3:
598
+ compressedBuf = _context12.sent;
599
+ rLevelsBuf = Buffer.alloc(0);
600
+
601
+ if (column.rLevelMax > 0) {
602
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
603
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
604
+ disableEnvelope: true
605
+ });
606
+ }
607
+
608
+ dLevelsBuf = Buffer.alloc(0);
609
+
610
+ if (column.dLevelMax > 0) {
611
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
612
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
613
+ disableEnvelope: true
614
+ });
615
+ }
616
+
617
+ header = new _parquetThrift.PageHeader({
618
+ type: _parquetThrift.PageType.DATA_PAGE_V2,
619
+ data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
620
+ num_values: data.count,
621
+ num_nulls: data.count - data.values.length,
622
+ num_rows: rowCount,
623
+ encoding: _parquetThrift.Encoding[column.encoding],
624
+ definition_levels_byte_length: dLevelsBuf.length,
625
+ repetition_levels_byte_length: rLevelsBuf.length,
626
+ is_compressed: column.compression !== 'UNCOMPRESSED'
627
+ }),
628
+ uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
629
+ compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
630
+ });
631
+ headerBuf = (0, _readUtils.serializeThrift)(header);
632
+ page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
633
+ return _context12.abrupt("return", {
634
+ header: header,
635
+ headerSize: headerBuf.length,
636
+ page: page
637
+ });
638
+
639
+ case 12:
640
+ case "end":
641
+ return _context12.stop();
642
+ }
643
+ }
644
+ }, _callee12);
645
+ }));
646
+ return _encodeDataPageV.apply(this, arguments);
352
647
  }
353
648
 
354
- async function encodeRowGroup(schema, data, opts) {
355
- const metadata = new _parquetThrift.RowGroup({
356
- num_rows: data.rowCount,
357
- columns: [],
358
- total_byte_size: 0
359
- });
360
- let body = Buffer.alloc(0);
649
+ function encodeColumnChunk(_x20, _x21, _x22, _x23) {
650
+ return _encodeColumnChunk.apply(this, arguments);
651
+ }
361
652
 
362
- for (const field of schema.fieldList) {
363
- if (field.isNested) {
364
- continue;
365
- }
653
+ function _encodeColumnChunk() {
654
+ _encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(column, buffer, offset, opts) {
655
+ var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
656
+ return _regenerator.default.wrap(function _callee13$(_context13) {
657
+ while (1) {
658
+ switch (_context13.prev = _context13.next) {
659
+ case 0:
660
+ data = buffer.columnData[column.path.join()];
661
+ baseOffset = (opts.baseOffset || 0) + offset;
662
+ total_uncompressed_size = 0;
663
+ total_compressed_size = 0;
664
+
665
+ if (!opts.useDataPageV2) {
666
+ _context13.next = 10;
667
+ break;
668
+ }
669
+
670
+ _context13.next = 7;
671
+ return encodeDataPageV2(column, data, buffer.rowCount);
672
+
673
+ case 7:
674
+ _context13.t0 = _context13.sent;
675
+ _context13.next = 13;
676
+ break;
677
+
678
+ case 10:
679
+ _context13.next = 12;
680
+ return encodeDataPage(column, data);
681
+
682
+ case 12:
683
+ _context13.t0 = _context13.sent;
684
+
685
+ case 13:
686
+ result = _context13.t0;
687
+ pageBuf = result.page;
688
+ total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
689
+ total_compressed_size += result.header.compressed_page_size + result.headerSize;
690
+ metadata = new _parquetThrift.ColumnMetaData({
691
+ path_in_schema: column.path,
692
+ num_values: data.count,
693
+ data_page_offset: baseOffset,
694
+ encodings: [],
695
+ total_uncompressed_size: total_uncompressed_size,
696
+ total_compressed_size: total_compressed_size,
697
+ type: _parquetThrift.Type[column.primitiveType],
698
+ codec: _parquetThrift.CompressionCodec[column.compression]
699
+ });
700
+ metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
701
+ metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
702
+ metadataOffset = baseOffset + pageBuf.length;
703
+ body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
704
+ return _context13.abrupt("return", {
705
+ body: body,
706
+ metadata: metadata,
707
+ metadataOffset: metadataOffset
708
+ });
709
+
710
+ case 23:
711
+ case "end":
712
+ return _context13.stop();
713
+ }
714
+ }
715
+ }, _callee13);
716
+ }));
717
+ return _encodeColumnChunk.apply(this, arguments);
718
+ }
366
719
 
367
- const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
368
- const cchunk = new _parquetThrift.ColumnChunk({
369
- file_offset: cchunkData.metadataOffset,
370
- meta_data: cchunkData.metadata
371
- });
372
- metadata.columns.push(cchunk);
373
- metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
374
- body = Buffer.concat([body, cchunkData.body]);
375
- }
720
+ function encodeRowGroup(_x24, _x25, _x26) {
721
+ return _encodeRowGroup.apply(this, arguments);
722
+ }
376
723
 
377
- return {
378
- body,
379
- metadata
380
- };
724
+ function _encodeRowGroup() {
725
+ _encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14(schema, data, opts) {
726
+ var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
727
+
728
+ return _regenerator.default.wrap(function _callee14$(_context14) {
729
+ while (1) {
730
+ switch (_context14.prev = _context14.next) {
731
+ case 0:
732
+ metadata = new _parquetThrift.RowGroup({
733
+ num_rows: data.rowCount,
734
+ columns: [],
735
+ total_byte_size: 0
736
+ });
737
+ body = Buffer.alloc(0);
738
+ _iterator2 = _createForOfIteratorHelper(schema.fieldList);
739
+ _context14.prev = 3;
740
+
741
+ _iterator2.s();
742
+
743
+ case 5:
744
+ if ((_step2 = _iterator2.n()).done) {
745
+ _context14.next = 18;
746
+ break;
747
+ }
748
+
749
+ field = _step2.value;
750
+
751
+ if (!field.isNested) {
752
+ _context14.next = 9;
753
+ break;
754
+ }
755
+
756
+ return _context14.abrupt("continue", 16);
757
+
758
+ case 9:
759
+ _context14.next = 11;
760
+ return encodeColumnChunk(field, data, body.length, opts);
761
+
762
+ case 11:
763
+ cchunkData = _context14.sent;
764
+ cchunk = new _parquetThrift.ColumnChunk({
765
+ file_offset: cchunkData.metadataOffset,
766
+ meta_data: cchunkData.metadata
767
+ });
768
+ metadata.columns.push(cchunk);
769
+ metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
770
+ body = Buffer.concat([body, cchunkData.body]);
771
+
772
+ case 16:
773
+ _context14.next = 5;
774
+ break;
775
+
776
+ case 18:
777
+ _context14.next = 23;
778
+ break;
779
+
780
+ case 20:
781
+ _context14.prev = 20;
782
+ _context14.t0 = _context14["catch"](3);
783
+
784
+ _iterator2.e(_context14.t0);
785
+
786
+ case 23:
787
+ _context14.prev = 23;
788
+
789
+ _iterator2.f();
790
+
791
+ return _context14.finish(23);
792
+
793
+ case 26:
794
+ return _context14.abrupt("return", {
795
+ body: body,
796
+ metadata: metadata
797
+ });
798
+
799
+ case 27:
800
+ case "end":
801
+ return _context14.stop();
802
+ }
803
+ }
804
+ }, _callee14, null, [[3, 20, 23, 26]]);
805
+ }));
806
+ return _encodeRowGroup.apply(this, arguments);
381
807
  }
382
808
 
383
809
  function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
384
- const metadata = new _parquetThrift.FileMetaData({
810
+ var metadata = new _parquetThrift.FileMetaData({
385
811
  version: PARQUET_VERSION,
386
812
  created_by: 'parquets',
387
813
  num_rows: rowCount,
@@ -390,47 +816,57 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
390
816
  key_value_metadata: []
391
817
  });
392
818
 
393
- for (const key in userMetadata) {
819
+ for (var key in userMetadata) {
394
820
  var _metadata$key_value_m, _metadata$key_value_m2;
395
821
 
396
- const kv = new _parquetThrift.KeyValue({
397
- key,
822
+ var kv = new _parquetThrift.KeyValue({
823
+ key: key,
398
824
  value: userMetadata[key]
399
825
  });
400
826
  (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
401
827
  }
402
828
 
403
829
  {
404
- const schemaRoot = new _parquetThrift.SchemaElement({
830
+ var schemaRoot = new _parquetThrift.SchemaElement({
405
831
  name: 'root',
406
832
  num_children: Object.keys(schema.fields).length
407
833
  });
408
834
  metadata.schema.push(schemaRoot);
409
835
  }
410
836
 
411
- for (const field of schema.fieldList) {
412
- const relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
413
- const schemaElem = new _parquetThrift.SchemaElement({
414
- name: field.name,
415
- repetition_type: relt
416
- });
417
-
418
- if (field.isNested) {
419
- schemaElem.num_children = field.fieldCount;
420
- } else {
421
- schemaElem.type = _parquetThrift.Type[field.primitiveType];
837
+ var _iterator = _createForOfIteratorHelper(schema.fieldList),
838
+ _step;
839
+
840
+ try {
841
+ for (_iterator.s(); !(_step = _iterator.n()).done;) {
842
+ var field = _step.value;
843
+ var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
844
+ var schemaElem = new _parquetThrift.SchemaElement({
845
+ name: field.name,
846
+ repetition_type: relt
847
+ });
848
+
849
+ if (field.isNested) {
850
+ schemaElem.num_children = field.fieldCount;
851
+ } else {
852
+ schemaElem.type = _parquetThrift.Type[field.primitiveType];
853
+ }
854
+
855
+ if (field.originalType) {
856
+ schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
857
+ }
858
+
859
+ schemaElem.type_length = field.typeLength;
860
+ metadata.schema.push(schemaElem);
422
861
  }
423
-
424
- if (field.originalType) {
425
- schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
426
- }
427
-
428
- schemaElem.type_length = field.typeLength;
429
- metadata.schema.push(schemaElem);
862
+ } catch (err) {
863
+ _iterator.e(err);
864
+ } finally {
865
+ _iterator.f();
430
866
  }
431
867
 
432
- const metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
433
- const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
868
+ var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
869
+ var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
434
870
  metadataEncoded.copy(footerEncoded);
435
871
  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
436
872
  footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);