@loaders.gl/parquet 3.4.13 → 3.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/dist/dist.min.js +15 -24
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/bundle.js +1 -1
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +5 -5
  6. package/dist/es5/constants.js.map +1 -1
  7. package/dist/es5/index.js +24 -24
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +2 -7
  10. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
  11. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +22 -33
  12. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
  13. package/dist/es5/lib/arrow/convert-schema-to-parquet.js +2 -2
  14. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
  15. package/dist/es5/lib/geo/decode-geo-metadata.js +16 -27
  16. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
  17. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +20 -151
  18. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
  19. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +13 -138
  20. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
  21. package/dist/es5/lib/wasm/encode-parquet-wasm.js +8 -29
  22. package/dist/es5/lib/wasm/encode-parquet-wasm.js.map +1 -1
  23. package/dist/es5/lib/wasm/load-wasm/index.js +1 -1
  24. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js +10 -33
  25. package/dist/es5/lib/wasm/load-wasm/load-wasm-browser.js.map +1 -1
  26. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js +4 -22
  27. package/dist/es5/lib/wasm/load-wasm/load-wasm-node.js.map +1 -1
  28. package/dist/es5/lib/wasm/parse-parquet-wasm.js +13 -46
  29. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  30. package/dist/es5/parquet-loader.js +4 -4
  31. package/dist/es5/parquet-loader.js.map +1 -1
  32. package/dist/es5/parquet-wasm-loader.js +4 -4
  33. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  34. package/dist/es5/parquet-wasm-writer.js +3 -3
  35. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  36. package/dist/es5/parquet-writer.js +4 -4
  37. package/dist/es5/parquet-writer.js.map +1 -1
  38. package/dist/es5/parquetjs/codecs/dictionary.js +3 -6
  39. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  40. package/dist/es5/parquetjs/codecs/index.js +4 -5
  41. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  42. package/dist/es5/parquetjs/codecs/plain.js +41 -41
  43. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  44. package/dist/es5/parquetjs/codecs/rle.js +25 -30
  45. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  46. package/dist/es5/parquetjs/compression.js +26 -90
  47. package/dist/es5/parquetjs/compression.js.map +1 -1
  48. package/dist/es5/parquetjs/encoder/parquet-encoder.js +245 -536
  49. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  51. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +28 -40
  52. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  53. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +123 -133
  54. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  55. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +138 -150
  56. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +241 -251
  58. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +58 -70
  60. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  61. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  63. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +97 -107
  64. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  65. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +136 -146
  66. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  67. package/dist/es5/parquetjs/parquet-thrift/DateType.js +28 -40
  68. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  69. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +58 -68
  70. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  71. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +71 -81
  72. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  73. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +28 -40
  75. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  77. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +164 -174
  78. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  79. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +28 -40
  80. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  81. package/dist/es5/parquetjs/parquet-thrift/IntType.js +58 -68
  82. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  83. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +28 -40
  84. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  85. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +58 -68
  86. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  87. package/dist/es5/parquetjs/parquet-thrift/ListType.js +28 -40
  88. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  89. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +274 -310
  90. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/MapType.js +28 -40
  92. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +28 -40
  94. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +28 -40
  96. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/NullType.js +28 -40
  98. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +56 -66
  100. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +71 -81
  102. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +136 -146
  104. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  105. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +71 -81
  106. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  107. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  108. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +105 -115
  109. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  110. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +162 -172
  111. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  112. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +71 -81
  113. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  114. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +106 -116
  115. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  116. package/dist/es5/parquetjs/parquet-thrift/StringType.js +28 -40
  117. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  118. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +58 -68
  119. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  120. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +76 -90
  121. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  122. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +58 -68
  123. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  124. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  125. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +28 -40
  126. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  127. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +28 -40
  128. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  129. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  130. package/dist/es5/parquetjs/parquet-thrift/index.js.map +1 -1
  131. package/dist/es5/parquetjs/parser/decoders.js +195 -327
  132. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  133. package/dist/es5/parquetjs/parser/parquet-reader.js +155 -582
  134. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  135. package/dist/es5/parquetjs/schema/declare.js +10 -11
  136. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  137. package/dist/es5/parquetjs/schema/schema.js +65 -82
  138. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  139. package/dist/es5/parquetjs/schema/shred.js +56 -87
  140. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  141. package/dist/es5/parquetjs/schema/types.js +40 -40
  142. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  143. package/dist/es5/parquetjs/utils/file-utils.js +8 -12
  144. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  145. package/dist/es5/parquetjs/utils/read-utils.js +22 -39
  146. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  147. package/dist/esm/parquet-loader.js +1 -1
  148. package/dist/esm/parquet-wasm-loader.js +1 -1
  149. package/dist/esm/parquet-wasm-writer.js +1 -1
  150. package/dist/esm/parquet-writer.js +1 -1
  151. package/dist/parquet-worker.js +15 -24
  152. package/dist/parquet-worker.js.map +3 -3
  153. package/package.json +6 -6
@@ -1,15 +1,10 @@
1
1
  "use strict";
2
2
 
3
3
  var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
4
- var _typeof = require("@babel/runtime/helpers/typeof");
5
4
  Object.defineProperty(exports, "__esModule", {
6
5
  value: true
7
6
  });
8
7
  exports.ParquetEnvelopeWriter = exports.ParquetEncoder = void 0;
9
- var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
10
- var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
11
- var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
12
- var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
13
8
  var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
14
9
  var _codecs = require("../codecs");
15
10
  var Compression = _interopRequireWildcard(require("../compression"));
@@ -18,20 +13,25 @@ var _parquetThrift = require("../parquet-thrift");
18
13
  var _fileUtils = require("../utils/file-utils");
19
14
  var _readUtils = require("../utils/read-utils");
20
15
  var _nodeInt = _interopRequireDefault(require("node-int64"));
21
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
22
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
23
- function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
24
- function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
25
- function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; return arr2; }
26
- var PARQUET_MAGIC = 'PAR1';
27
- var PARQUET_VERSION = 1;
28
- var PARQUET_DEFAULT_PAGE_SIZE = 8192;
29
- var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
30
- var PARQUET_RDLVL_TYPE = 'INT32';
31
- var PARQUET_RDLVL_ENCODING = 'RLE';
32
- var ParquetEncoder = function () {
33
- function ParquetEncoder(schema, envelopeWriter, opts) {
34
- (0, _classCallCheck2.default)(this, ParquetEncoder);
16
+ function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
17
+ function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
18
+ const PARQUET_MAGIC = 'PAR1';
19
+ const PARQUET_VERSION = 1;
20
+ const PARQUET_DEFAULT_PAGE_SIZE = 8192;
21
+ const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
22
+ const PARQUET_RDLVL_TYPE = 'INT32';
23
+ const PARQUET_RDLVL_ENCODING = 'RLE';
24
+ class ParquetEncoder {
25
+ static async openFile(schema, path, opts) {
26
+ const outputStream = await (0, _fileUtils.osopen)(path, opts);
27
+ return ParquetEncoder.openStream(schema, outputStream, opts);
28
+ }
29
+ static async openStream(schema, outputStream) {
30
+ let opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};
31
+ const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
32
+ return new ParquetEncoder(schema, envelopeWriter, opts);
33
+ }
34
+ constructor(schema, envelopeWriter, opts) {
35
35
  (0, _defineProperty2.default)(this, "schema", void 0);
36
36
  (0, _defineProperty2.default)(this, "envelopeWriter", void 0);
37
37
  (0, _defineProperty2.default)(this, "rowBuffer", void 0);
@@ -46,175 +46,55 @@ var ParquetEncoder = function () {
46
46
  this.userMetadata = {};
47
47
  this.writeHeader();
48
48
  }
49
- (0, _createClass2.default)(ParquetEncoder, [{
50
- key: "writeHeader",
51
- value: function () {
52
- var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
53
- return _regenerator.default.wrap(function _callee$(_context) {
54
- while (1) switch (_context.prev = _context.next) {
55
- case 0:
56
- _context.prev = 0;
57
- _context.next = 3;
58
- return this.envelopeWriter.writeHeader();
59
- case 3:
60
- _context.next = 10;
61
- break;
62
- case 5:
63
- _context.prev = 5;
64
- _context.t0 = _context["catch"](0);
65
- _context.next = 9;
66
- return this.envelopeWriter.close();
67
- case 9:
68
- throw _context.t0;
69
- case 10:
70
- case "end":
71
- return _context.stop();
72
- }
73
- }, _callee, this, [[0, 5]]);
74
- }));
75
- function writeHeader() {
76
- return _writeHeader.apply(this, arguments);
77
- }
78
- return writeHeader;
79
- }()
80
- }, {
81
- key: "appendRow",
82
- value: function () {
83
- var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
84
- return _regenerator.default.wrap(function _callee2$(_context2) {
85
- while (1) switch (_context2.prev = _context2.next) {
86
- case 0:
87
- if (!this.closed) {
88
- _context2.next = 2;
89
- break;
90
- }
91
- throw new Error('writer was closed');
92
- case 2:
93
- Shred.shredRecord(this.schema, row, this.rowBuffer);
94
- if (this.rowBuffer.rowCount >= this.rowGroupSize) {
95
- this.rowBuffer = {};
96
- }
97
- case 4:
98
- case "end":
99
- return _context2.stop();
100
- }
101
- }, _callee2, this);
102
- }));
103
- function appendRow(_x) {
104
- return _appendRow.apply(this, arguments);
105
- }
106
- return appendRow;
107
- }()
108
- }, {
109
- key: "close",
110
- value: function () {
111
- var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
112
- return _regenerator.default.wrap(function _callee3$(_context3) {
113
- while (1) switch (_context3.prev = _context3.next) {
114
- case 0:
115
- if (!this.closed) {
116
- _context3.next = 2;
117
- break;
118
- }
119
- throw new Error('writer was closed');
120
- case 2:
121
- this.closed = true;
122
- if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
123
- this.rowBuffer = {};
124
- }
125
- _context3.next = 6;
126
- return this.envelopeWriter.writeFooter(this.userMetadata);
127
- case 6:
128
- _context3.next = 8;
129
- return this.envelopeWriter.close();
130
- case 8:
131
- if (callback) {
132
- callback();
133
- }
134
- case 9:
135
- case "end":
136
- return _context3.stop();
137
- }
138
- }, _callee3, this);
139
- }));
140
- function close(_x2) {
141
- return _close.apply(this, arguments);
142
- }
143
- return close;
144
- }()
145
- }, {
146
- key: "setMetadata",
147
- value: function setMetadata(key, value) {
148
- this.userMetadata[String(key)] = String(value);
49
+ async writeHeader() {
50
+ try {
51
+ await this.envelopeWriter.writeHeader();
52
+ } catch (err) {
53
+ await this.envelopeWriter.close();
54
+ throw err;
55
+ }
56
+ }
57
+ async appendRow(row) {
58
+ if (this.closed) {
59
+ throw new Error('writer was closed');
60
+ }
61
+ Shred.shredRecord(this.schema, row, this.rowBuffer);
62
+ if (this.rowBuffer.rowCount >= this.rowGroupSize) {
63
+ this.rowBuffer = {};
64
+ }
65
+ }
66
+ async close(callback) {
67
+ if (this.closed) {
68
+ throw new Error('writer was closed');
149
69
  }
150
- }, {
151
- key: "setRowGroupSize",
152
- value: function setRowGroupSize(cnt) {
153
- this.rowGroupSize = cnt;
70
+ this.closed = true;
71
+ if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
72
+ this.rowBuffer = {};
154
73
  }
155
- }, {
156
- key: "setPageSize",
157
- value: function setPageSize(cnt) {
158
- this.envelopeWriter.setPageSize(cnt);
74
+ await this.envelopeWriter.writeFooter(this.userMetadata);
75
+ await this.envelopeWriter.close();
76
+ if (callback) {
77
+ callback();
159
78
  }
160
- }], [{
161
- key: "openFile",
162
- value: function () {
163
- var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, path, opts) {
164
- var outputStream;
165
- return _regenerator.default.wrap(function _callee4$(_context4) {
166
- while (1) switch (_context4.prev = _context4.next) {
167
- case 0:
168
- _context4.next = 2;
169
- return (0, _fileUtils.osopen)(path, opts);
170
- case 2:
171
- outputStream = _context4.sent;
172
- return _context4.abrupt("return", ParquetEncoder.openStream(schema, outputStream, opts));
173
- case 4:
174
- case "end":
175
- return _context4.stop();
176
- }
177
- }, _callee4);
178
- }));
179
- function openFile(_x3, _x4, _x5) {
180
- return _openFile.apply(this, arguments);
181
- }
182
- return openFile;
183
- }()
184
- }, {
185
- key: "openStream",
186
- value: function () {
187
- var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream) {
188
- var opts,
189
- envelopeWriter,
190
- _args5 = arguments;
191
- return _regenerator.default.wrap(function _callee5$(_context5) {
192
- while (1) switch (_context5.prev = _context5.next) {
193
- case 0:
194
- opts = _args5.length > 2 && _args5[2] !== undefined ? _args5[2] : {};
195
- _context5.next = 3;
196
- return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
197
- case 3:
198
- envelopeWriter = _context5.sent;
199
- return _context5.abrupt("return", new ParquetEncoder(schema, envelopeWriter, opts));
200
- case 5:
201
- case "end":
202
- return _context5.stop();
203
- }
204
- }, _callee5);
205
- }));
206
- function openStream(_x6, _x7) {
207
- return _openStream.apply(this, arguments);
208
- }
209
- return openStream;
210
- }()
211
- }]);
212
- return ParquetEncoder;
213
- }();
79
+ }
80
+ setMetadata(key, value) {
81
+ this.userMetadata[String(key)] = String(value);
82
+ }
83
+ setRowGroupSize(cnt) {
84
+ this.rowGroupSize = cnt;
85
+ }
86
+ setPageSize(cnt) {
87
+ this.envelopeWriter.setPageSize(cnt);
88
+ }
89
+ }
214
90
  exports.ParquetEncoder = ParquetEncoder;
215
- var ParquetEnvelopeWriter = function () {
216
- function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
217
- (0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
91
+ class ParquetEnvelopeWriter {
92
+ static async openStream(schema, outputStream, opts) {
93
+ const writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
94
+ const closeFn = _fileUtils.osclose.bind(undefined, outputStream);
95
+ return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
96
+ }
97
+ constructor(schema, writeFn, closeFn, fileOffset, opts) {
218
98
  (0, _defineProperty2.default)(this, "schema", void 0);
219
99
  (0, _defineProperty2.default)(this, "write", void 0);
220
100
  (0, _defineProperty2.default)(this, "close", void 0);
@@ -232,88 +112,33 @@ var ParquetEnvelopeWriter = function () {
232
112
  this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;
233
113
  this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
234
114
  }
235
- (0, _createClass2.default)(ParquetEnvelopeWriter, [{
236
- key: "writeSection",
237
- value: function writeSection(buf) {
238
- this.offset += buf.length;
239
- return this.write(buf);
240
- }
241
- }, {
242
- key: "writeHeader",
243
- value: function writeHeader() {
244
- return this.writeSection(Buffer.from(PARQUET_MAGIC));
245
- }
246
- }, {
247
- key: "writeRowGroup",
248
- value: function () {
249
- var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(records) {
250
- var rgroup;
251
- return _regenerator.default.wrap(function _callee6$(_context6) {
252
- while (1) switch (_context6.prev = _context6.next) {
253
- case 0:
254
- _context6.next = 2;
255
- return encodeRowGroup(this.schema, records, {
256
- baseOffset: this.offset,
257
- pageSize: this.pageSize,
258
- useDataPageV2: this.useDataPageV2
259
- });
260
- case 2:
261
- rgroup = _context6.sent;
262
- this.rowCount += records.rowCount;
263
- this.rowGroups.push(rgroup.metadata);
264
- _context6.next = 7;
265
- return this.writeSection(rgroup.body);
266
- case 7:
267
- return _context6.abrupt("return", _context6.sent);
268
- case 8:
269
- case "end":
270
- return _context6.stop();
271
- }
272
- }, _callee6, this);
273
- }));
274
- function writeRowGroup(_x8) {
275
- return _writeRowGroup.apply(this, arguments);
276
- }
277
- return writeRowGroup;
278
- }()
279
- }, {
280
- key: "writeFooter",
281
- value: function writeFooter(userMetadata) {
282
- if (!userMetadata) {
283
- userMetadata = {};
284
- }
285
- return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
286
- }
287
- }, {
288
- key: "setPageSize",
289
- value: function setPageSize(cnt) {
290
- this.pageSize = cnt;
115
+ writeSection(buf) {
116
+ this.offset += buf.length;
117
+ return this.write(buf);
118
+ }
119
+ writeHeader() {
120
+ return this.writeSection(Buffer.from(PARQUET_MAGIC));
121
+ }
122
+ async writeRowGroup(records) {
123
+ const rgroup = await encodeRowGroup(this.schema, records, {
124
+ baseOffset: this.offset,
125
+ pageSize: this.pageSize,
126
+ useDataPageV2: this.useDataPageV2
127
+ });
128
+ this.rowCount += records.rowCount;
129
+ this.rowGroups.push(rgroup.metadata);
130
+ return await this.writeSection(rgroup.body);
131
+ }
132
+ writeFooter(userMetadata) {
133
+ if (!userMetadata) {
134
+ userMetadata = {};
291
135
  }
292
- }], [{
293
- key: "openStream",
294
- value: function () {
295
- var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(schema, outputStream, opts) {
296
- var writeFn, closeFn;
297
- return _regenerator.default.wrap(function _callee7$(_context7) {
298
- while (1) switch (_context7.prev = _context7.next) {
299
- case 0:
300
- writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
301
- closeFn = _fileUtils.osclose.bind(undefined, outputStream);
302
- return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
303
- case 3:
304
- case "end":
305
- return _context7.stop();
306
- }
307
- }, _callee7);
308
- }));
309
- function openStream(_x9, _x10, _x11) {
310
- return _openStream2.apply(this, arguments);
311
- }
312
- return openStream;
313
- }()
314
- }]);
315
- return ParquetEnvelopeWriter;
316
- }();
136
+ return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
137
+ }
138
+ setPageSize(cnt) {
139
+ this.pageSize = cnt;
140
+ }
141
+ }
317
142
  exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
318
143
  function encodeValues(type, encoding, values, opts) {
319
144
  if (!(encoding in _codecs.PARQUET_CODECS)) {
@@ -321,252 +146,145 @@ function encodeValues(type, encoding, values, opts) {
321
146
  }
322
147
  return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
323
148
  }
324
- function encodeDataPage(_x12, _x13) {
325
- return _encodeDataPage.apply(this, arguments);
326
- }
327
- function _encodeDataPage() {
328
- _encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(column, data) {
329
- var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
330
- return _regenerator.default.wrap(function _callee8$(_context8) {
331
- while (1) switch (_context8.prev = _context8.next) {
332
- case 0:
333
- rLevelsBuf = Buffer.alloc(0);
334
- if (column.rLevelMax > 0) {
335
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
336
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
337
- });
338
- }
339
- dLevelsBuf = Buffer.alloc(0);
340
- if (column.dLevelMax > 0) {
341
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
342
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
343
- });
344
- }
345
- valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
346
- typeLength: column.typeLength,
347
- bitWidth: column.typeLength
348
- });
349
- dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
350
- _context8.next = 8;
351
- return Compression.deflate(column.compression, dataBuf);
352
- case 8:
353
- compressedBuf = _context8.sent;
354
- header = new _parquetThrift.PageHeader({
355
- type: _parquetThrift.PageType.DATA_PAGE,
356
- data_page_header: new _parquetThrift.DataPageHeader({
357
- num_values: data.count,
358
- encoding: _parquetThrift.Encoding[column.encoding],
359
- definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
360
- repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
361
- }),
362
- uncompressed_page_size: dataBuf.length,
363
- compressed_page_size: compressedBuf.length
364
- });
365
- headerBuf = (0, _readUtils.serializeThrift)(header);
366
- page = Buffer.concat([headerBuf, compressedBuf]);
367
- return _context8.abrupt("return", {
368
- header: header,
369
- headerSize: headerBuf.length,
370
- page: page
371
- });
372
- case 13:
373
- case "end":
374
- return _context8.stop();
375
- }
376
- }, _callee8);
377
- }));
378
- return _encodeDataPage.apply(this, arguments);
379
- }
380
- function encodeDataPageV2(_x14, _x15, _x16) {
381
- return _encodeDataPageV.apply(this, arguments);
382
- }
383
- function _encodeDataPageV() {
384
- _encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(column, data, rowCount) {
385
- var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
386
- return _regenerator.default.wrap(function _callee9$(_context9) {
387
- while (1) switch (_context9.prev = _context9.next) {
388
- case 0:
389
- valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
390
- typeLength: column.typeLength,
391
- bitWidth: column.typeLength
392
- });
393
- _context9.next = 3;
394
- return Compression.deflate(column.compression, valuesBuf);
395
- case 3:
396
- compressedBuf = _context9.sent;
397
- rLevelsBuf = Buffer.alloc(0);
398
- if (column.rLevelMax > 0) {
399
- rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
400
- bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
401
- disableEnvelope: true
402
- });
403
- }
404
- dLevelsBuf = Buffer.alloc(0);
405
- if (column.dLevelMax > 0) {
406
- dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
407
- bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
408
- disableEnvelope: true
409
- });
410
- }
411
- header = new _parquetThrift.PageHeader({
412
- type: _parquetThrift.PageType.DATA_PAGE_V2,
413
- data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
414
- num_values: data.count,
415
- num_nulls: data.count - data.values.length,
416
- num_rows: rowCount,
417
- encoding: _parquetThrift.Encoding[column.encoding],
418
- definition_levels_byte_length: dLevelsBuf.length,
419
- repetition_levels_byte_length: rLevelsBuf.length,
420
- is_compressed: column.compression !== 'UNCOMPRESSED'
421
- }),
422
- uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
423
- compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
424
- });
425
- headerBuf = (0, _readUtils.serializeThrift)(header);
426
- page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
427
- return _context9.abrupt("return", {
428
- header: header,
429
- headerSize: headerBuf.length,
430
- page: page
431
- });
432
- case 12:
433
- case "end":
434
- return _context9.stop();
435
- }
436
- }, _callee9);
437
- }));
438
- return _encodeDataPageV.apply(this, arguments);
439
- }
440
- function encodeColumnChunk(_x17, _x18, _x19, _x20) {
441
- return _encodeColumnChunk.apply(this, arguments);
149
+ async function encodeDataPage(column, data) {
150
+ let rLevelsBuf = Buffer.alloc(0);
151
+ if (column.rLevelMax > 0) {
152
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
153
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
154
+ });
155
+ }
156
+ let dLevelsBuf = Buffer.alloc(0);
157
+ if (column.dLevelMax > 0) {
158
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
159
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
160
+ });
161
+ }
162
+ const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
163
+ typeLength: column.typeLength,
164
+ bitWidth: column.typeLength
165
+ });
166
+ const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
167
+ const compressedBuf = await Compression.deflate(column.compression, dataBuf);
168
+ const header = new _parquetThrift.PageHeader({
169
+ type: _parquetThrift.PageType.DATA_PAGE,
170
+ data_page_header: new _parquetThrift.DataPageHeader({
171
+ num_values: data.count,
172
+ encoding: _parquetThrift.Encoding[column.encoding],
173
+ definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
174
+ repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
175
+ }),
176
+ uncompressed_page_size: dataBuf.length,
177
+ compressed_page_size: compressedBuf.length
178
+ });
179
+ const headerBuf = (0, _readUtils.serializeThrift)(header);
180
+ const page = Buffer.concat([headerBuf, compressedBuf]);
181
+ return {
182
+ header,
183
+ headerSize: headerBuf.length,
184
+ page
185
+ };
442
186
  }
443
- function _encodeColumnChunk() {
444
- _encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(column, buffer, offset, opts) {
445
- var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
446
- return _regenerator.default.wrap(function _callee10$(_context10) {
447
- while (1) switch (_context10.prev = _context10.next) {
448
- case 0:
449
- data = buffer.columnData[column.path.join()];
450
- baseOffset = (opts.baseOffset || 0) + offset;
451
- total_uncompressed_size = 0;
452
- total_compressed_size = 0;
453
- if (!opts.useDataPageV2) {
454
- _context10.next = 10;
455
- break;
456
- }
457
- _context10.next = 7;
458
- return encodeDataPageV2(column, data, buffer.rowCount);
459
- case 7:
460
- _context10.t0 = _context10.sent;
461
- _context10.next = 13;
462
- break;
463
- case 10:
464
- _context10.next = 12;
465
- return encodeDataPage(column, data);
466
- case 12:
467
- _context10.t0 = _context10.sent;
468
- case 13:
469
- result = _context10.t0;
470
- pageBuf = result.page;
471
- total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
472
- total_compressed_size += result.header.compressed_page_size + result.headerSize;
473
- metadata = new _parquetThrift.ColumnMetaData({
474
- path_in_schema: column.path,
475
- num_values: data.count,
476
- data_page_offset: baseOffset,
477
- encodings: [],
478
- total_uncompressed_size: total_uncompressed_size,
479
- total_compressed_size: total_compressed_size,
480
- type: _parquetThrift.Type[column.primitiveType],
481
- codec: _parquetThrift.CompressionCodec[column.compression]
482
- });
483
- metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
484
- metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
485
- metadataOffset = baseOffset + pageBuf.length;
486
- body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
487
- return _context10.abrupt("return", {
488
- body: body,
489
- metadata: metadata,
490
- metadataOffset: metadataOffset
491
- });
492
- case 23:
493
- case "end":
494
- return _context10.stop();
495
- }
496
- }, _callee10);
497
- }));
498
- return _encodeColumnChunk.apply(this, arguments);
187
+ async function encodeDataPageV2(column, data, rowCount) {
188
+ const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
189
+ typeLength: column.typeLength,
190
+ bitWidth: column.typeLength
191
+ });
192
+ const compressedBuf = await Compression.deflate(column.compression, valuesBuf);
193
+ let rLevelsBuf = Buffer.alloc(0);
194
+ if (column.rLevelMax > 0) {
195
+ rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
196
+ bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
197
+ disableEnvelope: true
198
+ });
199
+ }
200
+ let dLevelsBuf = Buffer.alloc(0);
201
+ if (column.dLevelMax > 0) {
202
+ dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
203
+ bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
204
+ disableEnvelope: true
205
+ });
206
+ }
207
+ const header = new _parquetThrift.PageHeader({
208
+ type: _parquetThrift.PageType.DATA_PAGE_V2,
209
+ data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
210
+ num_values: data.count,
211
+ num_nulls: data.count - data.values.length,
212
+ num_rows: rowCount,
213
+ encoding: _parquetThrift.Encoding[column.encoding],
214
+ definition_levels_byte_length: dLevelsBuf.length,
215
+ repetition_levels_byte_length: rLevelsBuf.length,
216
+ is_compressed: column.compression !== 'UNCOMPRESSED'
217
+ }),
218
+ uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
219
+ compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
220
+ });
221
+ const headerBuf = (0, _readUtils.serializeThrift)(header);
222
+ const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
223
+ return {
224
+ header,
225
+ headerSize: headerBuf.length,
226
+ page
227
+ };
499
228
  }
500
- function encodeRowGroup(_x21, _x22, _x23) {
501
- return _encodeRowGroup.apply(this, arguments);
229
+ async function encodeColumnChunk(column, buffer, offset, opts) {
230
+ const data = buffer.columnData[column.path.join()];
231
+ const baseOffset = (opts.baseOffset || 0) + offset;
232
+ let pageBuf;
233
+ let total_uncompressed_size = 0;
234
+ let total_compressed_size = 0;
235
+ {
236
+ const result = opts.useDataPageV2 ? await encodeDataPageV2(column, data, buffer.rowCount) : await encodeDataPage(column, data);
237
+ pageBuf = result.page;
238
+ total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
239
+ total_compressed_size += result.header.compressed_page_size + result.headerSize;
240
+ }
241
+ const metadata = new _parquetThrift.ColumnMetaData({
242
+ path_in_schema: column.path,
243
+ num_values: data.count,
244
+ data_page_offset: baseOffset,
245
+ encodings: [],
246
+ total_uncompressed_size,
247
+ total_compressed_size,
248
+ type: _parquetThrift.Type[column.primitiveType],
249
+ codec: _parquetThrift.CompressionCodec[column.compression]
250
+ });
251
+ metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
252
+ metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
253
+ const metadataOffset = baseOffset + pageBuf.length;
254
+ const body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
255
+ return {
256
+ body,
257
+ metadata,
258
+ metadataOffset
259
+ };
502
260
  }
503
- function _encodeRowGroup() {
504
- _encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(schema, data, opts) {
505
- var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
506
- return _regenerator.default.wrap(function _callee11$(_context11) {
507
- while (1) switch (_context11.prev = _context11.next) {
508
- case 0:
509
- metadata = new _parquetThrift.RowGroup({
510
- num_rows: data.rowCount,
511
- columns: [],
512
- total_byte_size: 0
513
- });
514
- body = Buffer.alloc(0);
515
- _iterator2 = _createForOfIteratorHelper(schema.fieldList);
516
- _context11.prev = 3;
517
- _iterator2.s();
518
- case 5:
519
- if ((_step2 = _iterator2.n()).done) {
520
- _context11.next = 18;
521
- break;
522
- }
523
- field = _step2.value;
524
- if (!field.isNested) {
525
- _context11.next = 9;
526
- break;
527
- }
528
- return _context11.abrupt("continue", 16);
529
- case 9:
530
- _context11.next = 11;
531
- return encodeColumnChunk(field, data, body.length, opts);
532
- case 11:
533
- cchunkData = _context11.sent;
534
- cchunk = new _parquetThrift.ColumnChunk({
535
- file_offset: cchunkData.metadataOffset,
536
- meta_data: cchunkData.metadata
537
- });
538
- metadata.columns.push(cchunk);
539
- metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
540
- body = Buffer.concat([body, cchunkData.body]);
541
- case 16:
542
- _context11.next = 5;
543
- break;
544
- case 18:
545
- _context11.next = 23;
546
- break;
547
- case 20:
548
- _context11.prev = 20;
549
- _context11.t0 = _context11["catch"](3);
550
- _iterator2.e(_context11.t0);
551
- case 23:
552
- _context11.prev = 23;
553
- _iterator2.f();
554
- return _context11.finish(23);
555
- case 26:
556
- return _context11.abrupt("return", {
557
- body: body,
558
- metadata: metadata
559
- });
560
- case 27:
561
- case "end":
562
- return _context11.stop();
563
- }
564
- }, _callee11, null, [[3, 20, 23, 26]]);
565
- }));
566
- return _encodeRowGroup.apply(this, arguments);
261
+ async function encodeRowGroup(schema, data, opts) {
262
+ const metadata = new _parquetThrift.RowGroup({
263
+ num_rows: data.rowCount,
264
+ columns: [],
265
+ total_byte_size: 0
266
+ });
267
+ let body = Buffer.alloc(0);
268
+ for (const field of schema.fieldList) {
269
+ if (field.isNested) {
270
+ continue;
271
+ }
272
+ const cchunkData = await encodeColumnChunk(field, data, body.length, opts);
273
+ const cchunk = new _parquetThrift.ColumnChunk({
274
+ file_offset: cchunkData.metadataOffset,
275
+ meta_data: cchunkData.metadata
276
+ });
277
+ metadata.columns.push(cchunk);
278
+ metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
279
+ body = Buffer.concat([body, cchunkData.body]);
280
+ }
281
+ return {
282
+ body,
283
+ metadata
284
+ };
567
285
  }
568
286
  function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
569
- var metadata = new _parquetThrift.FileMetaData({
287
+ const metadata = new _parquetThrift.FileMetaData({
570
288
  version: PARQUET_VERSION,
571
289
  created_by: 'parquets',
572
290
  num_rows: rowCount,
@@ -574,49 +292,40 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
574
292
  schema: [],
575
293
  key_value_metadata: []
576
294
  });
577
- for (var key in userMetadata) {
295
+ for (const key in userMetadata) {
578
296
  var _metadata$key_value_m, _metadata$key_value_m2, _metadata$key_value_m3;
579
- var kv = new _parquetThrift.KeyValue({
580
- key: key,
297
+ const kv = new _parquetThrift.KeyValue({
298
+ key,
581
299
  value: userMetadata[key]
582
300
  });
583
301
  (_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = (_metadata$key_value_m3 = _metadata$key_value_m).push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m3, kv);
584
302
  }
585
303
  {
586
- var schemaRoot = new _parquetThrift.SchemaElement({
304
+ const schemaRoot = new _parquetThrift.SchemaElement({
587
305
  name: 'root',
588
306
  num_children: Object.keys(schema.fields).length
589
307
  });
590
308
  metadata.schema.push(schemaRoot);
591
309
  }
592
- var _iterator = _createForOfIteratorHelper(schema.fieldList),
593
- _step;
594
- try {
595
- for (_iterator.s(); !(_step = _iterator.n()).done;) {
596
- var field = _step.value;
597
- var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
598
- var schemaElem = new _parquetThrift.SchemaElement({
599
- name: field.name,
600
- repetition_type: relt
601
- });
602
- if (field.isNested) {
603
- schemaElem.num_children = field.fieldCount;
604
- } else {
605
- schemaElem.type = _parquetThrift.Type[field.primitiveType];
606
- }
607
- if (field.originalType) {
608
- schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
609
- }
610
- schemaElem.type_length = field.typeLength;
611
- metadata.schema.push(schemaElem);
310
+ for (const field of schema.fieldList) {
311
+ const relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
312
+ const schemaElem = new _parquetThrift.SchemaElement({
313
+ name: field.name,
314
+ repetition_type: relt
315
+ });
316
+ if (field.isNested) {
317
+ schemaElem.num_children = field.fieldCount;
318
+ } else {
319
+ schemaElem.type = _parquetThrift.Type[field.primitiveType];
320
+ }
321
+ if (field.originalType) {
322
+ schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
612
323
  }
613
- } catch (err) {
614
- _iterator.e(err);
615
- } finally {
616
- _iterator.f();
324
+ schemaElem.type_length = field.typeLength;
325
+ metadata.schema.push(schemaElem);
617
326
  }
618
- var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
619
- var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
327
+ const metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
328
+ const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
620
329
  metadataEncoded.copy(footerEncoded);
621
330
  footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
622
331
  footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);