@loaders.gl/parquet 3.1.0-beta.7 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es5/bundle.js +1 -1
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +5 -5
- package/dist/es5/constants.js.map +1 -1
- package/dist/es5/index.js +19 -10
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +13 -13
- package/dist/es5/lib/convert-schema.js.map +1 -1
- package/dist/es5/lib/parse-parquet.js +154 -19
- package/dist/es5/lib/parse-parquet.js.map +1 -1
- package/dist/es5/lib/read-array-buffer.js +43 -6
- package/dist/es5/lib/read-array-buffer.js.map +1 -1
- package/dist/es5/parquet-loader.js +4 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +10 -2
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +6 -4
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +43 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +35 -25
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +110 -27
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/writer.js +737 -301
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
- package/dist/es5/parquetjs/file.js +15 -15
- package/dist/es5/parquetjs/file.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/parser/decoders.js +391 -218
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +180 -62
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +370 -125
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +320 -91
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +11 -9
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +87 -73
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +96 -56
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +40 -39
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +12 -8
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +50 -22
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/package.json +5 -5
|
@@ -2,11 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
4
|
|
|
5
|
+
var _typeof = require("@babel/runtime/helpers/typeof");
|
|
6
|
+
|
|
5
7
|
Object.defineProperty(exports, "__esModule", {
|
|
6
8
|
value: true
|
|
7
9
|
});
|
|
8
10
|
exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
|
|
9
11
|
|
|
12
|
+
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
13
|
+
|
|
14
|
+
var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
|
|
15
|
+
|
|
16
|
+
var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
|
|
17
|
+
|
|
18
|
+
var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
|
|
19
|
+
|
|
20
|
+
var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
|
|
21
|
+
|
|
22
|
+
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
23
|
+
|
|
24
|
+
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
25
|
+
|
|
26
|
+
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
27
|
+
|
|
10
28
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
11
29
|
|
|
12
30
|
var _stream = require("stream");
|
|
@@ -25,33 +43,30 @@ var _readUtils = require("../utils/read-utils");
|
|
|
25
43
|
|
|
26
44
|
var _nodeInt = _interopRequireDefault(require("node-int64"));
|
|
27
45
|
|
|
28
|
-
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
46
|
+
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
29
47
|
|
|
30
|
-
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null ||
|
|
48
|
+
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
|
|
31
49
|
|
|
32
|
-
|
|
33
|
-
const PARQUET_VERSION = 1;
|
|
34
|
-
const PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
35
|
-
const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
36
|
-
const PARQUET_RDLVL_TYPE = 'INT32';
|
|
37
|
-
const PARQUET_RDLVL_ENCODING = 'RLE';
|
|
50
|
+
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
|
|
38
51
|
|
|
39
|
-
|
|
40
|
-
static async openFile(schema, path, opts) {
|
|
41
|
-
const outputStream = await (0, _fileUtils.osopen)(path, opts);
|
|
42
|
-
return ParquetWriter.openStream(schema, outputStream, opts);
|
|
43
|
-
}
|
|
52
|
+
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
44
53
|
|
|
45
|
-
|
|
46
|
-
if (!opts) {
|
|
47
|
-
opts = {};
|
|
48
|
-
}
|
|
54
|
+
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
|
|
49
55
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
56
|
+
function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
|
|
57
|
+
|
|
58
|
+
function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
|
|
53
59
|
|
|
54
|
-
|
|
60
|
+
var PARQUET_MAGIC = 'PAR1';
|
|
61
|
+
var PARQUET_VERSION = 1;
|
|
62
|
+
var PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
63
|
+
var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
64
|
+
var PARQUET_RDLVL_TYPE = 'INT32';
|
|
65
|
+
var PARQUET_RDLVL_ENCODING = 'RLE';
|
|
66
|
+
|
|
67
|
+
var ParquetWriter = function () {
|
|
68
|
+
function ParquetWriter(schema, envelopeWriter, opts) {
|
|
69
|
+
(0, _classCallCheck2.default)(this, ParquetWriter);
|
|
55
70
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
56
71
|
(0, _defineProperty2.default)(this, "envelopeWriter", void 0);
|
|
57
72
|
(0, _defineProperty2.default)(this, "rowBuffer", void 0);
|
|
@@ -67,72 +82,217 @@ class ParquetWriter {
|
|
|
67
82
|
this.writeHeader();
|
|
68
83
|
}
|
|
69
84
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
85
|
+
(0, _createClass2.default)(ParquetWriter, [{
|
|
86
|
+
key: "writeHeader",
|
|
87
|
+
value: function () {
|
|
88
|
+
var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
|
|
89
|
+
return _regenerator.default.wrap(function _callee$(_context) {
|
|
90
|
+
while (1) {
|
|
91
|
+
switch (_context.prev = _context.next) {
|
|
92
|
+
case 0:
|
|
93
|
+
_context.prev = 0;
|
|
94
|
+
_context.next = 3;
|
|
95
|
+
return this.envelopeWriter.writeHeader();
|
|
96
|
+
|
|
97
|
+
case 3:
|
|
98
|
+
_context.next = 10;
|
|
99
|
+
break;
|
|
100
|
+
|
|
101
|
+
case 5:
|
|
102
|
+
_context.prev = 5;
|
|
103
|
+
_context.t0 = _context["catch"](0);
|
|
104
|
+
_context.next = 9;
|
|
105
|
+
return this.envelopeWriter.close();
|
|
106
|
+
|
|
107
|
+
case 9:
|
|
108
|
+
throw _context.t0;
|
|
109
|
+
|
|
110
|
+
case 10:
|
|
111
|
+
case "end":
|
|
112
|
+
return _context.stop();
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
}, _callee, this, [[0, 5]]);
|
|
116
|
+
}));
|
|
117
|
+
|
|
118
|
+
function writeHeader() {
|
|
119
|
+
return _writeHeader.apply(this, arguments);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return writeHeader;
|
|
123
|
+
}()
|
|
124
|
+
}, {
|
|
125
|
+
key: "appendRow",
|
|
126
|
+
value: function () {
|
|
127
|
+
var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(row) {
|
|
128
|
+
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
129
|
+
while (1) {
|
|
130
|
+
switch (_context2.prev = _context2.next) {
|
|
131
|
+
case 0:
|
|
132
|
+
if (!this.closed) {
|
|
133
|
+
_context2.next = 2;
|
|
134
|
+
break;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
throw new Error('writer was closed');
|
|
138
|
+
|
|
139
|
+
case 2:
|
|
140
|
+
Shred.shredRecord(this.schema, row, this.rowBuffer);
|
|
141
|
+
|
|
142
|
+
if (this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
143
|
+
this.rowBuffer = {};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
case 4:
|
|
147
|
+
case "end":
|
|
148
|
+
return _context2.stop();
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}, _callee2, this);
|
|
152
|
+
}));
|
|
153
|
+
|
|
154
|
+
function appendRow(_x) {
|
|
155
|
+
return _appendRow.apply(this, arguments);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
return appendRow;
|
|
159
|
+
}()
|
|
160
|
+
}, {
|
|
161
|
+
key: "close",
|
|
162
|
+
value: function () {
|
|
163
|
+
var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(callback) {
|
|
164
|
+
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
165
|
+
while (1) {
|
|
166
|
+
switch (_context3.prev = _context3.next) {
|
|
167
|
+
case 0:
|
|
168
|
+
if (!this.closed) {
|
|
169
|
+
_context3.next = 2;
|
|
170
|
+
break;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
throw new Error('writer was closed');
|
|
174
|
+
|
|
175
|
+
case 2:
|
|
176
|
+
this.closed = true;
|
|
177
|
+
|
|
178
|
+
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
179
|
+
this.rowBuffer = {};
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
_context3.next = 6;
|
|
183
|
+
return this.envelopeWriter.writeFooter(this.userMetadata);
|
|
184
|
+
|
|
185
|
+
case 6:
|
|
186
|
+
_context3.next = 8;
|
|
187
|
+
return this.envelopeWriter.close();
|
|
188
|
+
|
|
189
|
+
case 8:
|
|
190
|
+
if (callback) {
|
|
191
|
+
callback();
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
case 9:
|
|
195
|
+
case "end":
|
|
196
|
+
return _context3.stop();
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
}, _callee3, this);
|
|
200
|
+
}));
|
|
201
|
+
|
|
202
|
+
function close(_x2) {
|
|
203
|
+
return _close.apply(this, arguments);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
return close;
|
|
207
|
+
}()
|
|
208
|
+
}, {
|
|
209
|
+
key: "setMetadata",
|
|
210
|
+
value: function setMetadata(key, value) {
|
|
211
|
+
this.userMetadata[String(key)] = String(value);
|
|
76
212
|
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
throw new Error('writer was closed');
|
|
213
|
+
}, {
|
|
214
|
+
key: "setRowGroupSize",
|
|
215
|
+
value: function setRowGroupSize(cnt) {
|
|
216
|
+
this.rowGroupSize = cnt;
|
|
82
217
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
this.rowBuffer = {};
|
|
218
|
+
}, {
|
|
219
|
+
key: "setPageSize",
|
|
220
|
+
value: function setPageSize(cnt) {
|
|
221
|
+
this.envelopeWriter.setPageSize(cnt);
|
|
88
222
|
}
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
223
|
+
}], [{
|
|
224
|
+
key: "openFile",
|
|
225
|
+
value: function () {
|
|
226
|
+
var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, path, opts) {
|
|
227
|
+
var outputStream;
|
|
228
|
+
return _regenerator.default.wrap(function _callee4$(_context4) {
|
|
229
|
+
while (1) {
|
|
230
|
+
switch (_context4.prev = _context4.next) {
|
|
231
|
+
case 0:
|
|
232
|
+
_context4.next = 2;
|
|
233
|
+
return (0, _fileUtils.osopen)(path, opts);
|
|
234
|
+
|
|
235
|
+
case 2:
|
|
236
|
+
outputStream = _context4.sent;
|
|
237
|
+
return _context4.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
|
|
238
|
+
|
|
239
|
+
case 4:
|
|
240
|
+
case "end":
|
|
241
|
+
return _context4.stop();
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
}, _callee4);
|
|
245
|
+
}));
|
|
246
|
+
|
|
247
|
+
function openFile(_x3, _x4, _x5) {
|
|
248
|
+
return _openFile.apply(this, arguments);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
return openFile;
|
|
252
|
+
}()
|
|
253
|
+
}, {
|
|
254
|
+
key: "openStream",
|
|
255
|
+
value: function () {
|
|
256
|
+
var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream, opts) {
|
|
257
|
+
var envelopeWriter;
|
|
258
|
+
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
259
|
+
while (1) {
|
|
260
|
+
switch (_context5.prev = _context5.next) {
|
|
261
|
+
case 0:
|
|
262
|
+
if (!opts) {
|
|
263
|
+
opts = {};
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
_context5.next = 3;
|
|
267
|
+
return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
268
|
+
|
|
269
|
+
case 3:
|
|
270
|
+
envelopeWriter = _context5.sent;
|
|
271
|
+
return _context5.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
|
|
272
|
+
|
|
273
|
+
case 5:
|
|
274
|
+
case "end":
|
|
275
|
+
return _context5.stop();
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
}, _callee5);
|
|
279
|
+
}));
|
|
280
|
+
|
|
281
|
+
function openStream(_x6, _x7, _x8) {
|
|
282
|
+
return _openStream.apply(this, arguments);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return openStream;
|
|
286
|
+
}()
|
|
287
|
+
}]);
|
|
288
|
+
return ParquetWriter;
|
|
289
|
+
}();
|
|
123
290
|
|
|
124
291
|
exports.ParquetWriter = ParquetWriter;
|
|
125
292
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
const closeFn = _fileUtils.osclose.bind(undefined, outputStream);
|
|
131
|
-
|
|
132
|
-
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
|
|
133
|
-
}
|
|
134
|
-
|
|
135
|
-
constructor(schema, writeFn, closeFn, fileOffset, opts) {
|
|
293
|
+
var ParquetEnvelopeWriter = function () {
|
|
294
|
+
function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
|
|
295
|
+
(0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
|
|
136
296
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
137
297
|
(0, _defineProperty2.default)(this, "write", void 0);
|
|
138
298
|
(0, _defineProperty2.default)(this, "close", void 0);
|
|
@@ -151,72 +311,194 @@ class ParquetEnvelopeWriter {
|
|
|
151
311
|
this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
|
|
152
312
|
}
|
|
153
313
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
314
|
+
(0, _createClass2.default)(ParquetEnvelopeWriter, [{
|
|
315
|
+
key: "writeSection",
|
|
316
|
+
value: function writeSection(buf) {
|
|
317
|
+
this.offset += buf.length;
|
|
318
|
+
return this.write(buf);
|
|
319
|
+
}
|
|
320
|
+
}, {
|
|
321
|
+
key: "writeHeader",
|
|
322
|
+
value: function writeHeader() {
|
|
323
|
+
return this.writeSection(Buffer.from(PARQUET_MAGIC));
|
|
324
|
+
}
|
|
325
|
+
}, {
|
|
326
|
+
key: "writeRowGroup",
|
|
327
|
+
value: function () {
|
|
328
|
+
var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(records) {
|
|
329
|
+
var rgroup;
|
|
330
|
+
return _regenerator.default.wrap(function _callee6$(_context6) {
|
|
331
|
+
while (1) {
|
|
332
|
+
switch (_context6.prev = _context6.next) {
|
|
333
|
+
case 0:
|
|
334
|
+
_context6.next = 2;
|
|
335
|
+
return encodeRowGroup(this.schema, records, {
|
|
336
|
+
baseOffset: this.offset,
|
|
337
|
+
pageSize: this.pageSize,
|
|
338
|
+
useDataPageV2: this.useDataPageV2
|
|
339
|
+
});
|
|
340
|
+
|
|
341
|
+
case 2:
|
|
342
|
+
rgroup = _context6.sent;
|
|
343
|
+
this.rowCount += records.rowCount;
|
|
344
|
+
this.rowGroups.push(rgroup.metadata);
|
|
345
|
+
_context6.next = 7;
|
|
346
|
+
return this.writeSection(rgroup.body);
|
|
347
|
+
|
|
348
|
+
case 7:
|
|
349
|
+
return _context6.abrupt("return", _context6.sent);
|
|
350
|
+
|
|
351
|
+
case 8:
|
|
352
|
+
case "end":
|
|
353
|
+
return _context6.stop();
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
}, _callee6, this);
|
|
357
|
+
}));
|
|
358
|
+
|
|
359
|
+
function writeRowGroup(_x9) {
|
|
360
|
+
return _writeRowGroup.apply(this, arguments);
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
return writeRowGroup;
|
|
364
|
+
}()
|
|
365
|
+
}, {
|
|
366
|
+
key: "writeFooter",
|
|
367
|
+
value: function writeFooter(userMetadata) {
|
|
368
|
+
if (!userMetadata) {
|
|
369
|
+
userMetadata = {};
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
|
|
373
|
+
}
|
|
374
|
+
}, {
|
|
375
|
+
key: "setPageSize",
|
|
376
|
+
value: function setPageSize(cnt) {
|
|
377
|
+
this.pageSize = cnt;
|
|
177
378
|
}
|
|
379
|
+
}], [{
|
|
380
|
+
key: "openStream",
|
|
381
|
+
value: function () {
|
|
382
|
+
var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(schema, outputStream, opts) {
|
|
383
|
+
var writeFn, closeFn;
|
|
384
|
+
return _regenerator.default.wrap(function _callee7$(_context7) {
|
|
385
|
+
while (1) {
|
|
386
|
+
switch (_context7.prev = _context7.next) {
|
|
387
|
+
case 0:
|
|
388
|
+
writeFn = _fileUtils.oswrite.bind(undefined, outputStream);
|
|
389
|
+
closeFn = _fileUtils.osclose.bind(undefined, outputStream);
|
|
390
|
+
return _context7.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
|
|
391
|
+
|
|
392
|
+
case 3:
|
|
393
|
+
case "end":
|
|
394
|
+
return _context7.stop();
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
}, _callee7);
|
|
398
|
+
}));
|
|
399
|
+
|
|
400
|
+
function openStream(_x10, _x11, _x12) {
|
|
401
|
+
return _openStream2.apply(this, arguments);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
return openStream;
|
|
405
|
+
}()
|
|
406
|
+
}]);
|
|
407
|
+
return ParquetEnvelopeWriter;
|
|
408
|
+
}();
|
|
178
409
|
|
|
179
|
-
|
|
180
|
-
}
|
|
410
|
+
exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
|
|
181
411
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
}
|
|
412
|
+
var ParquetTransformer = function (_Transform) {
|
|
413
|
+
(0, _inherits2.default)(ParquetTransformer, _Transform);
|
|
185
414
|
|
|
186
|
-
|
|
415
|
+
var _super = _createSuper(ParquetTransformer);
|
|
187
416
|
|
|
188
|
-
|
|
417
|
+
function ParquetTransformer(schema) {
|
|
418
|
+
var _this;
|
|
189
419
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
420
|
+
var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
421
|
+
(0, _classCallCheck2.default)(this, ParquetTransformer);
|
|
422
|
+
_this = _super.call(this, {
|
|
193
423
|
objectMode: true
|
|
194
424
|
});
|
|
195
|
-
(0, _defineProperty2.default)(
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
return
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
425
|
+
(0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
|
|
426
|
+
|
|
427
|
+
var writeProxy = function (t) {
|
|
428
|
+
return function () {
|
|
429
|
+
var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(b) {
|
|
430
|
+
return _regenerator.default.wrap(function _callee8$(_context8) {
|
|
431
|
+
while (1) {
|
|
432
|
+
switch (_context8.prev = _context8.next) {
|
|
433
|
+
case 0:
|
|
434
|
+
t.push(b);
|
|
435
|
+
|
|
436
|
+
case 1:
|
|
437
|
+
case "end":
|
|
438
|
+
return _context8.stop();
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
}, _callee8);
|
|
442
|
+
}));
|
|
443
|
+
|
|
444
|
+
return function (_x13) {
|
|
445
|
+
return _ref.apply(this, arguments);
|
|
446
|
+
};
|
|
447
|
+
}();
|
|
448
|
+
}((0, _assertThisInitialized2.default)(_this));
|
|
449
|
+
|
|
450
|
+
_this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
|
|
451
|
+
return _regenerator.default.wrap(function _callee9$(_context9) {
|
|
452
|
+
while (1) {
|
|
453
|
+
switch (_context9.prev = _context9.next) {
|
|
454
|
+
case 0:
|
|
455
|
+
case "end":
|
|
456
|
+
return _context9.stop();
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
}, _callee9);
|
|
460
|
+
})), 0, opts), opts);
|
|
461
|
+
return _this;
|
|
204
462
|
}
|
|
205
463
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
return Promise.resolve();
|
|
213
|
-
}
|
|
464
|
+
(0, _createClass2.default)(ParquetTransformer, [{
|
|
465
|
+
key: "_transform",
|
|
466
|
+
value: function _transform(row, encoding, callback) {
|
|
467
|
+
if (row) {
|
|
468
|
+
return this.writer.appendRow(row).then(callback);
|
|
469
|
+
}
|
|
214
470
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
471
|
+
callback();
|
|
472
|
+
return Promise.resolve();
|
|
473
|
+
}
|
|
474
|
+
}, {
|
|
475
|
+
key: "_flush",
|
|
476
|
+
value: function () {
|
|
477
|
+
var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(callback) {
|
|
478
|
+
return _regenerator.default.wrap(function _callee10$(_context10) {
|
|
479
|
+
while (1) {
|
|
480
|
+
switch (_context10.prev = _context10.next) {
|
|
481
|
+
case 0:
|
|
482
|
+
_context10.next = 2;
|
|
483
|
+
return this.writer.close(callback);
|
|
484
|
+
|
|
485
|
+
case 2:
|
|
486
|
+
case "end":
|
|
487
|
+
return _context10.stop();
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
}, _callee10, this);
|
|
491
|
+
}));
|
|
492
|
+
|
|
493
|
+
function _flush(_x14) {
|
|
494
|
+
return _flush2.apply(this, arguments);
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
return _flush;
|
|
498
|
+
}()
|
|
499
|
+
}]);
|
|
500
|
+
return ParquetTransformer;
|
|
501
|
+
}(_stream.Transform);
|
|
220
502
|
|
|
221
503
|
exports.ParquetTransformer = ParquetTransformer;
|
|
222
504
|
|
|
@@ -228,160 +510,304 @@ function encodeValues(type, encoding, values, opts) {
|
|
|
228
510
|
return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
|
|
229
511
|
}
|
|
230
512
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
if (column.rLevelMax > 0) {
|
|
235
|
-
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
236
|
-
bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
|
|
237
|
-
});
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
let dLevelsBuf = Buffer.alloc(0);
|
|
241
|
-
|
|
242
|
-
if (column.dLevelMax > 0) {
|
|
243
|
-
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
244
|
-
bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
|
|
245
|
-
});
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
const valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
249
|
-
typeLength: column.typeLength,
|
|
250
|
-
bitWidth: column.typeLength
|
|
251
|
-
});
|
|
252
|
-
const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
253
|
-
const compressedBuf = await Compression.deflate(column.compression, dataBuf);
|
|
254
|
-
const header = new _parquetThrift.PageHeader({
|
|
255
|
-
type: _parquetThrift.PageType.DATA_PAGE,
|
|
256
|
-
data_page_header: new _parquetThrift.DataPageHeader({
|
|
257
|
-
num_values: data.count,
|
|
258
|
-
encoding: _parquetThrift.Encoding[column.encoding],
|
|
259
|
-
definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
|
|
260
|
-
repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
|
|
261
|
-
}),
|
|
262
|
-
uncompressed_page_size: dataBuf.length,
|
|
263
|
-
compressed_page_size: compressedBuf.length
|
|
264
|
-
});
|
|
265
|
-
const headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
266
|
-
const page = Buffer.concat([headerBuf, compressedBuf]);
|
|
267
|
-
return {
|
|
268
|
-
header,
|
|
269
|
-
headerSize: headerBuf.length,
|
|
270
|
-
page
|
|
271
|
-
};
|
|
513
|
+
function encodeDataPage(_x15, _x16) {
|
|
514
|
+
return _encodeDataPage.apply(this, arguments);
|
|
272
515
|
}
|
|
273
516
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
517
|
+
function _encodeDataPage() {
|
|
518
|
+
_encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(column, data) {
|
|
519
|
+
var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
|
|
520
|
+
return _regenerator.default.wrap(function _callee11$(_context11) {
|
|
521
|
+
while (1) {
|
|
522
|
+
switch (_context11.prev = _context11.next) {
|
|
523
|
+
case 0:
|
|
524
|
+
rLevelsBuf = Buffer.alloc(0);
|
|
525
|
+
|
|
526
|
+
if (column.rLevelMax > 0) {
|
|
527
|
+
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
528
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax)
|
|
529
|
+
});
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
dLevelsBuf = Buffer.alloc(0);
|
|
533
|
+
|
|
534
|
+
if (column.dLevelMax > 0) {
|
|
535
|
+
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
536
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax)
|
|
537
|
+
});
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
541
|
+
typeLength: column.typeLength,
|
|
542
|
+
bitWidth: column.typeLength
|
|
543
|
+
});
|
|
544
|
+
dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
545
|
+
_context11.next = 8;
|
|
546
|
+
return Compression.deflate(column.compression, dataBuf);
|
|
547
|
+
|
|
548
|
+
case 8:
|
|
549
|
+
compressedBuf = _context11.sent;
|
|
550
|
+
header = new _parquetThrift.PageHeader({
|
|
551
|
+
type: _parquetThrift.PageType.DATA_PAGE,
|
|
552
|
+
data_page_header: new _parquetThrift.DataPageHeader({
|
|
553
|
+
num_values: data.count,
|
|
554
|
+
encoding: _parquetThrift.Encoding[column.encoding],
|
|
555
|
+
definition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING],
|
|
556
|
+
repetition_level_encoding: _parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]
|
|
557
|
+
}),
|
|
558
|
+
uncompressed_page_size: dataBuf.length,
|
|
559
|
+
compressed_page_size: compressedBuf.length
|
|
560
|
+
});
|
|
561
|
+
headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
562
|
+
page = Buffer.concat([headerBuf, compressedBuf]);
|
|
563
|
+
return _context11.abrupt("return", {
|
|
564
|
+
header: header,
|
|
565
|
+
headerSize: headerBuf.length,
|
|
566
|
+
page: page
|
|
567
|
+
});
|
|
568
|
+
|
|
569
|
+
case 13:
|
|
570
|
+
case "end":
|
|
571
|
+
return _context11.stop();
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
}, _callee11);
|
|
575
|
+
}));
|
|
576
|
+
return _encodeDataPage.apply(this, arguments);
|
|
577
|
+
}
|
|
297
578
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
|
|
301
|
-
num_values: data.count,
|
|
302
|
-
num_nulls: data.count - data.values.length,
|
|
303
|
-
num_rows: rowCount,
|
|
304
|
-
encoding: _parquetThrift.Encoding[column.encoding],
|
|
305
|
-
definition_levels_byte_length: dLevelsBuf.length,
|
|
306
|
-
repetition_levels_byte_length: rLevelsBuf.length,
|
|
307
|
-
is_compressed: column.compression !== 'UNCOMPRESSED'
|
|
308
|
-
}),
|
|
309
|
-
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
|
|
310
|
-
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
|
|
311
|
-
});
|
|
312
|
-
const headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
313
|
-
const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
314
|
-
return {
|
|
315
|
-
header,
|
|
316
|
-
headerSize: headerBuf.length,
|
|
317
|
-
page
|
|
318
|
-
};
|
|
579
|
+
function encodeDataPageV2(_x17, _x18, _x19) {
|
|
580
|
+
return _encodeDataPageV.apply(this, arguments);
|
|
319
581
|
}
|
|
320
582
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
583
|
+
function _encodeDataPageV() {
|
|
584
|
+
_encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee12(column, data, rowCount) {
|
|
585
|
+
var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
|
|
586
|
+
return _regenerator.default.wrap(function _callee12$(_context12) {
|
|
587
|
+
while (1) {
|
|
588
|
+
switch (_context12.prev = _context12.next) {
|
|
589
|
+
case 0:
|
|
590
|
+
valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
591
|
+
typeLength: column.typeLength,
|
|
592
|
+
bitWidth: column.typeLength
|
|
593
|
+
});
|
|
594
|
+
_context12.next = 3;
|
|
595
|
+
return Compression.deflate(column.compression, valuesBuf);
|
|
596
|
+
|
|
597
|
+
case 3:
|
|
598
|
+
compressedBuf = _context12.sent;
|
|
599
|
+
rLevelsBuf = Buffer.alloc(0);
|
|
600
|
+
|
|
601
|
+
if (column.rLevelMax > 0) {
|
|
602
|
+
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
603
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.rLevelMax),
|
|
604
|
+
disableEnvelope: true
|
|
605
|
+
});
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
dLevelsBuf = Buffer.alloc(0);
|
|
609
|
+
|
|
610
|
+
if (column.dLevelMax > 0) {
|
|
611
|
+
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
612
|
+
bitWidth: (0, _readUtils.getBitWidth)(column.dLevelMax),
|
|
613
|
+
disableEnvelope: true
|
|
614
|
+
});
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
header = new _parquetThrift.PageHeader({
|
|
618
|
+
type: _parquetThrift.PageType.DATA_PAGE_V2,
|
|
619
|
+
data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
|
|
620
|
+
num_values: data.count,
|
|
621
|
+
num_nulls: data.count - data.values.length,
|
|
622
|
+
num_rows: rowCount,
|
|
623
|
+
encoding: _parquetThrift.Encoding[column.encoding],
|
|
624
|
+
definition_levels_byte_length: dLevelsBuf.length,
|
|
625
|
+
repetition_levels_byte_length: rLevelsBuf.length,
|
|
626
|
+
is_compressed: column.compression !== 'UNCOMPRESSED'
|
|
627
|
+
}),
|
|
628
|
+
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
|
|
629
|
+
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
|
|
630
|
+
});
|
|
631
|
+
headerBuf = (0, _readUtils.serializeThrift)(header);
|
|
632
|
+
page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
633
|
+
return _context12.abrupt("return", {
|
|
634
|
+
header: header,
|
|
635
|
+
headerSize: headerBuf.length,
|
|
636
|
+
page: page
|
|
637
|
+
});
|
|
638
|
+
|
|
639
|
+
case 12:
|
|
640
|
+
case "end":
|
|
641
|
+
return _context12.stop();
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
}, _callee12);
|
|
645
|
+
}));
|
|
646
|
+
return _encodeDataPageV.apply(this, arguments);
|
|
352
647
|
}
|
|
353
648
|
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
columns: [],
|
|
358
|
-
total_byte_size: 0
|
|
359
|
-
});
|
|
360
|
-
let body = Buffer.alloc(0);
|
|
649
|
+
function encodeColumnChunk(_x20, _x21, _x22, _x23) {
|
|
650
|
+
return _encodeColumnChunk.apply(this, arguments);
|
|
651
|
+
}
|
|
361
652
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
653
|
+
function _encodeColumnChunk() {
|
|
654
|
+
_encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee13(column, buffer, offset, opts) {
|
|
655
|
+
var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
|
|
656
|
+
return _regenerator.default.wrap(function _callee13$(_context13) {
|
|
657
|
+
while (1) {
|
|
658
|
+
switch (_context13.prev = _context13.next) {
|
|
659
|
+
case 0:
|
|
660
|
+
data = buffer.columnData[column.path.join()];
|
|
661
|
+
baseOffset = (opts.baseOffset || 0) + offset;
|
|
662
|
+
total_uncompressed_size = 0;
|
|
663
|
+
total_compressed_size = 0;
|
|
664
|
+
|
|
665
|
+
if (!opts.useDataPageV2) {
|
|
666
|
+
_context13.next = 10;
|
|
667
|
+
break;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
_context13.next = 7;
|
|
671
|
+
return encodeDataPageV2(column, data, buffer.rowCount);
|
|
672
|
+
|
|
673
|
+
case 7:
|
|
674
|
+
_context13.t0 = _context13.sent;
|
|
675
|
+
_context13.next = 13;
|
|
676
|
+
break;
|
|
677
|
+
|
|
678
|
+
case 10:
|
|
679
|
+
_context13.next = 12;
|
|
680
|
+
return encodeDataPage(column, data);
|
|
681
|
+
|
|
682
|
+
case 12:
|
|
683
|
+
_context13.t0 = _context13.sent;
|
|
684
|
+
|
|
685
|
+
case 13:
|
|
686
|
+
result = _context13.t0;
|
|
687
|
+
pageBuf = result.page;
|
|
688
|
+
total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
|
|
689
|
+
total_compressed_size += result.header.compressed_page_size + result.headerSize;
|
|
690
|
+
metadata = new _parquetThrift.ColumnMetaData({
|
|
691
|
+
path_in_schema: column.path,
|
|
692
|
+
num_values: data.count,
|
|
693
|
+
data_page_offset: baseOffset,
|
|
694
|
+
encodings: [],
|
|
695
|
+
total_uncompressed_size: total_uncompressed_size,
|
|
696
|
+
total_compressed_size: total_compressed_size,
|
|
697
|
+
type: _parquetThrift.Type[column.primitiveType],
|
|
698
|
+
codec: _parquetThrift.CompressionCodec[column.compression]
|
|
699
|
+
});
|
|
700
|
+
metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
|
|
701
|
+
metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
|
|
702
|
+
metadataOffset = baseOffset + pageBuf.length;
|
|
703
|
+
body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
|
|
704
|
+
return _context13.abrupt("return", {
|
|
705
|
+
body: body,
|
|
706
|
+
metadata: metadata,
|
|
707
|
+
metadataOffset: metadataOffset
|
|
708
|
+
});
|
|
709
|
+
|
|
710
|
+
case 23:
|
|
711
|
+
case "end":
|
|
712
|
+
return _context13.stop();
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
}, _callee13);
|
|
716
|
+
}));
|
|
717
|
+
return _encodeColumnChunk.apply(this, arguments);
|
|
718
|
+
}
|
|
366
719
|
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
meta_data: cchunkData.metadata
|
|
371
|
-
});
|
|
372
|
-
metadata.columns.push(cchunk);
|
|
373
|
-
metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
374
|
-
body = Buffer.concat([body, cchunkData.body]);
|
|
375
|
-
}
|
|
720
|
+
function encodeRowGroup(_x24, _x25, _x26) {
|
|
721
|
+
return _encodeRowGroup.apply(this, arguments);
|
|
722
|
+
}
|
|
376
723
|
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
metadata
|
|
380
|
-
|
|
724
|
+
function _encodeRowGroup() {
|
|
725
|
+
_encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee14(schema, data, opts) {
|
|
726
|
+
var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
|
|
727
|
+
|
|
728
|
+
return _regenerator.default.wrap(function _callee14$(_context14) {
|
|
729
|
+
while (1) {
|
|
730
|
+
switch (_context14.prev = _context14.next) {
|
|
731
|
+
case 0:
|
|
732
|
+
metadata = new _parquetThrift.RowGroup({
|
|
733
|
+
num_rows: data.rowCount,
|
|
734
|
+
columns: [],
|
|
735
|
+
total_byte_size: 0
|
|
736
|
+
});
|
|
737
|
+
body = Buffer.alloc(0);
|
|
738
|
+
_iterator2 = _createForOfIteratorHelper(schema.fieldList);
|
|
739
|
+
_context14.prev = 3;
|
|
740
|
+
|
|
741
|
+
_iterator2.s();
|
|
742
|
+
|
|
743
|
+
case 5:
|
|
744
|
+
if ((_step2 = _iterator2.n()).done) {
|
|
745
|
+
_context14.next = 18;
|
|
746
|
+
break;
|
|
747
|
+
}
|
|
748
|
+
|
|
749
|
+
field = _step2.value;
|
|
750
|
+
|
|
751
|
+
if (!field.isNested) {
|
|
752
|
+
_context14.next = 9;
|
|
753
|
+
break;
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
return _context14.abrupt("continue", 16);
|
|
757
|
+
|
|
758
|
+
case 9:
|
|
759
|
+
_context14.next = 11;
|
|
760
|
+
return encodeColumnChunk(field, data, body.length, opts);
|
|
761
|
+
|
|
762
|
+
case 11:
|
|
763
|
+
cchunkData = _context14.sent;
|
|
764
|
+
cchunk = new _parquetThrift.ColumnChunk({
|
|
765
|
+
file_offset: cchunkData.metadataOffset,
|
|
766
|
+
meta_data: cchunkData.metadata
|
|
767
|
+
});
|
|
768
|
+
metadata.columns.push(cchunk);
|
|
769
|
+
metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
770
|
+
body = Buffer.concat([body, cchunkData.body]);
|
|
771
|
+
|
|
772
|
+
case 16:
|
|
773
|
+
_context14.next = 5;
|
|
774
|
+
break;
|
|
775
|
+
|
|
776
|
+
case 18:
|
|
777
|
+
_context14.next = 23;
|
|
778
|
+
break;
|
|
779
|
+
|
|
780
|
+
case 20:
|
|
781
|
+
_context14.prev = 20;
|
|
782
|
+
_context14.t0 = _context14["catch"](3);
|
|
783
|
+
|
|
784
|
+
_iterator2.e(_context14.t0);
|
|
785
|
+
|
|
786
|
+
case 23:
|
|
787
|
+
_context14.prev = 23;
|
|
788
|
+
|
|
789
|
+
_iterator2.f();
|
|
790
|
+
|
|
791
|
+
return _context14.finish(23);
|
|
792
|
+
|
|
793
|
+
case 26:
|
|
794
|
+
return _context14.abrupt("return", {
|
|
795
|
+
body: body,
|
|
796
|
+
metadata: metadata
|
|
797
|
+
});
|
|
798
|
+
|
|
799
|
+
case 27:
|
|
800
|
+
case "end":
|
|
801
|
+
return _context14.stop();
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
}, _callee14, null, [[3, 20, 23, 26]]);
|
|
805
|
+
}));
|
|
806
|
+
return _encodeRowGroup.apply(this, arguments);
|
|
381
807
|
}
|
|
382
808
|
|
|
383
809
|
function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
384
|
-
|
|
810
|
+
var metadata = new _parquetThrift.FileMetaData({
|
|
385
811
|
version: PARQUET_VERSION,
|
|
386
812
|
created_by: 'parquets',
|
|
387
813
|
num_rows: rowCount,
|
|
@@ -390,47 +816,57 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
|
390
816
|
key_value_metadata: []
|
|
391
817
|
});
|
|
392
818
|
|
|
393
|
-
for (
|
|
819
|
+
for (var key in userMetadata) {
|
|
394
820
|
var _metadata$key_value_m, _metadata$key_value_m2;
|
|
395
821
|
|
|
396
|
-
|
|
397
|
-
key,
|
|
822
|
+
var kv = new _parquetThrift.KeyValue({
|
|
823
|
+
key: key,
|
|
398
824
|
value: userMetadata[key]
|
|
399
825
|
});
|
|
400
826
|
(_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
|
|
401
827
|
}
|
|
402
828
|
|
|
403
829
|
{
|
|
404
|
-
|
|
830
|
+
var schemaRoot = new _parquetThrift.SchemaElement({
|
|
405
831
|
name: 'root',
|
|
406
832
|
num_children: Object.keys(schema.fields).length
|
|
407
833
|
});
|
|
408
834
|
metadata.schema.push(schemaRoot);
|
|
409
835
|
}
|
|
410
836
|
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
837
|
+
var _iterator = _createForOfIteratorHelper(schema.fieldList),
|
|
838
|
+
_step;
|
|
839
|
+
|
|
840
|
+
try {
|
|
841
|
+
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
842
|
+
var field = _step.value;
|
|
843
|
+
var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
|
|
844
|
+
var schemaElem = new _parquetThrift.SchemaElement({
|
|
845
|
+
name: field.name,
|
|
846
|
+
repetition_type: relt
|
|
847
|
+
});
|
|
848
|
+
|
|
849
|
+
if (field.isNested) {
|
|
850
|
+
schemaElem.num_children = field.fieldCount;
|
|
851
|
+
} else {
|
|
852
|
+
schemaElem.type = _parquetThrift.Type[field.primitiveType];
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
if (field.originalType) {
|
|
856
|
+
schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
schemaElem.type_length = field.typeLength;
|
|
860
|
+
metadata.schema.push(schemaElem);
|
|
422
861
|
}
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
schemaElem.type_length = field.typeLength;
|
|
429
|
-
metadata.schema.push(schemaElem);
|
|
862
|
+
} catch (err) {
|
|
863
|
+
_iterator.e(err);
|
|
864
|
+
} finally {
|
|
865
|
+
_iterator.f();
|
|
430
866
|
}
|
|
431
867
|
|
|
432
|
-
|
|
433
|
-
|
|
868
|
+
var metadataEncoded = (0, _readUtils.serializeThrift)(metadata);
|
|
869
|
+
var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
|
|
434
870
|
metadataEncoded.copy(footerEncoded);
|
|
435
871
|
footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
|
|
436
872
|
footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
|