@loaders.gl/parquet 3.0.13 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.es5.min.js +1 -1
- package/dist/dist.es5.min.js.map +1 -1
- package/dist/dist.min.js +1 -1
- package/dist/dist.min.js.map +1 -1
- package/dist/es5/bundle.js +2 -2
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/index.js +9 -9
- package/dist/es5/parquet-loader.js +70 -19
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +6 -4
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +43 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +35 -25
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +9 -7
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/file.js +15 -15
- package/dist/es5/parquetjs/file.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/reader.js +813 -276
- package/dist/es5/parquetjs/reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +11 -9
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +87 -73
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +95 -55
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +25 -25
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/util.js +71 -39
- package/dist/es5/parquetjs/util.js.map +1 -1
- package/dist/es5/parquetjs/writer.js +467 -200
- package/dist/es5/parquetjs/writer.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/parquet-worker.js +1 -1
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +4 -4
|
@@ -2,11 +2,29 @@
|
|
|
2
2
|
|
|
3
3
|
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
4
|
|
|
5
|
+
var _typeof = require("@babel/runtime/helpers/typeof");
|
|
6
|
+
|
|
5
7
|
Object.defineProperty(exports, "__esModule", {
|
|
6
8
|
value: true
|
|
7
9
|
});
|
|
8
10
|
exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
|
|
9
11
|
|
|
12
|
+
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
13
|
+
|
|
14
|
+
var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
|
|
15
|
+
|
|
16
|
+
var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
|
|
17
|
+
|
|
18
|
+
var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
|
|
19
|
+
|
|
20
|
+
var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
|
|
21
|
+
|
|
22
|
+
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
23
|
+
|
|
24
|
+
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
25
|
+
|
|
26
|
+
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
27
|
+
|
|
10
28
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
11
29
|
|
|
12
30
|
var _stream = require("stream");
|
|
@@ -23,33 +41,30 @@ var Util = _interopRequireWildcard(require("./util"));
|
|
|
23
41
|
|
|
24
42
|
var _nodeInt = _interopRequireDefault(require("node-int64"));
|
|
25
43
|
|
|
26
|
-
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
44
|
+
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
27
45
|
|
|
28
|
-
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null ||
|
|
46
|
+
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
|
|
29
47
|
|
|
30
|
-
|
|
31
|
-
const PARQUET_VERSION = 1;
|
|
32
|
-
const PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
33
|
-
const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
34
|
-
const PARQUET_RDLVL_TYPE = 'INT32';
|
|
35
|
-
const PARQUET_RDLVL_ENCODING = 'RLE';
|
|
48
|
+
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
|
|
36
49
|
|
|
37
|
-
|
|
38
|
-
static async openFile(schema, path, opts) {
|
|
39
|
-
const outputStream = await Util.osopen(path, opts);
|
|
40
|
-
return ParquetWriter.openStream(schema, outputStream, opts);
|
|
41
|
-
}
|
|
50
|
+
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
42
51
|
|
|
43
|
-
|
|
44
|
-
if (!opts) {
|
|
45
|
-
opts = {};
|
|
46
|
-
}
|
|
52
|
+
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
|
|
47
53
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
54
|
+
function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
|
|
55
|
+
|
|
56
|
+
function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
|
|
57
|
+
|
|
58
|
+
var PARQUET_MAGIC = 'PAR1';
|
|
59
|
+
var PARQUET_VERSION = 1;
|
|
60
|
+
var PARQUET_DEFAULT_PAGE_SIZE = 8192;
|
|
61
|
+
var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
|
|
62
|
+
var PARQUET_RDLVL_TYPE = 'INT32';
|
|
63
|
+
var PARQUET_RDLVL_ENCODING = 'RLE';
|
|
51
64
|
|
|
52
|
-
|
|
65
|
+
var ParquetWriter = function () {
|
|
66
|
+
function ParquetWriter(schema, envelopeWriter, opts) {
|
|
67
|
+
(0, _classCallCheck2.default)(this, ParquetWriter);
|
|
53
68
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
54
69
|
(0, _defineProperty2.default)(this, "envelopeWriter", void 0);
|
|
55
70
|
(0, _defineProperty2.default)(this, "rowBuffer", void 0);
|
|
@@ -71,61 +86,178 @@ class ParquetWriter {
|
|
|
71
86
|
}
|
|
72
87
|
}
|
|
73
88
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
89
|
+
(0, _createClass2.default)(ParquetWriter, [{
|
|
90
|
+
key: "appendRow",
|
|
91
|
+
value: function () {
|
|
92
|
+
var _appendRow = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(row) {
|
|
93
|
+
return _regenerator.default.wrap(function _callee$(_context) {
|
|
94
|
+
while (1) {
|
|
95
|
+
switch (_context.prev = _context.next) {
|
|
96
|
+
case 0:
|
|
97
|
+
if (!this.closed) {
|
|
98
|
+
_context.next = 2;
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
throw new Error('writer was closed');
|
|
103
|
+
|
|
104
|
+
case 2:
|
|
105
|
+
Shred.shredRecord(this.schema, row, this.rowBuffer);
|
|
106
|
+
|
|
107
|
+
if (this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
108
|
+
this.rowBuffer = {};
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
case 4:
|
|
112
|
+
case "end":
|
|
113
|
+
return _context.stop();
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}, _callee, this);
|
|
117
|
+
}));
|
|
118
|
+
|
|
119
|
+
function appendRow(_x) {
|
|
120
|
+
return _appendRow.apply(this, arguments);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
return appendRow;
|
|
124
|
+
}()
|
|
125
|
+
}, {
|
|
126
|
+
key: "close",
|
|
127
|
+
value: function () {
|
|
128
|
+
var _close = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(callback) {
|
|
129
|
+
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
130
|
+
while (1) {
|
|
131
|
+
switch (_context2.prev = _context2.next) {
|
|
132
|
+
case 0:
|
|
133
|
+
if (!this.closed) {
|
|
134
|
+
_context2.next = 2;
|
|
135
|
+
break;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
throw new Error('writer was closed');
|
|
139
|
+
|
|
140
|
+
case 2:
|
|
141
|
+
this.closed = true;
|
|
142
|
+
|
|
143
|
+
if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {
|
|
144
|
+
this.rowBuffer = {};
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
_context2.next = 6;
|
|
148
|
+
return this.envelopeWriter.writeFooter(this.userMetadata);
|
|
149
|
+
|
|
150
|
+
case 6:
|
|
151
|
+
_context2.next = 8;
|
|
152
|
+
return this.envelopeWriter.close();
|
|
153
|
+
|
|
154
|
+
case 8:
|
|
155
|
+
if (callback) {
|
|
156
|
+
callback();
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
case 9:
|
|
160
|
+
case "end":
|
|
161
|
+
return _context2.stop();
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}, _callee2, this);
|
|
165
|
+
}));
|
|
166
|
+
|
|
167
|
+
function close(_x2) {
|
|
168
|
+
return _close.apply(this, arguments);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
return close;
|
|
172
|
+
}()
|
|
173
|
+
}, {
|
|
174
|
+
key: "setMetadata",
|
|
175
|
+
value: function setMetadata(key, value) {
|
|
176
|
+
this.userMetadata[String(key)] = String(value);
|
|
77
177
|
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
this.rowBuffer = {};
|
|
178
|
+
}, {
|
|
179
|
+
key: "setRowGroupSize",
|
|
180
|
+
value: function setRowGroupSize(cnt) {
|
|
181
|
+
this.rowGroupSize = cnt;
|
|
83
182
|
}
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
throw new Error('writer was closed');
|
|
183
|
+
}, {
|
|
184
|
+
key: "setPageSize",
|
|
185
|
+
value: function setPageSize(cnt) {
|
|
186
|
+
this.envelopeWriter.setPageSize(cnt);
|
|
89
187
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
188
|
+
}], [{
|
|
189
|
+
key: "openFile",
|
|
190
|
+
value: function () {
|
|
191
|
+
var _openFile = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(schema, path, opts) {
|
|
192
|
+
var outputStream;
|
|
193
|
+
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
194
|
+
while (1) {
|
|
195
|
+
switch (_context3.prev = _context3.next) {
|
|
196
|
+
case 0:
|
|
197
|
+
_context3.next = 2;
|
|
198
|
+
return Util.osopen(path, opts);
|
|
199
|
+
|
|
200
|
+
case 2:
|
|
201
|
+
outputStream = _context3.sent;
|
|
202
|
+
return _context3.abrupt("return", ParquetWriter.openStream(schema, outputStream, opts));
|
|
203
|
+
|
|
204
|
+
case 4:
|
|
205
|
+
case "end":
|
|
206
|
+
return _context3.stop();
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}, _callee3);
|
|
210
|
+
}));
|
|
211
|
+
|
|
212
|
+
function openFile(_x3, _x4, _x5) {
|
|
213
|
+
return _openFile.apply(this, arguments);
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
return openFile;
|
|
217
|
+
}()
|
|
218
|
+
}, {
|
|
219
|
+
key: "openStream",
|
|
220
|
+
value: function () {
|
|
221
|
+
var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(schema, outputStream, opts) {
|
|
222
|
+
var envelopeWriter;
|
|
223
|
+
return _regenerator.default.wrap(function _callee4$(_context4) {
|
|
224
|
+
while (1) {
|
|
225
|
+
switch (_context4.prev = _context4.next) {
|
|
226
|
+
case 0:
|
|
227
|
+
if (!opts) {
|
|
228
|
+
opts = {};
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
_context4.next = 3;
|
|
232
|
+
return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
|
|
233
|
+
|
|
234
|
+
case 3:
|
|
235
|
+
envelopeWriter = _context4.sent;
|
|
236
|
+
return _context4.abrupt("return", new ParquetWriter(schema, envelopeWriter, opts));
|
|
237
|
+
|
|
238
|
+
case 5:
|
|
239
|
+
case "end":
|
|
240
|
+
return _context4.stop();
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
}, _callee4);
|
|
244
|
+
}));
|
|
245
|
+
|
|
246
|
+
function openStream(_x6, _x7, _x8) {
|
|
247
|
+
return _openStream.apply(this, arguments);
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
return openStream;
|
|
251
|
+
}()
|
|
252
|
+
}]);
|
|
253
|
+
return ParquetWriter;
|
|
254
|
+
}();
|
|
118
255
|
|
|
119
256
|
exports.ParquetWriter = ParquetWriter;
|
|
120
257
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
const closeFn = Util.osclose.bind(undefined, outputStream);
|
|
125
|
-
return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);
|
|
126
|
-
}
|
|
127
|
-
|
|
128
|
-
constructor(schema, writeFn, closeFn, fileOffset, opts) {
|
|
258
|
+
var ParquetEnvelopeWriter = function () {
|
|
259
|
+
function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
|
|
260
|
+
(0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
|
|
129
261
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
130
262
|
(0, _defineProperty2.default)(this, "write", void 0);
|
|
131
263
|
(0, _defineProperty2.default)(this, "close", void 0);
|
|
@@ -144,72 +276,186 @@ class ParquetEnvelopeWriter {
|
|
|
144
276
|
this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;
|
|
145
277
|
}
|
|
146
278
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
const rgroup = encodeRowGroup(this.schema, records, {
|
|
158
|
-
baseOffset: this.offset,
|
|
159
|
-
pageSize: this.pageSize,
|
|
160
|
-
useDataPageV2: this.useDataPageV2
|
|
161
|
-
});
|
|
162
|
-
this.rowCount += records.rowCount;
|
|
163
|
-
this.rowGroups.push(rgroup.metadata);
|
|
164
|
-
return this.writeSection(rgroup.body);
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
writeFooter(userMetadata) {
|
|
168
|
-
if (!userMetadata) {
|
|
169
|
-
userMetadata = {};
|
|
279
|
+
(0, _createClass2.default)(ParquetEnvelopeWriter, [{
|
|
280
|
+
key: "writeSection",
|
|
281
|
+
value: function writeSection(buf) {
|
|
282
|
+
this.offset += buf.length;
|
|
283
|
+
return this.write(buf);
|
|
284
|
+
}
|
|
285
|
+
}, {
|
|
286
|
+
key: "writeHeader",
|
|
287
|
+
value: function writeHeader() {
|
|
288
|
+
return this.writeSection(Buffer.from(PARQUET_MAGIC));
|
|
170
289
|
}
|
|
290
|
+
}, {
|
|
291
|
+
key: "writeRowGroup",
|
|
292
|
+
value: function () {
|
|
293
|
+
var _writeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(records) {
|
|
294
|
+
var rgroup;
|
|
295
|
+
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
296
|
+
while (1) {
|
|
297
|
+
switch (_context5.prev = _context5.next) {
|
|
298
|
+
case 0:
|
|
299
|
+
rgroup = encodeRowGroup(this.schema, records, {
|
|
300
|
+
baseOffset: this.offset,
|
|
301
|
+
pageSize: this.pageSize,
|
|
302
|
+
useDataPageV2: this.useDataPageV2
|
|
303
|
+
});
|
|
304
|
+
this.rowCount += records.rowCount;
|
|
305
|
+
this.rowGroups.push(rgroup.metadata);
|
|
306
|
+
return _context5.abrupt("return", this.writeSection(rgroup.body));
|
|
307
|
+
|
|
308
|
+
case 4:
|
|
309
|
+
case "end":
|
|
310
|
+
return _context5.stop();
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}, _callee5, this);
|
|
314
|
+
}));
|
|
315
|
+
|
|
316
|
+
function writeRowGroup(_x9) {
|
|
317
|
+
return _writeRowGroup.apply(this, arguments);
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
return writeRowGroup;
|
|
321
|
+
}()
|
|
322
|
+
}, {
|
|
323
|
+
key: "writeFooter",
|
|
324
|
+
value: function writeFooter(userMetadata) {
|
|
325
|
+
if (!userMetadata) {
|
|
326
|
+
userMetadata = {};
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata));
|
|
330
|
+
}
|
|
331
|
+
}, {
|
|
332
|
+
key: "setPageSize",
|
|
333
|
+
value: function setPageSize(cnt) {
|
|
334
|
+
this.pageSize = cnt;
|
|
335
|
+
}
|
|
336
|
+
}], [{
|
|
337
|
+
key: "openStream",
|
|
338
|
+
value: function () {
|
|
339
|
+
var _openStream2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee6(schema, outputStream, opts) {
|
|
340
|
+
var writeFn, closeFn;
|
|
341
|
+
return _regenerator.default.wrap(function _callee6$(_context6) {
|
|
342
|
+
while (1) {
|
|
343
|
+
switch (_context6.prev = _context6.next) {
|
|
344
|
+
case 0:
|
|
345
|
+
writeFn = Util.oswrite.bind(undefined, outputStream);
|
|
346
|
+
closeFn = Util.osclose.bind(undefined, outputStream);
|
|
347
|
+
return _context6.abrupt("return", new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts));
|
|
348
|
+
|
|
349
|
+
case 3:
|
|
350
|
+
case "end":
|
|
351
|
+
return _context6.stop();
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
}, _callee6);
|
|
355
|
+
}));
|
|
356
|
+
|
|
357
|
+
function openStream(_x10, _x11, _x12) {
|
|
358
|
+
return _openStream2.apply(this, arguments);
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
return openStream;
|
|
362
|
+
}()
|
|
363
|
+
}]);
|
|
364
|
+
return ParquetEnvelopeWriter;
|
|
365
|
+
}();
|
|
171
366
|
|
|
172
|
-
|
|
173
|
-
}
|
|
367
|
+
exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
|
|
174
368
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
}
|
|
369
|
+
var ParquetTransformer = function (_Transform) {
|
|
370
|
+
(0, _inherits2.default)(ParquetTransformer, _Transform);
|
|
178
371
|
|
|
179
|
-
|
|
372
|
+
var _super = _createSuper(ParquetTransformer);
|
|
180
373
|
|
|
181
|
-
|
|
374
|
+
function ParquetTransformer(schema) {
|
|
375
|
+
var _this;
|
|
182
376
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
377
|
+
var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
378
|
+
(0, _classCallCheck2.default)(this, ParquetTransformer);
|
|
379
|
+
_this = _super.call(this, {
|
|
186
380
|
objectMode: true
|
|
187
381
|
});
|
|
188
|
-
(0, _defineProperty2.default)(
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
return
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
382
|
+
(0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
|
|
383
|
+
|
|
384
|
+
var writeProxy = function (t) {
|
|
385
|
+
return function () {
|
|
386
|
+
var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee7(b) {
|
|
387
|
+
return _regenerator.default.wrap(function _callee7$(_context7) {
|
|
388
|
+
while (1) {
|
|
389
|
+
switch (_context7.prev = _context7.next) {
|
|
390
|
+
case 0:
|
|
391
|
+
t.push(b);
|
|
392
|
+
|
|
393
|
+
case 1:
|
|
394
|
+
case "end":
|
|
395
|
+
return _context7.stop();
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
}, _callee7);
|
|
399
|
+
}));
|
|
400
|
+
|
|
401
|
+
return function (_x13) {
|
|
402
|
+
return _ref.apply(this, arguments);
|
|
403
|
+
};
|
|
404
|
+
}();
|
|
405
|
+
}((0, _assertThisInitialized2.default)(_this));
|
|
406
|
+
|
|
407
|
+
_this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8() {
|
|
408
|
+
return _regenerator.default.wrap(function _callee8$(_context8) {
|
|
409
|
+
while (1) {
|
|
410
|
+
switch (_context8.prev = _context8.next) {
|
|
411
|
+
case 0:
|
|
412
|
+
case "end":
|
|
413
|
+
return _context8.stop();
|
|
414
|
+
}
|
|
415
|
+
}
|
|
416
|
+
}, _callee8);
|
|
417
|
+
})), 0, opts), opts);
|
|
418
|
+
return _this;
|
|
197
419
|
}
|
|
198
420
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
return Promise.resolve();
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
async _flush(callback) {
|
|
209
|
-
await this.writer.close(callback);
|
|
210
|
-
}
|
|
421
|
+
(0, _createClass2.default)(ParquetTransformer, [{
|
|
422
|
+
key: "_transform",
|
|
423
|
+
value: function _transform(row, encoding, callback) {
|
|
424
|
+
if (row) {
|
|
425
|
+
return this.writer.appendRow(row).then(callback);
|
|
426
|
+
}
|
|
211
427
|
|
|
212
|
-
|
|
428
|
+
callback();
|
|
429
|
+
return Promise.resolve();
|
|
430
|
+
}
|
|
431
|
+
}, {
|
|
432
|
+
key: "_flush",
|
|
433
|
+
value: function () {
|
|
434
|
+
var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(callback) {
|
|
435
|
+
return _regenerator.default.wrap(function _callee9$(_context9) {
|
|
436
|
+
while (1) {
|
|
437
|
+
switch (_context9.prev = _context9.next) {
|
|
438
|
+
case 0:
|
|
439
|
+
_context9.next = 2;
|
|
440
|
+
return this.writer.close(callback);
|
|
441
|
+
|
|
442
|
+
case 2:
|
|
443
|
+
case "end":
|
|
444
|
+
return _context9.stop();
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
}, _callee9, this);
|
|
448
|
+
}));
|
|
449
|
+
|
|
450
|
+
function _flush(_x14) {
|
|
451
|
+
return _flush2.apply(this, arguments);
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
return _flush;
|
|
455
|
+
}()
|
|
456
|
+
}]);
|
|
457
|
+
return ParquetTransformer;
|
|
458
|
+
}(_stream.Transform);
|
|
213
459
|
|
|
214
460
|
exports.ParquetTransformer = ParquetTransformer;
|
|
215
461
|
|
|
@@ -222,7 +468,7 @@ function encodeValues(type, encoding, values, opts) {
|
|
|
222
468
|
}
|
|
223
469
|
|
|
224
470
|
function encodeDataPage(column, data) {
|
|
225
|
-
|
|
471
|
+
var rLevelsBuf = Buffer.alloc(0);
|
|
226
472
|
|
|
227
473
|
if (column.rLevelMax > 0) {
|
|
228
474
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
@@ -230,7 +476,7 @@ function encodeDataPage(column, data) {
|
|
|
230
476
|
});
|
|
231
477
|
}
|
|
232
478
|
|
|
233
|
-
|
|
479
|
+
var dLevelsBuf = Buffer.alloc(0);
|
|
234
480
|
|
|
235
481
|
if (column.dLevelMax > 0) {
|
|
236
482
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
@@ -238,13 +484,13 @@ function encodeDataPage(column, data) {
|
|
|
238
484
|
});
|
|
239
485
|
}
|
|
240
486
|
|
|
241
|
-
|
|
487
|
+
var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
242
488
|
typeLength: column.typeLength,
|
|
243
489
|
bitWidth: column.typeLength
|
|
244
490
|
});
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
491
|
+
var dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
|
|
492
|
+
var compressedBuf = Compression.deflate(column.compression, dataBuf);
|
|
493
|
+
var header = new _parquetThrift.PageHeader({
|
|
248
494
|
type: _parquetThrift.PageType.DATA_PAGE,
|
|
249
495
|
data_page_header: new _parquetThrift.DataPageHeader({
|
|
250
496
|
num_values: data.count,
|
|
@@ -255,22 +501,22 @@ function encodeDataPage(column, data) {
|
|
|
255
501
|
uncompressed_page_size: dataBuf.length,
|
|
256
502
|
compressed_page_size: compressedBuf.length
|
|
257
503
|
});
|
|
258
|
-
|
|
259
|
-
|
|
504
|
+
var headerBuf = Util.serializeThrift(header);
|
|
505
|
+
var page = Buffer.concat([headerBuf, compressedBuf]);
|
|
260
506
|
return {
|
|
261
|
-
header,
|
|
507
|
+
header: header,
|
|
262
508
|
headerSize: headerBuf.length,
|
|
263
|
-
page
|
|
509
|
+
page: page
|
|
264
510
|
};
|
|
265
511
|
}
|
|
266
512
|
|
|
267
513
|
function encodeDataPageV2(column, data, rowCount) {
|
|
268
|
-
|
|
514
|
+
var valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
|
|
269
515
|
typeLength: column.typeLength,
|
|
270
516
|
bitWidth: column.typeLength
|
|
271
517
|
});
|
|
272
|
-
|
|
273
|
-
|
|
518
|
+
var compressedBuf = Compression.deflate(column.compression, valuesBuf);
|
|
519
|
+
var rLevelsBuf = Buffer.alloc(0);
|
|
274
520
|
|
|
275
521
|
if (column.rLevelMax > 0) {
|
|
276
522
|
rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
|
|
@@ -279,7 +525,7 @@ function encodeDataPageV2(column, data, rowCount) {
|
|
|
279
525
|
});
|
|
280
526
|
}
|
|
281
527
|
|
|
282
|
-
|
|
528
|
+
var dLevelsBuf = Buffer.alloc(0);
|
|
283
529
|
|
|
284
530
|
if (column.dLevelMax > 0) {
|
|
285
531
|
dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {
|
|
@@ -288,7 +534,7 @@ function encodeDataPageV2(column, data, rowCount) {
|
|
|
288
534
|
});
|
|
289
535
|
}
|
|
290
536
|
|
|
291
|
-
|
|
537
|
+
var header = new _parquetThrift.PageHeader({
|
|
292
538
|
type: _parquetThrift.PageType.DATA_PAGE_V2,
|
|
293
539
|
data_page_header_v2: new _parquetThrift.DataPageHeaderV2({
|
|
294
540
|
num_values: data.count,
|
|
@@ -302,23 +548,23 @@ function encodeDataPageV2(column, data, rowCount) {
|
|
|
302
548
|
uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,
|
|
303
549
|
compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length
|
|
304
550
|
});
|
|
305
|
-
|
|
306
|
-
|
|
551
|
+
var headerBuf = Util.serializeThrift(header);
|
|
552
|
+
var page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
|
|
307
553
|
return {
|
|
308
|
-
header,
|
|
554
|
+
header: header,
|
|
309
555
|
headerSize: headerBuf.length,
|
|
310
|
-
page
|
|
556
|
+
page: page
|
|
311
557
|
};
|
|
312
558
|
}
|
|
313
559
|
|
|
314
560
|
function encodeColumnChunk(column, buffer, offset, opts) {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
561
|
+
var data = buffer.columnData[column.path.join()];
|
|
562
|
+
var baseOffset = (opts.baseOffset || 0) + offset;
|
|
563
|
+
var pageBuf;
|
|
564
|
+
var total_uncompressed_size = 0;
|
|
565
|
+
var total_compressed_size = 0;
|
|
320
566
|
{
|
|
321
|
-
|
|
567
|
+
var result;
|
|
322
568
|
|
|
323
569
|
if (opts.useDataPageV2) {
|
|
324
570
|
result = encodeDataPageV2(column, data, buffer.rowCount);
|
|
@@ -330,58 +576,69 @@ function encodeColumnChunk(column, buffer, offset, opts) {
|
|
|
330
576
|
total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
|
|
331
577
|
total_compressed_size += result.header.compressed_page_size + result.headerSize;
|
|
332
578
|
}
|
|
333
|
-
|
|
579
|
+
var metadata = new _parquetThrift.ColumnMetaData({
|
|
334
580
|
path_in_schema: column.path,
|
|
335
581
|
num_values: data.count,
|
|
336
582
|
data_page_offset: baseOffset,
|
|
337
583
|
encodings: [],
|
|
338
|
-
total_uncompressed_size,
|
|
339
|
-
total_compressed_size,
|
|
584
|
+
total_uncompressed_size: total_uncompressed_size,
|
|
585
|
+
total_compressed_size: total_compressed_size,
|
|
340
586
|
type: _parquetThrift.Type[column.primitiveType],
|
|
341
587
|
codec: _parquetThrift.CompressionCodec[column.compression]
|
|
342
588
|
});
|
|
343
589
|
metadata.encodings.push(_parquetThrift.Encoding[PARQUET_RDLVL_ENCODING]);
|
|
344
590
|
metadata.encodings.push(_parquetThrift.Encoding[column.encoding]);
|
|
345
|
-
|
|
346
|
-
|
|
591
|
+
var metadataOffset = baseOffset + pageBuf.length;
|
|
592
|
+
var body = Buffer.concat([pageBuf, Util.serializeThrift(metadata)]);
|
|
347
593
|
return {
|
|
348
|
-
body,
|
|
349
|
-
metadata,
|
|
350
|
-
metadataOffset
|
|
594
|
+
body: body,
|
|
595
|
+
metadata: metadata,
|
|
596
|
+
metadataOffset: metadataOffset
|
|
351
597
|
};
|
|
352
598
|
}
|
|
353
599
|
|
|
354
600
|
function encodeRowGroup(schema, data, opts) {
|
|
355
|
-
|
|
601
|
+
var metadata = new _parquetThrift.RowGroup({
|
|
356
602
|
num_rows: data.rowCount,
|
|
357
603
|
columns: [],
|
|
358
604
|
total_byte_size: 0
|
|
359
605
|
});
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
606
|
+
var body = Buffer.alloc(0);
|
|
607
|
+
|
|
608
|
+
var _iterator = _createForOfIteratorHelper(schema.fieldList),
|
|
609
|
+
_step;
|
|
610
|
+
|
|
611
|
+
try {
|
|
612
|
+
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
613
|
+
var field = _step.value;
|
|
614
|
+
|
|
615
|
+
if (field.isNested) {
|
|
616
|
+
continue;
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
var cchunkData = encodeColumnChunk(field, data, body.length, opts);
|
|
620
|
+
var cchunk = new _parquetThrift.ColumnChunk({
|
|
621
|
+
file_offset: cchunkData.metadataOffset,
|
|
622
|
+
meta_data: cchunkData.metadata
|
|
623
|
+
});
|
|
624
|
+
metadata.columns.push(cchunk);
|
|
625
|
+
metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
626
|
+
body = Buffer.concat([body, cchunkData.body]);
|
|
365
627
|
}
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
meta_data: cchunkData.metadata
|
|
371
|
-
});
|
|
372
|
-
metadata.columns.push(cchunk);
|
|
373
|
-
metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
|
|
374
|
-
body = Buffer.concat([body, cchunkData.body]);
|
|
628
|
+
} catch (err) {
|
|
629
|
+
_iterator.e(err);
|
|
630
|
+
} finally {
|
|
631
|
+
_iterator.f();
|
|
375
632
|
}
|
|
376
633
|
|
|
377
634
|
return {
|
|
378
|
-
body,
|
|
379
|
-
metadata
|
|
635
|
+
body: body,
|
|
636
|
+
metadata: metadata
|
|
380
637
|
};
|
|
381
638
|
}
|
|
382
639
|
|
|
383
640
|
function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
384
|
-
|
|
641
|
+
var metadata = new _parquetThrift.FileMetaData({
|
|
385
642
|
version: PARQUET_VERSION,
|
|
386
643
|
created_by: 'parquets',
|
|
387
644
|
num_rows: rowCount,
|
|
@@ -390,47 +647,57 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
|
|
|
390
647
|
key_value_metadata: []
|
|
391
648
|
});
|
|
392
649
|
|
|
393
|
-
for (
|
|
650
|
+
for (var key in userMetadata) {
|
|
394
651
|
var _metadata$key_value_m, _metadata$key_value_m2;
|
|
395
652
|
|
|
396
|
-
|
|
397
|
-
key,
|
|
653
|
+
var kv = new _parquetThrift.KeyValue({
|
|
654
|
+
key: key,
|
|
398
655
|
value: userMetadata[key]
|
|
399
656
|
});
|
|
400
657
|
(_metadata$key_value_m = metadata.key_value_metadata) === null || _metadata$key_value_m === void 0 ? void 0 : (_metadata$key_value_m2 = _metadata$key_value_m.push) === null || _metadata$key_value_m2 === void 0 ? void 0 : _metadata$key_value_m2.call(_metadata$key_value_m, kv);
|
|
401
658
|
}
|
|
402
659
|
|
|
403
660
|
{
|
|
404
|
-
|
|
661
|
+
var schemaRoot = new _parquetThrift.SchemaElement({
|
|
405
662
|
name: 'root',
|
|
406
663
|
num_children: Object.keys(schema.fields).length
|
|
407
664
|
});
|
|
408
665
|
metadata.schema.push(schemaRoot);
|
|
409
666
|
}
|
|
410
667
|
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
668
|
+
var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
|
|
669
|
+
_step2;
|
|
670
|
+
|
|
671
|
+
try {
|
|
672
|
+
for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
|
|
673
|
+
var field = _step2.value;
|
|
674
|
+
var relt = _parquetThrift.FieldRepetitionType[field.repetitionType];
|
|
675
|
+
var schemaElem = new _parquetThrift.SchemaElement({
|
|
676
|
+
name: field.name,
|
|
677
|
+
repetition_type: relt
|
|
678
|
+
});
|
|
679
|
+
|
|
680
|
+
if (field.isNested) {
|
|
681
|
+
schemaElem.num_children = field.fieldCount;
|
|
682
|
+
} else {
|
|
683
|
+
schemaElem.type = _parquetThrift.Type[field.primitiveType];
|
|
684
|
+
}
|
|
685
|
+
|
|
686
|
+
if (field.originalType) {
|
|
687
|
+
schemaElem.converted_type = _parquetThrift.ConvertedType[field.originalType];
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
schemaElem.type_length = field.typeLength;
|
|
691
|
+
metadata.schema.push(schemaElem);
|
|
426
692
|
}
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
693
|
+
} catch (err) {
|
|
694
|
+
_iterator2.e(err);
|
|
695
|
+
} finally {
|
|
696
|
+
_iterator2.f();
|
|
430
697
|
}
|
|
431
698
|
|
|
432
|
-
|
|
433
|
-
|
|
699
|
+
var metadataEncoded = Util.serializeThrift(metadata);
|
|
700
|
+
var footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
|
|
434
701
|
metadataEncoded.copy(footerEncoded);
|
|
435
702
|
footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
|
|
436
703
|
footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
|