@loaders.gl/parquet 3.1.0-beta.3 → 3.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +6 -6
- package/dist/dist.min.js.map +2 -2
- package/dist/es5/bundle.js +1 -1
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +5 -5
- package/dist/es5/constants.js.map +1 -1
- package/dist/es5/index.js +19 -10
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +13 -13
- package/dist/es5/lib/convert-schema.js.map +1 -1
- package/dist/es5/lib/parse-parquet.js +154 -19
- package/dist/es5/lib/parse-parquet.js.map +1 -1
- package/dist/es5/lib/read-array-buffer.js +43 -6
- package/dist/es5/lib/read-array-buffer.js.map +1 -1
- package/dist/es5/parquet-loader.js +4 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +4 -4
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +10 -2
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
- package/dist/es5/parquetjs/codecs/index.js +6 -4
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/plain.js +43 -41
- package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +35 -25
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +110 -27
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/writer.js +737 -301
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
- package/dist/es5/parquetjs/file.js +15 -15
- package/dist/es5/parquetjs/file.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
- package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
- package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
- package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
- package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
- package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
- package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
- package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
- package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
- package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
- package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
- package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
- package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
- package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
- package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
- package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
- package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
- package/dist/es5/parquetjs/parser/decoders.js +391 -218
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +180 -62
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +370 -125
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +320 -91
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +11 -9
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +87 -73
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +96 -56
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +40 -39
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +12 -8
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/es5/parquetjs/utils/read-utils.js +50 -22
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/package.json +5 -5
|
@@ -7,6 +7,10 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
7
7
|
});
|
|
8
8
|
exports.ParquetSchema = void 0;
|
|
9
9
|
|
|
10
|
+
var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
|
|
11
|
+
|
|
12
|
+
var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
|
|
13
|
+
|
|
10
14
|
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
|
|
11
15
|
|
|
12
16
|
var _codecs = require("../codecs");
|
|
@@ -17,8 +21,9 @@ var _shred = require("./shred");
|
|
|
17
21
|
|
|
18
22
|
var _types = require("./types");
|
|
19
23
|
|
|
20
|
-
|
|
21
|
-
|
|
24
|
+
var ParquetSchema = function () {
|
|
25
|
+
function ParquetSchema(schema) {
|
|
26
|
+
(0, _classCallCheck2.default)(this, ParquetSchema);
|
|
22
27
|
(0, _defineProperty2.default)(this, "schema", void 0);
|
|
23
28
|
(0, _defineProperty2.default)(this, "fields", void 0);
|
|
24
29
|
(0, _defineProperty2.default)(this, "fieldList", void 0);
|
|
@@ -27,66 +32,74 @@ class ParquetSchema {
|
|
|
27
32
|
this.fieldList = listFields(this.fields);
|
|
28
33
|
}
|
|
29
34
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
for (; path.length > 1; path.shift()) {
|
|
40
|
-
n = n[path[0]].fields;
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return n[path[0]];
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
findFieldBranch(path) {
|
|
47
|
-
if (typeof path === 'string') {
|
|
48
|
-
path = path.split(',');
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
const branch = [];
|
|
52
|
-
let n = this.fields;
|
|
35
|
+
(0, _createClass2.default)(ParquetSchema, [{
|
|
36
|
+
key: "findField",
|
|
37
|
+
value: function findField(path) {
|
|
38
|
+
if (typeof path === 'string') {
|
|
39
|
+
path = path.split(',');
|
|
40
|
+
} else {
|
|
41
|
+
path = path.slice(0);
|
|
42
|
+
}
|
|
53
43
|
|
|
54
|
-
|
|
55
|
-
branch.push(n[path[0]]);
|
|
44
|
+
var n = this.fields;
|
|
56
45
|
|
|
57
|
-
|
|
46
|
+
for (; path.length > 1; path.shift()) {
|
|
58
47
|
n = n[path[0]].fields;
|
|
59
48
|
}
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
return branch;
|
|
63
|
-
}
|
|
64
49
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
}
|
|
50
|
+
return n[path[0]];
|
|
51
|
+
}
|
|
52
|
+
}, {
|
|
53
|
+
key: "findFieldBranch",
|
|
54
|
+
value: function findFieldBranch(path) {
|
|
55
|
+
if (typeof path === 'string') {
|
|
56
|
+
path = path.split(',');
|
|
57
|
+
}
|
|
68
58
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
}
|
|
59
|
+
var branch = [];
|
|
60
|
+
var n = this.fields;
|
|
72
61
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
setCompress(this.fields, type);
|
|
76
|
-
return this;
|
|
77
|
-
}
|
|
62
|
+
for (; path.length > 0; path.shift()) {
|
|
63
|
+
branch.push(n[path[0]]);
|
|
78
64
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
65
|
+
if (path.length > 1) {
|
|
66
|
+
n = n[path[0]].fields;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
82
69
|
|
|
83
|
-
|
|
70
|
+
return branch;
|
|
71
|
+
}
|
|
72
|
+
}, {
|
|
73
|
+
key: "shredRecord",
|
|
74
|
+
value: function shredRecord(record, buffer) {
|
|
75
|
+
(0, _shred.shredRecord)(this, record, buffer);
|
|
76
|
+
}
|
|
77
|
+
}, {
|
|
78
|
+
key: "materializeRecords",
|
|
79
|
+
value: function materializeRecords(buffer) {
|
|
80
|
+
return (0, _shred.materializeRecords)(this, buffer);
|
|
81
|
+
}
|
|
82
|
+
}, {
|
|
83
|
+
key: "compress",
|
|
84
|
+
value: function compress(type) {
|
|
85
|
+
setCompress(this.schema, type);
|
|
86
|
+
setCompress(this.fields, type);
|
|
87
|
+
return this;
|
|
88
|
+
}
|
|
89
|
+
}, {
|
|
90
|
+
key: "buffer",
|
|
91
|
+
value: function buffer() {
|
|
92
|
+
return (0, _shred.shredBuffer)(this);
|
|
93
|
+
}
|
|
94
|
+
}]);
|
|
95
|
+
return ParquetSchema;
|
|
96
|
+
}();
|
|
84
97
|
|
|
85
98
|
exports.ParquetSchema = ParquetSchema;
|
|
86
99
|
|
|
87
100
|
function setCompress(schema, type) {
|
|
88
|
-
for (
|
|
89
|
-
|
|
101
|
+
for (var name in schema) {
|
|
102
|
+
var node = schema[name];
|
|
90
103
|
|
|
91
104
|
if (node.fields) {
|
|
92
105
|
setCompress(node.fields, type);
|
|
@@ -97,15 +110,15 @@ function setCompress(schema, type) {
|
|
|
97
110
|
}
|
|
98
111
|
|
|
99
112
|
function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
|
|
100
|
-
|
|
113
|
+
var fieldList = {};
|
|
101
114
|
|
|
102
|
-
for (
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
115
|
+
for (var name in schema) {
|
|
116
|
+
var opts = schema[name];
|
|
117
|
+
var required = !opts.optional;
|
|
118
|
+
var repeated = Boolean(opts.repeated);
|
|
119
|
+
var rLevelMax = rLevelParentMax;
|
|
120
|
+
var dLevelMax = dLevelParentMax;
|
|
121
|
+
var repetitionType = 'REQUIRED';
|
|
109
122
|
|
|
110
123
|
if (!required) {
|
|
111
124
|
repetitionType = 'OPTIONAL';
|
|
@@ -119,22 +132,23 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
|
|
|
119
132
|
}
|
|
120
133
|
|
|
121
134
|
if (opts.fields) {
|
|
122
|
-
|
|
135
|
+
var _cpath = path.concat([name]);
|
|
136
|
+
|
|
123
137
|
fieldList[name] = {
|
|
124
|
-
name,
|
|
125
|
-
path:
|
|
126
|
-
key:
|
|
127
|
-
repetitionType,
|
|
128
|
-
rLevelMax,
|
|
129
|
-
dLevelMax,
|
|
138
|
+
name: name,
|
|
139
|
+
path: _cpath,
|
|
140
|
+
key: _cpath.join(),
|
|
141
|
+
repetitionType: repetitionType,
|
|
142
|
+
rLevelMax: rLevelMax,
|
|
143
|
+
dLevelMax: dLevelMax,
|
|
130
144
|
isNested: true,
|
|
131
145
|
fieldCount: Object.keys(opts.fields).length,
|
|
132
|
-
fields: buildFields(opts.fields, rLevelMax, dLevelMax,
|
|
146
|
+
fields: buildFields(opts.fields, rLevelMax, dLevelMax, _cpath)
|
|
133
147
|
};
|
|
134
148
|
continue;
|
|
135
149
|
}
|
|
136
150
|
|
|
137
|
-
|
|
151
|
+
var typeDef = _types.PARQUET_LOGICAL_TYPES[opts.type];
|
|
138
152
|
|
|
139
153
|
if (!typeDef) {
|
|
140
154
|
throw new Error("invalid parquet type: ".concat(opts.type));
|
|
@@ -152,21 +166,21 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
|
|
|
152
166
|
throw new Error("unsupported compression method: ".concat(opts.compression));
|
|
153
167
|
}
|
|
154
168
|
|
|
155
|
-
|
|
169
|
+
var cpath = path.concat([name]);
|
|
156
170
|
fieldList[name] = {
|
|
157
|
-
name,
|
|
171
|
+
name: name,
|
|
158
172
|
primitiveType: typeDef.primitiveType,
|
|
159
173
|
originalType: typeDef.originalType,
|
|
160
174
|
path: cpath,
|
|
161
175
|
key: cpath.join(),
|
|
162
|
-
repetitionType,
|
|
176
|
+
repetitionType: repetitionType,
|
|
163
177
|
encoding: opts.encoding,
|
|
164
178
|
compression: opts.compression,
|
|
165
179
|
typeLength: opts.typeLength || typeDef.typeLength,
|
|
166
180
|
presision: opts.presision,
|
|
167
181
|
scale: opts.scale,
|
|
168
|
-
rLevelMax,
|
|
169
|
-
dLevelMax
|
|
182
|
+
rLevelMax: rLevelMax,
|
|
183
|
+
dLevelMax: dLevelMax
|
|
170
184
|
};
|
|
171
185
|
}
|
|
172
186
|
|
|
@@ -174,9 +188,9 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
|
|
|
174
188
|
}
|
|
175
189
|
|
|
176
190
|
function listFields(fields) {
|
|
177
|
-
|
|
191
|
+
var list = [];
|
|
178
192
|
|
|
179
|
-
for (
|
|
193
|
+
for (var k in fields) {
|
|
180
194
|
list.push(fields[k]);
|
|
181
195
|
|
|
182
196
|
if (fields[k].isNested) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"],"names":["ParquetSchema","constructor","schema","fields","buildFields","fieldList","listFields","findField","path","split","slice","n","length","shift","findFieldBranch","branch","push","shredRecord","record","buffer","materializeRecords","compress","type","setCompress","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","key","join","isNested","fieldCount","Object","keys","typeDef","PARQUET_LOGICAL_TYPES","Error","encoding","PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","primitiveType","originalType","typeLength","presision","scale","list","k"],"mappings":";;;;;;;;;;;AAEA;;AACA;;AAUA;;AACA;;AAKO,MAAMA,aAAN,CAAoB;AAQzBC,EAAAA,WAAW,CAACC,MAAD,EAA2B;AAAA;AAAA;AAAA;AACpC,SAAKA,MAAL,GAAcA,MAAd;AACA,SAAKC,MAAL,GAAcC,WAAW,CAACF,MAAD,EAAS,CAAT,EAAY,CAAZ,EAAe,EAAf,CAAzB;AACA,SAAKG,SAAL,GAAiBC,UAAU,CAAC,KAAKH,MAAN,CAA3B;AACD;;AAKDI,EAAAA,SAAS,CAACC,IAAD,EAAwC;AAC/C,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD,KAHD,MAGO;AAELD,MAAAA,IAAI,GAAGA,IAAI,CAACE,KAAL,CAAW,CAAX,CAAP;AACD;;AAED,QAAIC,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCF,MAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;;AAED,WAAOQ,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAR;AACD;;AAKDM,EAAAA,eAAe,CAACN,IAAD,EAA0C;AACvD,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD;;AACD,UAAMM,MAAsB,GAAG,EAA/B;AACA,QAAIJ,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCE,MAAAA,MAAM,CAACC,IAAP,CAAYL,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAb;;AACA,UAAIA,IAAI,CAACI,MAAL,GAAc,CAAlB,EAAqB;AACnBD,QAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;AACF;;AACD,WAAOY,MAAP;AACD;;AAEDE,EAAAA,WAAW,CAACC,MAAD,EAAwBC,MAAxB,EAAqD;AAC9D,4BAAY,IAAZ,EAAkBD,MAAlB,EAA0BC,MAA1B;AACD;;AAEDC,EAAAA,kBAAkB,CAACD,MAAD,EAAyC;AACzD,WAAO,+BAAmB,IAAnB,EAAyBA,MAAzB,CAAP;AACD;;AAEDE,EAAAA,QAAQ,CAACC,IAAD,EAAiC;AACvCC,IAAAA,WAAW,CAAC,KAAKrB,MAAN,EAAcoB,IAAd,CAAX;AACAC,IAAAA,WAAW,CAAC,KAAKpB,MAAN,EAAcmB,IAAd,CAAX;AACA,WAAO,IAAP;AACD;;AAEDH,EAAAA,MAAM,GAAkB;AACtB,WAAO,wBAAY,IAAZ,CAAP;AACD;;AArEwB;;;;AAwE3B,SAASI,WAAT,CAAqBrB,MAArB,EAAkCoB,IAAlC,EAA4D;AAC1D,OAAK,MAAME,IAAX,IAAmBtB,MAAnB,EAA2B;AACzB,UAAMuB,IAAI,GAAGvB,MAAM,CAACsB,IAAD,CAAnB;;AACA,QAAIC,IAAI,CAACtB,MAAT,EAAiB;AACfoB,MAAAA,WAAW,CAACE,IAAI,CAACtB,MAAN,EAAcmB,IAAd,CAAX;AACD,KAFD,MAEO;AACLG,MAAAA,IAAI,CAACC,WAAL,GAAmBJ,IAAnB;AACD;AACF;AACF;;AAGD,SAASlB,WAAT,CACEF,MADF,EAEEyB,eAFF,EAGEC,eAHF,EAIEpB,IAJF,EAKgC;AAC9B,QAAMH,SAAuC,GAAG,EAAhD;;AAEA,OAAK,MAAMmB,IAAX,IAAmBtB,MAAnB,EAA2B;AACzB,UAAM2B,IAAI,GAAG3B,MAAM,CAACsB,IAAD,CAAnB;AAGA,UAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAvB;AACA,UAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAN,CAAxB;AACA,QAAIE,SAAS,GAAGP,eAAhB;AACA,QAAIQ,SAAS,GAAGP,eAAhB;AAEA,QAAIQ,cAA8B,GAAG,UAArC;;AACA,QAAI,CAACN,QAAL,EAAe;AACbM,MAAAA,cAAc,GAAG,UAAjB;AACAD,MAAAA,SAAS;AACV;;AACD,QAAIH,QAAJ,EAAc;AACZI,MAAAA,cAAc,GAAG,UAAjB;AACAF,MAAAA,SAAS;AACT,UAAIJ,QAAJ,EAAcK,SAAS;AACxB;;AAGD,QAAIN,IAAI,CAAC1B,MAAT,EAAiB;AACf,YAAMkC,KAAK,GAAG7B,IAAI,CAAC8B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAnB,MAAAA,SAAS,CAACmB,IAAD,CAAT,GAAkB;AAChBA,QAAAA,IADgB;AAEhBhB,QAAAA,IAAI,EAAE6B,KAFU;AAGhBE,QAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EAHW;AAIhBJ,QAAAA,cAJgB;AAKhBF,QAAAA,SALgB;AAMhBC,QAAAA,SANgB;AAOhBM,QAAAA,QAAQ,EAAE,IAPM;AAQhBC,QAAAA,UAAU,EAAEC,MAAM,CAACC,IAAP,CAAYf,IAAI,CAAC1B,MAAjB,EAAyBS,MARrB;AAShBT,QAAAA,MAAM,EAAEC,WAAW,CAACyB,IAAI,CAAC1B,MAAN,EAAc+B,SAAd,EAAyBC,SAAzB,EAAoCE,KAApC;AATH,OAAlB;AAWA;AACD;;AAED,UAAMQ,OAAY,GAAGC,6BAAsBjB,IAAI,CAACP,IAA3B,CAArB;;AACA,QAAI,CAACuB,OAAL,EAAc;AACZ,YAAM,IAAIE,KAAJ,iCAAmClB,IAAI,CAACP,IAAxC,EAAN;AACD;;AAEDO,IAAAA,IAAI,CAACmB,QAAL,GAAgBnB,IAAI,CAACmB,QAAL,IAAiB,OAAjC;;AACA,QAAI,EAAEnB,IAAI,CAACmB,QAAL,IAAiBC,sBAAnB,CAAJ,EAAwC;AACtC,YAAM,IAAIF,KAAJ,yCAA2ClB,IAAI,CAACmB,QAAhD,EAAN;AACD;;AAEDnB,IAAAA,IAAI,CAACH,WAAL,GAAmBG,IAAI,CAACH,WAAL,IAAoB,cAAvC;;AACA,QAAI,EAAEG,IAAI,CAACH,WAAL,IAAoBwB,wCAAtB,CAAJ,EAAwD;AACtD,YAAM,IAAIH,KAAJ,2CAA6ClB,IAAI,CAACH,WAAlD,EAAN;AACD;;AAGD,UAAMW,KAAK,GAAG7B,IAAI,CAAC8B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAnB,IAAAA,SAAS,CAACmB,IAAD,CAAT,GAAkB;AAChBA,MAAAA,IADgB;AAEhB2B,MAAAA,aAAa,EAAEN,OAAO,CAACM,aAFP;AAGhBC,MAAAA,YAAY,EAAEP,OAAO,CAACO,YAHN;AAIhB5C,MAAAA,IAAI,EAAE6B,KAJU;AAKhBE,MAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EALW;AAMhBJ,MAAAA,cANgB;AAOhBY,MAAAA,QAAQ,EAAEnB,IAAI,CAACmB,QAPC;AAQhBtB,MAAAA,WAAW,EAAEG,IAAI,CAACH,WARF;AAShB2B,MAAAA,UAAU,EAAExB,IAAI,CAACwB,UAAL,IAAmBR,OAAO,CAACQ,UATvB;AAUhBC,MAAAA,SAAS,EAAEzB,IAAI,CAACyB,SAVA;AAWhBC,MAAAA,KAAK,EAAE1B,IAAI,CAAC0B,KAXI;AAYhBrB,MAAAA,SAZgB;AAahBC,MAAAA;AAbgB,KAAlB;AAeD;;AACD,SAAO9B,SAAP;AACD;;AAED,SAASC,UAAT,CAAoBH,MAApB,EAA0E;AACxE,MAAIqD,IAAoB,GAAG,EAA3B;;AACA,OAAK,MAAMC,CAAX,IAAgBtD,MAAhB,EAAwB;AACtBqD,IAAAA,IAAI,CAACxC,IAAL,CAAUb,MAAM,CAACsD,CAAD,CAAhB;;AACA,QAAItD,MAAM,CAACsD,CAAD,CAAN,CAAUhB,QAAd,EAAwB;AACtBe,MAAAA,IAAI,GAAGA,IAAI,CAAClB,MAAL,CAAYhC,UAAU,CAACH,MAAM,CAACsD,CAAD,CAAN,CAAUtD,MAAX,CAAtB,CAAP;AACD;AACF;;AACD,SAAOqD,IAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetBuffer,\n ParquetCompression,\n ParquetField,\n ParquetRecord,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRecords, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {\n shredRecord(this, record, buffer);\n }\n\n materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {\n return materializeRecords(this, buffer);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n buffer(): ParquetBuffer {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n presision: opts.presision,\n scale: opts.scale,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return list;\n}\n"],"file":"schema.js"}
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"],"names":["ParquetSchema","schema","fields","buildFields","fieldList","listFields","path","split","slice","n","length","shift","branch","push","record","buffer","type","setCompress","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","key","join","isNested","fieldCount","Object","keys","typeDef","PARQUET_LOGICAL_TYPES","Error","encoding","PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","primitiveType","originalType","typeLength","presision","scale","list","k"],"mappings":";;;;;;;;;;;;;;;AAEA;;AACA;;AAUA;;AACA;;IAKaA,a;AAQX,yBAAYC,MAAZ,EAAsC;AAAA;AAAA;AAAA;AAAA;AACpC,SAAKA,MAAL,GAAcA,MAAd;AACA,SAAKC,MAAL,GAAcC,WAAW,CAACF,MAAD,EAAS,CAAT,EAAY,CAAZ,EAAe,EAAf,CAAzB;AACA,SAAKG,SAAL,GAAiBC,UAAU,CAAC,KAAKH,MAAN,CAA3B;AACD;;;;WAKD,mBAAUI,IAAV,EAAiD;AAC/C,UAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,QAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD,OAHD,MAGO;AAELD,QAAAA,IAAI,GAAGA,IAAI,CAACE,KAAL,CAAW,CAAX,CAAP;AACD;;AAED,UAAIC,CAAC,GAAG,KAAKP,MAAb;;AACA,aAAOI,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCF,QAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWJ,MAAf;AACD;;AAED,aAAOO,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAR;AACD;;;WAKD,yBAAgBA,IAAhB,EAAyD;AACvD,UAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,QAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD;;AACD,UAAMK,MAAsB,GAAG,EAA/B;AACA,UAAIH,CAAC,GAAG,KAAKP,MAAb;;AACA,aAAOI,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCC,QAAAA,MAAM,CAACC,IAAP,CAAYJ,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAb;;AACA,YAAIA,IAAI,CAACI,MAAL,GAAc,CAAlB,EAAqB;AACnBD,UAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWJ,MAAf;AACD;AACF;;AACD,aAAOU,MAAP;AACD;;;WAED,qBAAYE,MAAZ,EAAmCC,MAAnC,EAAgE;AAC9D,8BAAY,IAAZ,EAAkBD,MAAlB,EAA0BC,MAA1B;AACD;;;WAED,4BAAmBA,MAAnB,EAA2D;AACzD,aAAO,+BAAmB,IAAnB,EAAyBA,MAAzB,CAAP;AACD;;;WAED,kBAASC,IAAT,EAAyC;AACvCC,MAAAA,WAAW,CAAC,KAAKhB,MAAN,EAAce,IAAd,CAAX;AACAC,MAAAA,WAAW,CAAC,KAAKf,MAAN,EAAcc,IAAd,CAAX;AACA,aAAO,IAAP;AACD;;;WAED,kBAAwB;AACtB,aAAO,wBAAY,IAAZ,CAAP;AACD;;;;;;;AAGH,SAASC,WAAT,CAAqBhB,MAArB,EAAkCe,IAAlC,EAA4D;AAC1D,OAAK,IAAME,IAAX,IAAmBjB,MAAnB,EAA2B;AACzB,QAAMkB,IAAI,GAAGlB,MAAM,CAACiB,IAAD,CAAnB;;AACA,QAAIC,IAAI,CAACjB,MAAT,EAAiB;AACfe,MAAAA,WAAW,CAACE,IAAI,CAACjB,MAAN,EAAcc,IAAd,CAAX;AACD,KAFD,MAEO;AACLG,MAAAA,IAAI,CAACC,WAAL,GAAmBJ,IAAnB;AACD;AACF;AACF;;AAGD,SAASb,WAAT,CACEF,MADF,EAEEoB,eAFF,EAGEC,eAHF,EAIEhB,IAJF,EAKgC;AAC9B,MAAMF,SAAuC,GAAG,EAAhD;;AAEA,OAAK,IAAMc,IAAX,IAAmBjB,MAAnB,EAA2B;AACzB,QAAMsB,IAAI,GAAGtB,MAAM,CAACiB,IAAD,CAAnB;AAGA,QAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAvB;AACA,QAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAN,CAAxB;AACA,QAAIE,SAAS,GAAGP,eAAhB;AACA,QAAIQ,SAAS,GAAGP,eAAhB;AAEA,QAAIQ,cAA8B,GAAG,UAArC;;AACA,QAAI,CAACN,QAAL,EAAe;AACbM,MAAAA,cAAc,GAAG,UAAjB;AACAD,MAAAA,SAAS;AACV;;AACD,QAAIH,QAAJ,EAAc;AACZI,MAAAA,cAAc,GAAG,UAAjB;AACAF,MAAAA,SAAS;AACT,UAAIJ,QAAJ,EAAcK,SAAS;AACxB;;AAGD,QAAIN,IAAI,CAACrB,MAAT,EAAiB;AACf,UAAM6B,MAAK,GAAGzB,IAAI,CAAC0B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;;AACAd,MAAAA,SAAS,CAACc,IAAD,CAAT,GAAkB;AAChBA,QAAAA,IAAI,EAAJA,IADgB;AAEhBZ,QAAAA,IAAI,EAAEyB,MAFU;AAGhBE,QAAAA,GAAG,EAAEF,MAAK,CAACG,IAAN,EAHW;AAIhBJ,QAAAA,cAAc,EAAdA,cAJgB;AAKhBF,QAAAA,SAAS,EAATA,SALgB;AAMhBC,QAAAA,SAAS,EAATA,SANgB;AAOhBM,QAAAA,QAAQ,EAAE,IAPM;AAQhBC,QAAAA,UAAU,EAAEC,MAAM,CAACC,IAAP,CAAYf,IAAI,CAACrB,MAAjB,EAAyBQ,MARrB;AAShBR,QAAAA,MAAM,EAAEC,WAAW,CAACoB,IAAI,CAACrB,MAAN,EAAc0B,SAAd,EAAyBC,SAAzB,EAAoCE,MAApC;AATH,OAAlB;AAWA;AACD;;AAED,QAAMQ,OAAY,GAAGC,6BAAsBjB,IAAI,CAACP,IAA3B,CAArB;;AACA,QAAI,CAACuB,OAAL,EAAc;AACZ,YAAM,IAAIE,KAAJ,iCAAmClB,IAAI,CAACP,IAAxC,EAAN;AACD;;AAEDO,IAAAA,IAAI,CAACmB,QAAL,GAAgBnB,IAAI,CAACmB,QAAL,IAAiB,OAAjC;;AACA,QAAI,EAAEnB,IAAI,CAACmB,QAAL,IAAiBC,sBAAnB,CAAJ,EAAwC;AACtC,YAAM,IAAIF,KAAJ,yCAA2ClB,IAAI,CAACmB,QAAhD,EAAN;AACD;;AAEDnB,IAAAA,IAAI,CAACH,WAAL,GAAmBG,IAAI,CAACH,WAAL,IAAoB,cAAvC;;AACA,QAAI,EAAEG,IAAI,CAACH,WAAL,IAAoBwB,wCAAtB,CAAJ,EAAwD;AACtD,YAAM,IAAIH,KAAJ,2CAA6ClB,IAAI,CAACH,WAAlD,EAAN;AACD;;AAGD,QAAMW,KAAK,GAAGzB,IAAI,CAAC0B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAd,IAAAA,SAAS,CAACc,IAAD,CAAT,GAAkB;AAChBA,MAAAA,IAAI,EAAJA,IADgB;AAEhB2B,MAAAA,aAAa,EAAEN,OAAO,CAACM,aAFP;AAGhBC,MAAAA,YAAY,EAAEP,OAAO,CAACO,YAHN;AAIhBxC,MAAAA,IAAI,EAAEyB,KAJU;AAKhBE,MAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EALW;AAMhBJ,MAAAA,cAAc,EAAdA,cANgB;AAOhBY,MAAAA,QAAQ,EAAEnB,IAAI,CAACmB,QAPC;AAQhBtB,MAAAA,WAAW,EAAEG,IAAI,CAACH,WARF;AAShB2B,MAAAA,UAAU,EAAExB,IAAI,CAACwB,UAAL,IAAmBR,OAAO,CAACQ,UATvB;AAUhBC,MAAAA,SAAS,EAAEzB,IAAI,CAACyB,SAVA;AAWhBC,MAAAA,KAAK,EAAE1B,IAAI,CAAC0B,KAXI;AAYhBrB,MAAAA,SAAS,EAATA,SAZgB;AAahBC,MAAAA,SAAS,EAATA;AAbgB,KAAlB;AAeD;;AACD,SAAOzB,SAAP;AACD;;AAED,SAASC,UAAT,CAAoBH,MAApB,EAA0E;AACxE,MAAIgD,IAAoB,GAAG,EAA3B;;AACA,OAAK,IAAMC,CAAX,IAAgBjD,MAAhB,EAAwB;AACtBgD,IAAAA,IAAI,CAACrC,IAAL,CAAUX,MAAM,CAACiD,CAAD,CAAhB;;AACA,QAAIjD,MAAM,CAACiD,CAAD,CAAN,CAAUhB,QAAd,EAAwB;AACtBe,MAAAA,IAAI,GAAGA,IAAI,CAAClB,MAAL,CAAY3B,UAAU,CAACH,MAAM,CAACiD,CAAD,CAAN,CAAUjD,MAAX,CAAtB,CAAP;AACD;AACF;;AACD,SAAOgD,IAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetBuffer,\n ParquetCompression,\n ParquetField,\n ParquetRecord,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRecords, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {\n shredRecord(this, record, buffer);\n }\n\n materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {\n return materializeRecords(this, buffer);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n buffer(): ParquetBuffer {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n presision: opts.presision,\n scale: opts.scale,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return list;\n}\n"],"file":"schema.js"}
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
|
|
3
|
+
var _typeof = require("@babel/runtime/helpers/typeof");
|
|
4
|
+
|
|
3
5
|
Object.defineProperty(exports, "__esModule", {
|
|
4
6
|
value: true
|
|
5
7
|
});
|
|
@@ -8,7 +10,7 @@ exports.shredRecord = shredRecord;
|
|
|
8
10
|
exports.materializeRecords = materializeRecords;
|
|
9
11
|
Object.defineProperty(exports, "ParquetBuffer", {
|
|
10
12
|
enumerable: true,
|
|
11
|
-
get: function () {
|
|
13
|
+
get: function get() {
|
|
12
14
|
return _declare.ParquetBuffer;
|
|
13
15
|
}
|
|
14
16
|
});
|
|
@@ -17,31 +19,47 @@ var _declare = require("./declare");
|
|
|
17
19
|
|
|
18
20
|
var Types = _interopRequireWildcard(require("./types"));
|
|
19
21
|
|
|
20
|
-
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
22
|
+
function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
|
|
23
|
+
|
|
24
|
+
function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
|
|
25
|
+
|
|
26
|
+
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
|
|
27
|
+
|
|
28
|
+
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
21
29
|
|
|
22
|
-
function
|
|
30
|
+
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
|
|
23
31
|
|
|
24
32
|
function shredBuffer(schema) {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
var columnData = {};
|
|
34
|
+
|
|
35
|
+
var _iterator = _createForOfIteratorHelper(schema.fieldList),
|
|
36
|
+
_step;
|
|
37
|
+
|
|
38
|
+
try {
|
|
39
|
+
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
40
|
+
var field = _step.value;
|
|
41
|
+
columnData[field.key] = {
|
|
42
|
+
dlevels: [],
|
|
43
|
+
rlevels: [],
|
|
44
|
+
values: [],
|
|
45
|
+
pageHeaders: [],
|
|
46
|
+
count: 0
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
} catch (err) {
|
|
50
|
+
_iterator.e(err);
|
|
51
|
+
} finally {
|
|
52
|
+
_iterator.f();
|
|
35
53
|
}
|
|
36
54
|
|
|
37
55
|
return {
|
|
38
56
|
rowCount: 0,
|
|
39
|
-
columnData
|
|
57
|
+
columnData: columnData
|
|
40
58
|
};
|
|
41
59
|
}
|
|
42
60
|
|
|
43
61
|
function shredRecord(schema, record, buffer) {
|
|
44
|
-
|
|
62
|
+
var data = shredBuffer(schema).columnData;
|
|
45
63
|
shredRecordFields(schema.fields, record, data, 0, 0);
|
|
46
64
|
|
|
47
65
|
if (buffer.rowCount === 0) {
|
|
@@ -52,18 +70,28 @@ function shredRecord(schema, record, buffer) {
|
|
|
52
70
|
|
|
53
71
|
buffer.rowCount += 1;
|
|
54
72
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
73
|
+
var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
|
|
74
|
+
_step2;
|
|
75
|
+
|
|
76
|
+
try {
|
|
77
|
+
for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
|
|
78
|
+
var field = _step2.value;
|
|
79
|
+
Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
|
|
80
|
+
Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
|
|
81
|
+
Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
|
|
82
|
+
buffer.columnData[field.key].count += data[field.key].count;
|
|
83
|
+
}
|
|
84
|
+
} catch (err) {
|
|
85
|
+
_iterator2.e(err);
|
|
86
|
+
} finally {
|
|
87
|
+
_iterator2.f();
|
|
60
88
|
}
|
|
61
89
|
}
|
|
62
90
|
|
|
63
91
|
function shredRecordFields(fields, record, data, rLevel, dLevel) {
|
|
64
|
-
for (
|
|
65
|
-
|
|
66
|
-
|
|
92
|
+
for (var name in fields) {
|
|
93
|
+
var field = fields[name];
|
|
94
|
+
var values = [];
|
|
67
95
|
|
|
68
96
|
if (record && field.name in record && record[field.name] !== undefined && record[field.name] !== null) {
|
|
69
97
|
if (record[field.name].constructor === Array) {
|
|
@@ -93,8 +121,8 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
|
|
|
93
121
|
continue;
|
|
94
122
|
}
|
|
95
123
|
|
|
96
|
-
for (
|
|
97
|
-
|
|
124
|
+
for (var i = 0; i < values.length; i++) {
|
|
125
|
+
var rlvl = i === 0 ? rLevel : field.rLevelMax;
|
|
98
126
|
|
|
99
127
|
if (field.isNested) {
|
|
100
128
|
shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
|
|
@@ -109,11 +137,13 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
|
|
|
109
137
|
}
|
|
110
138
|
|
|
111
139
|
function materializeRecords(schema, buffer) {
|
|
112
|
-
|
|
140
|
+
var records = [];
|
|
113
141
|
|
|
114
|
-
for (
|
|
142
|
+
for (var i = 0; i < buffer.rowCount; i++) {
|
|
143
|
+
records.push({});
|
|
144
|
+
}
|
|
115
145
|
|
|
116
|
-
for (
|
|
146
|
+
for (var key in buffer.columnData) {
|
|
117
147
|
materializeColumn(schema, buffer, key, records);
|
|
118
148
|
}
|
|
119
149
|
|
|
@@ -121,45 +151,55 @@ function materializeRecords(schema, buffer) {
|
|
|
121
151
|
}
|
|
122
152
|
|
|
123
153
|
function materializeColumn(schema, buffer, key, records) {
|
|
124
|
-
|
|
154
|
+
var data = buffer.columnData[key];
|
|
125
155
|
if (!data.count) return;
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
for (
|
|
132
|
-
|
|
133
|
-
|
|
156
|
+
var field = schema.findField(key);
|
|
157
|
+
var branch = schema.findFieldBranch(key);
|
|
158
|
+
var rLevels = new Array(field.rLevelMax + 1).fill(0);
|
|
159
|
+
var vIndex = 0;
|
|
160
|
+
|
|
161
|
+
for (var i = 0; i < data.count; i++) {
|
|
162
|
+
var dLevel = data.dlevels[i];
|
|
163
|
+
var rLevel = data.rlevels[i];
|
|
134
164
|
rLevels[rLevel]++;
|
|
135
165
|
rLevels.fill(0, rLevel + 1);
|
|
136
|
-
|
|
137
|
-
|
|
166
|
+
var rIndex = 0;
|
|
167
|
+
var record = records[rLevels[rIndex++] - 1];
|
|
138
168
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
if (dLevel < step.dLevelMax) break;
|
|
169
|
+
var _iterator3 = _createForOfIteratorHelper(branch),
|
|
170
|
+
_step3;
|
|
142
171
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
172
|
+
try {
|
|
173
|
+
for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) {
|
|
174
|
+
var step = _step3.value;
|
|
175
|
+
if (step === field) break;
|
|
176
|
+
if (dLevel < step.dLevelMax) break;
|
|
147
177
|
|
|
148
|
-
|
|
178
|
+
if (step.repetitionType === 'REPEATED') {
|
|
179
|
+
if (!(step.name in record)) {
|
|
180
|
+
record[step.name] = [];
|
|
181
|
+
}
|
|
149
182
|
|
|
150
|
-
|
|
151
|
-
record[step.name].push({});
|
|
152
|
-
}
|
|
183
|
+
var _ix = rLevels[rIndex++];
|
|
153
184
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
185
|
+
while (record[step.name].length <= _ix) {
|
|
186
|
+
record[step.name].push({});
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
record = record[step.name][_ix];
|
|
190
|
+
} else {
|
|
191
|
+
record[step.name] = record[step.name] || {};
|
|
192
|
+
record = record[step.name];
|
|
193
|
+
}
|
|
158
194
|
}
|
|
195
|
+
} catch (err) {
|
|
196
|
+
_iterator3.e(err);
|
|
197
|
+
} finally {
|
|
198
|
+
_iterator3.f();
|
|
159
199
|
}
|
|
160
200
|
|
|
161
201
|
if (dLevel === field.dLevelMax) {
|
|
162
|
-
|
|
202
|
+
var value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex], field);
|
|
163
203
|
vIndex++;
|
|
164
204
|
|
|
165
205
|
if (field.repetitionType === 'REPEATED') {
|
|
@@ -167,7 +207,7 @@ function materializeColumn(schema, buffer, key, records) {
|
|
|
167
207
|
record[field.name] = [];
|
|
168
208
|
}
|
|
169
209
|
|
|
170
|
-
|
|
210
|
+
var ix = rLevels[rIndex];
|
|
171
211
|
|
|
172
212
|
while (record[field.name].length <= ix) {
|
|
173
213
|
record[field.name].push(null);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["shredBuffer","schema","columnData","field","fieldList","key","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","Types","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":";;;;;;;;;;;;;;;AAEA;;AAEA;;;;;;AAIO,SAASA,WAAT,CAAqBC,MAArB,EAA2D;AAChE,QAAMC,UAAuC,GAAG,EAAhD;;AACA,OAAK,MAAMC,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCF,IAAAA,UAAU,CAACC,KAAK,CAACE,GAAP,CAAV,GAAwB;AACtBC,MAAAA,OAAO,EAAE,EADa;AAEtBC,MAAAA,OAAO,EAAE,EAFa;AAGtBC,MAAAA,MAAM,EAAE,EAHc;AAItBC,MAAAA,WAAW,EAAE,EAJS;AAKtBC,MAAAA,KAAK,EAAE;AALe,KAAxB;AAOD;;AACD,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcT,IAAAA;AAAd,GAAP;AACD;;AAwBM,SAASU,WAAT,CAAqBX,MAArB,EAA4CY,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,QAAMC,IAAI,GAAGf,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAc,EAAAA,iBAAiB,CAACf,MAAM,CAACgB,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACZ,UAAP,GAAoBa,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AACA,OAAK,MAAMR,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCc,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BE,OAAxD,EAAiEQ,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAjF;AACAW,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BC,OAAxD,EAAiES,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAjF;AACAY,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BG,MAAxD,EAAgEO,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhF;AACAM,IAAAA,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BK,KAA7B,IAAsCK,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAtD;AACD;AACF;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,MAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,UAAMd,KAAK,GAAGc,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIhB,MAAa,GAAG,EAApB;;AACA,QACEK,MAAM,IACNV,KAAK,CAACqB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CV,QAAAA,MAAM,GAAGK,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAf;AACD,OAFD,MAEO;AACLhB,QAAAA,MAAM,CAACY,IAAP,CAAYP,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CV,KAAK,CAAC0B,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,mCAAqC3B,KAAK,CAACqB,IAA3C,EAAN;AACD;;AACD,QAAIhB,MAAM,CAACmB,MAAP,GAAgB,CAAhB,IAAqBxB,KAAK,CAAC0B,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,sCAAwC3B,KAAK,CAACqB,IAA9C,EAAN;AACD;;AAGD,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIxB,KAAK,CAAC4B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACb,KAAK,CAACc,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGxB,MAAM,CAACmB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,YAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBnB,KAAK,CAAC+B,SAAtC;;AACA,UAAI/B,KAAK,CAAC4B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACb,KAAK,CAACc,MAAP,EAAgBT,MAAM,CAACwB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC9B,KAAK,CAACgC,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BjB,KAAK,CAACgC,SAAnC;AACApB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBY,IAAvB,CACEgB,KAAK,CAACC,WAAN,CAAmBlC,KAAK,CAACmC,YAAN,IAAsBnC,KAAK,CAACoC,aAA/C,EAAgE/B,MAAM,CAACwB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBM,SAASQ,kBAAT,CAA4BvC,MAA5B,EAAmDa,MAAnD,EAA2F;AAChG,QAAM2B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIT,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC,EAA0CS,OAAO,CAACrB,IAAR,CAAa,EAAb;;AAC1C,OAAK,MAAMf,GAAX,IAAkBS,MAAM,CAACZ,UAAzB,EAAqC;AACnCwC,IAAAA,iBAAiB,CAACzC,MAAD,EAASa,MAAT,EAAiBT,GAAjB,EAAsBoC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEzC,MADF,EAEEa,MAFF,EAGET,GAHF,EAIEoC,OAJF,EAKE;AACA,QAAM1B,IAAI,GAAGD,MAAM,CAACZ,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACU,IAAI,CAACL,KAAV,EAAiB;AAEjB,QAAMP,KAAK,GAAGF,MAAM,CAAC0C,SAAP,CAAiBtC,GAAjB,CAAd;AACA,QAAMuC,MAAM,GAAG3C,MAAM,CAAC4C,eAAP,CAAuBxC,GAAvB,CAAf;AAGA,QAAMyC,OAAiB,GAAG,IAAI5B,KAAJ,CAAUf,KAAK,CAAC+B,SAAN,GAAkB,CAA5B,EAA+Ba,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIhB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,UAAMT,MAAM,GAAGR,IAAI,CAACT,OAAL,CAAa0B,CAAb,CAAf;AACA,UAAMV,MAAM,GAAGP,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACAc,IAAAA,OAAO,CAACxB,MAAD,CAAP;AACAwB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBzB,MAAM,GAAG,CAAzB;AAEA,QAAI2B,MAAM,GAAG,CAAb;AACA,QAAIpC,MAAM,GAAG4B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAGA,SAAK,MAAMC,IAAX,IAAmBN,MAAnB,EAA2B;AACzB,UAAIM,IAAI,KAAK/C,KAAb,EAAoB;AACpB,UAAIoB,MAAM,GAAG2B,IAAI,CAACf,SAAlB,EAA6B;;AAC7B,UAAIe,IAAI,CAACrB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,YAAI,EAAEqB,IAAI,CAAC1B,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,eAAOpC,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBG,MAAlB,IAA4BwB,EAAnC,EAAuC;AAErCtC,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,QAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkB2B,EAAlB,CAAT;AACD,OAXD,MAWO;AACLtC,QAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoBX,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,IAAqB,EAAzC;AACAX,QAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAf;AACD;AACF;;AAGD,QAAID,MAAM,KAAKpB,KAAK,CAACgC,SAArB,EAAgC;AAC9B,YAAMiB,KAAK,GAAGhB,KAAK,CAACiB,aAAN,CAEZlD,KAAK,CAACmC,YAAN,IAAsBnC,KAAK,CAACoC,aAFhB,EAGZxB,IAAI,CAACP,MAAL,CAAYwC,MAAZ,CAHY,EAIZ7C,KAJY,CAAd;AAMA6C,MAAAA,MAAM;;AACN,UAAI7C,KAAK,CAAC0B,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAE1B,KAAK,CAACqB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,cAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOpC,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BwB,EAApC,EAAwC;AAEtCtC,UAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmB2B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLvC,QAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,GAAqB4B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex],\n field\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["shredBuffer","schema","columnData","fieldList","field","key","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","Types","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":";;;;;;;;;;;;;;;;;AAEA;;AAEA;;;;;;;;;;;;AAIO,SAASA,WAAT,CAAqBC,MAArB,EAA2D;AAChE,MAAMC,UAAuC,GAAG,EAAhD;;AADgE,6CAE5CD,MAAM,CAACE,SAFqC;AAAA;;AAAA;AAEhE,wDAAsC;AAAA,UAA3BC,KAA2B;AACpCF,MAAAA,UAAU,CAACE,KAAK,CAACC,GAAP,CAAV,GAAwB;AACtBC,QAAAA,OAAO,EAAE,EADa;AAEtBC,QAAAA,OAAO,EAAE,EAFa;AAGtBC,QAAAA,MAAM,EAAE,EAHc;AAItBC,QAAAA,WAAW,EAAE,EAJS;AAKtBC,QAAAA,KAAK,EAAE;AALe,OAAxB;AAOD;AAV+D;AAAA;AAAA;AAAA;AAAA;;AAWhE,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcT,IAAAA,UAAU,EAAVA;AAAd,GAAP;AACD;;AAwBM,SAASU,WAAT,CAAqBX,MAArB,EAA4CY,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,MAAMC,IAAI,GAAGf,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAc,EAAAA,iBAAiB,CAACf,MAAM,CAACgB,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACZ,UAAP,GAAoBa,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AAZ2F,8CAavEV,MAAM,CAACE,SAbgE;AAAA;;AAAA;AAa3F,2DAAsC;AAAA,UAA3BC,KAA2B;AACpCc,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BE,OAAxD,EAAiEQ,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAjF;AACAW,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BC,OAAxD,EAAiES,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBC,OAAjF;AACAY,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BG,MAAxD,EAAgEO,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBG,MAAhF;AACAM,MAAAA,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BK,KAA7B,IAAsCK,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAtD;AACD;AAlB0F;AAAA;AAAA;AAAA;AAAA;AAmB5F;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,IAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,QAAMb,KAAK,GAAGa,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIhB,MAAa,GAAG,EAApB;;AACA,QACEK,MAAM,IACNT,KAAK,CAACoB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CV,QAAAA,MAAM,GAAGK,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAf;AACD,OAFD,MAEO;AACLhB,QAAAA,MAAM,CAACY,IAAP,CAAYP,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CT,KAAK,CAACyB,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,mCAAqC1B,KAAK,CAACoB,IAA3C,EAAN;AACD;;AACD,QAAIhB,MAAM,CAACmB,MAAP,GAAgB,CAAhB,IAAqBvB,KAAK,CAACyB,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,sCAAwC1B,KAAK,CAACoB,IAA9C,EAAN;AACD;;AAGD,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIvB,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGxB,MAAM,CAACmB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,UAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBlB,KAAK,CAAC8B,SAAtC;;AACA,UAAI9B,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgBT,MAAM,CAACwB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC7B,KAAK,CAAC+B,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BhB,KAAK,CAAC+B,SAAnC;AACApB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBY,IAAvB,CACEgB,KAAK,CAACC,WAAN,CAAmBjC,KAAK,CAACkC,YAAN,IAAsBlC,KAAK,CAACmC,aAA/C,EAAgE/B,MAAM,CAACwB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBM,SAASQ,kBAAT,CAA4BvC,MAA5B,EAAmDa,MAAnD,EAA2F;AAChG,MAAM2B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIT,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC;AAA0CS,IAAAA,OAAO,CAACrB,IAAR,CAAa,EAAb;AAA1C;;AACA,OAAK,IAAMf,GAAX,IAAkBS,MAAM,CAACZ,UAAzB,EAAqC;AACnCwC,IAAAA,iBAAiB,CAACzC,MAAD,EAASa,MAAT,EAAiBT,GAAjB,EAAsBoC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEzC,MADF,EAEEa,MAFF,EAGET,GAHF,EAIEoC,OAJF,EAKE;AACA,MAAM1B,IAAI,GAAGD,MAAM,CAACZ,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACU,IAAI,CAACL,KAAV,EAAiB;AAEjB,MAAMN,KAAK,GAAGH,MAAM,CAAC0C,SAAP,CAAiBtC,GAAjB,CAAd;AACA,MAAMuC,MAAM,GAAG3C,MAAM,CAAC4C,eAAP,CAAuBxC,GAAvB,CAAf;AAGA,MAAMyC,OAAiB,GAAG,IAAI5B,KAAJ,CAAUd,KAAK,CAAC8B,SAAN,GAAkB,CAA5B,EAA+Ba,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIhB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,QAAMT,MAAM,GAAGR,IAAI,CAACT,OAAL,CAAa0B,CAAb,CAAf;AACA,QAAMV,MAAM,GAAGP,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACAc,IAAAA,OAAO,CAACxB,MAAD,CAAP;AACAwB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBzB,MAAM,GAAG,CAAzB;AAEA,QAAI2B,MAAM,GAAG,CAAb;AACA,QAAIpC,MAAM,GAAG4B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAPmC,gDAUhBL,MAVgB;AAAA;;AAAA;AAUnC,6DAA2B;AAAA,YAAhBM,IAAgB;AACzB,YAAIA,IAAI,KAAK9C,KAAb,EAAoB;AACpB,YAAImB,MAAM,GAAG2B,IAAI,CAACf,SAAlB,EAA6B;;AAC7B,YAAIe,IAAI,CAACrB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,cAAI,EAAEqB,IAAI,CAAC1B,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,YAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM2B,GAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,iBAAOpC,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBG,MAAlB,IAA4BwB,GAAnC,EAAuC;AAErCtC,YAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,UAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkB2B,GAAlB,CAAT;AACD,SAXD,MAWO;AACLtC,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoBX,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,IAAqB,EAAzC;AACAX,UAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAf;AACD;AACF;AA5BkC;AAAA;AAAA;AAAA;AAAA;;AA+BnC,QAAID,MAAM,KAAKnB,KAAK,CAAC+B,SAArB,EAAgC;AAC9B,UAAMiB,KAAK,GAAGhB,KAAK,CAACiB,aAAN,CAEZjD,KAAK,CAACkC,YAAN,IAAsBlC,KAAK,CAACmC,aAFhB,EAGZxB,IAAI,CAACP,MAAL,CAAYwC,MAAZ,CAHY,EAIZ5C,KAJY,CAAd;AAMA4C,MAAAA,MAAM;;AACN,UAAI5C,KAAK,CAACyB,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAEzB,KAAK,CAACoB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,YAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOpC,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BwB,EAApC,EAAwC;AAEtCtC,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmB2B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLvC,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB4B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex],\n field\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}
|