@loaders.gl/parquet 3.1.0-beta.3 → 3.1.1

This diff reflects the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (139)
  1. package/dist/dist.min.js +6 -6
  2. package/dist/dist.min.js.map +2 -2
  3. package/dist/es5/bundle.js +1 -1
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +5 -5
  6. package/dist/es5/constants.js.map +1 -1
  7. package/dist/es5/index.js +19 -10
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +13 -13
  10. package/dist/es5/lib/convert-schema.js.map +1 -1
  11. package/dist/es5/lib/parse-parquet.js +154 -19
  12. package/dist/es5/lib/parse-parquet.js.map +1 -1
  13. package/dist/es5/lib/read-array-buffer.js +43 -6
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -1
  15. package/dist/es5/parquet-loader.js +4 -4
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +4 -4
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +10 -2
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -1
  21. package/dist/es5/parquetjs/codecs/index.js +6 -4
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/plain.js +43 -41
  24. package/dist/es5/parquetjs/codecs/plain.js.map +1 -1
  25. package/dist/es5/parquetjs/codecs/rle.js +35 -25
  26. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  27. package/dist/es5/parquetjs/compression.js +110 -27
  28. package/dist/es5/parquetjs/compression.js.map +1 -1
  29. package/dist/es5/parquetjs/encoder/writer.js +737 -301
  30. package/dist/es5/parquetjs/encoder/writer.js.map +1 -1
  31. package/dist/es5/parquetjs/file.js +15 -15
  32. package/dist/es5/parquetjs/file.js.map +1 -1
  33. package/dist/es5/parquetjs/parquet-thrift/BoundaryOrder.js +1 -1
  34. package/dist/es5/parquetjs/parquet-thrift/BsonType.js +45 -31
  35. package/dist/es5/parquetjs/parquet-thrift/BsonType.js.map +1 -1
  36. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js +152 -141
  37. package/dist/es5/parquetjs/parquet-thrift/ColumnChunk.js.map +1 -1
  38. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js +160 -147
  39. package/dist/es5/parquetjs/parquet-thrift/ColumnIndex.js.map +1 -1
  40. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js +259 -248
  41. package/dist/es5/parquetjs/parquet-thrift/ColumnMetaData.js.map +1 -1
  42. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js +79 -67
  43. package/dist/es5/parquetjs/parquet-thrift/ColumnOrder.js.map +1 -1
  44. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -1
  45. package/dist/es5/parquetjs/parquet-thrift/ConvertedType.js +1 -1
  46. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js +124 -113
  47. package/dist/es5/parquetjs/parquet-thrift/DataPageHeader.js.map +1 -1
  48. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js +169 -158
  49. package/dist/es5/parquetjs/parquet-thrift/DataPageHeaderV2.js.map +1 -1
  50. package/dist/es5/parquetjs/parquet-thrift/DateType.js +45 -31
  51. package/dist/es5/parquetjs/parquet-thrift/DateType.js.map +1 -1
  52. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js +79 -68
  53. package/dist/es5/parquetjs/parquet-thrift/DecimalType.js.map +1 -1
  54. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js +94 -83
  55. package/dist/es5/parquetjs/parquet-thrift/DictionaryPageHeader.js.map +1 -1
  56. package/dist/es5/parquetjs/parquet-thrift/Encoding.js +1 -1
  57. package/dist/es5/parquetjs/parquet-thrift/EnumType.js +45 -31
  58. package/dist/es5/parquetjs/parquet-thrift/EnumType.js.map +1 -1
  59. package/dist/es5/parquetjs/parquet-thrift/FieldRepetitionType.js +1 -1
  60. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js +182 -170
  61. package/dist/es5/parquetjs/parquet-thrift/FileMetaData.js.map +1 -1
  62. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js +45 -31
  63. package/dist/es5/parquetjs/parquet-thrift/IndexPageHeader.js.map +1 -1
  64. package/dist/es5/parquetjs/parquet-thrift/IntType.js +79 -68
  65. package/dist/es5/parquetjs/parquet-thrift/IntType.js.map +1 -1
  66. package/dist/es5/parquetjs/parquet-thrift/JsonType.js +45 -31
  67. package/dist/es5/parquetjs/parquet-thrift/JsonType.js.map +1 -1
  68. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js +79 -68
  69. package/dist/es5/parquetjs/parquet-thrift/KeyValue.js.map +1 -1
  70. package/dist/es5/parquetjs/parquet-thrift/ListType.js +45 -31
  71. package/dist/es5/parquetjs/parquet-thrift/ListType.js.map +1 -1
  72. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js +343 -319
  73. package/dist/es5/parquetjs/parquet-thrift/LogicalType.js.map +1 -1
  74. package/dist/es5/parquetjs/parquet-thrift/MapType.js +45 -31
  75. package/dist/es5/parquetjs/parquet-thrift/MapType.js.map +1 -1
  76. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js +45 -31
  77. package/dist/es5/parquetjs/parquet-thrift/MicroSeconds.js.map +1 -1
  78. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js +45 -31
  79. package/dist/es5/parquetjs/parquet-thrift/MilliSeconds.js.map +1 -1
  80. package/dist/es5/parquetjs/parquet-thrift/NullType.js +45 -31
  81. package/dist/es5/parquetjs/parquet-thrift/NullType.js.map +1 -1
  82. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js +75 -64
  83. package/dist/es5/parquetjs/parquet-thrift/OffsetIndex.js.map +1 -1
  84. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js +94 -83
  85. package/dist/es5/parquetjs/parquet-thrift/PageEncodingStats.js.map +1 -1
  86. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js +169 -158
  87. package/dist/es5/parquetjs/parquet-thrift/PageHeader.js.map +1 -1
  88. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js +94 -83
  89. package/dist/es5/parquetjs/parquet-thrift/PageLocation.js.map +1 -1
  90. package/dist/es5/parquetjs/parquet-thrift/PageType.js +1 -1
  91. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js +124 -113
  92. package/dist/es5/parquetjs/parquet-thrift/RowGroup.js.map +1 -1
  93. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js +199 -188
  94. package/dist/es5/parquetjs/parquet-thrift/SchemaElement.js.map +1 -1
  95. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js +94 -83
  96. package/dist/es5/parquetjs/parquet-thrift/SortingColumn.js.map +1 -1
  97. package/dist/es5/parquetjs/parquet-thrift/Statistics.js +135 -124
  98. package/dist/es5/parquetjs/parquet-thrift/Statistics.js.map +1 -1
  99. package/dist/es5/parquetjs/parquet-thrift/StringType.js +45 -31
  100. package/dist/es5/parquetjs/parquet-thrift/StringType.js.map +1 -1
  101. package/dist/es5/parquetjs/parquet-thrift/TimeType.js +79 -68
  102. package/dist/es5/parquetjs/parquet-thrift/TimeType.js.map +1 -1
  103. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js +101 -88
  104. package/dist/es5/parquetjs/parquet-thrift/TimeUnit.js.map +1 -1
  105. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js +79 -68
  106. package/dist/es5/parquetjs/parquet-thrift/TimestampType.js.map +1 -1
  107. package/dist/es5/parquetjs/parquet-thrift/Type.js +1 -1
  108. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js +45 -31
  109. package/dist/es5/parquetjs/parquet-thrift/TypeDefinedOrder.js.map +1 -1
  110. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js +45 -31
  111. package/dist/es5/parquetjs/parquet-thrift/UUIDType.js.map +1 -1
  112. package/dist/es5/parquetjs/parquet-thrift/index.js +43 -43
  113. package/dist/es5/parquetjs/parser/decoders.js +391 -218
  114. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  115. package/dist/es5/parquetjs/parser/parquet-cursor.js +180 -62
  116. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -1
  117. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +370 -125
  118. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -1
  119. package/dist/es5/parquetjs/parser/parquet-reader.js +320 -91
  120. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  121. package/dist/es5/parquetjs/schema/declare.js +11 -9
  122. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  123. package/dist/es5/parquetjs/schema/schema.js +87 -73
  124. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  125. package/dist/es5/parquetjs/schema/shred.js +96 -56
  126. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  127. package/dist/es5/parquetjs/schema/types.js +40 -39
  128. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  129. package/dist/es5/parquetjs/utils/buffer-utils.js +1 -1
  130. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -1
  131. package/dist/es5/parquetjs/utils/file-utils.js +12 -8
  132. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  133. package/dist/es5/parquetjs/utils/read-utils.js +50 -22
  134. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -1
  135. package/dist/esm/parquet-loader.js +1 -1
  136. package/dist/esm/parquet-loader.js.map +1 -1
  137. package/dist/esm/parquet-writer.js +1 -1
  138. package/dist/esm/parquet-writer.js.map +1 -1
  139. package/package.json +5 -5
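
Nearly all of the churn in this release is mechanical: the `dist/es5` build is now transpiled fully to ES5, so `class` declarations are lowered to `_classCallCheck`/`_createClass` constructor functions, `for...of` loops are rewritten with `_createForOfIteratorHelper`, `const`/`let` become `var`, and shorthand object properties are expanded (`name` becomes `name: name`). A simplified sketch of the class transform, using minimal stand-ins for the @babel/runtime helpers (illustrative, not the package's exact output):

// Minimal stand-ins for the @babel/runtime helpers that appear in the hunks
// below (the real helpers also handle static members and getters/setters).
function _classCallCheck(instance, Constructor) {
  if (!(instance instanceof Constructor)) {
    throw new TypeError("Cannot call a class as a function");
  }
}

function _createClass(Constructor, protoProps) {
  protoProps.forEach(function (prop) {
    // Install each method on the prototype, non-enumerable like a real class method
    Object.defineProperty(Constructor.prototype, prop.key, {
      value: prop.value,
      writable: true,
      configurable: true
    });
  });
  return Constructor;
}

// What `class Point { constructor(x) { this.x = x; } double() { return this.x * 2; } }`
// compiles down to:
var Point = (function () {
  function Point(x) {
    _classCallCheck(this, Point); // rejects `Point(3)` called without `new`
    this.x = x;
  }

  _createClass(Point, [{
    key: "double",
    value: function double() {
      return this.x * 2;
    }
  }]);

  return Point;
})();

console.log(new Point(21).double()); // 42

The ParquetSchema hunks below are exactly this pattern applied to a larger class.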
package/dist/es5/parquetjs/schema/schema.js
@@ -7,6 +7,10 @@ Object.defineProperty(exports, "__esModule", {
 });
 exports.ParquetSchema = void 0;
 
+var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
+
+var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
+
 var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
 
 var _codecs = require("../codecs");
@@ -17,8 +21,9 @@ var _shred = require("./shred");
 
 var _types = require("./types");
 
-class ParquetSchema {
-  constructor(schema) {
+var ParquetSchema = function () {
+  function ParquetSchema(schema) {
+    (0, _classCallCheck2.default)(this, ParquetSchema);
     (0, _defineProperty2.default)(this, "schema", void 0);
     (0, _defineProperty2.default)(this, "fields", void 0);
     (0, _defineProperty2.default)(this, "fieldList", void 0);
@@ -27,66 +32,74 @@ class ParquetSchema {
     this.fieldList = listFields(this.fields);
   }
 
-  findField(path) {
-    if (typeof path === 'string') {
-      path = path.split(',');
-    } else {
-      path = path.slice(0);
-    }
-
-    let n = this.fields;
-
-    for (; path.length > 1; path.shift()) {
-      n = n[path[0]].fields;
-    }
-
-    return n[path[0]];
-  }
-
-  findFieldBranch(path) {
-    if (typeof path === 'string') {
-      path = path.split(',');
-    }
-
-    const branch = [];
-    let n = this.fields;
+  (0, _createClass2.default)(ParquetSchema, [{
+    key: "findField",
+    value: function findField(path) {
+      if (typeof path === 'string') {
+        path = path.split(',');
+      } else {
+        path = path.slice(0);
+      }
 
-    for (; path.length > 0; path.shift()) {
-      branch.push(n[path[0]]);
+      var n = this.fields;
 
-      if (path.length > 1) {
+      for (; path.length > 1; path.shift()) {
         n = n[path[0]].fields;
       }
-    }
-
-    return branch;
-  }
 
-  shredRecord(record, buffer) {
-    (0, _shred.shredRecord)(this, record, buffer);
-  }
+      return n[path[0]];
+    }
+  }, {
+    key: "findFieldBranch",
+    value: function findFieldBranch(path) {
+      if (typeof path === 'string') {
+        path = path.split(',');
+      }
 
-  materializeRecords(buffer) {
-    return (0, _shred.materializeRecords)(this, buffer);
-  }
+      var branch = [];
+      var n = this.fields;
 
-  compress(type) {
-    setCompress(this.schema, type);
-    setCompress(this.fields, type);
-    return this;
-  }
+      for (; path.length > 0; path.shift()) {
+        branch.push(n[path[0]]);
 
-  buffer() {
-    return (0, _shred.shredBuffer)(this);
-  }
+        if (path.length > 1) {
+          n = n[path[0]].fields;
+        }
+      }
 
-}
+      return branch;
+    }
+  }, {
+    key: "shredRecord",
+    value: function shredRecord(record, buffer) {
+      (0, _shred.shredRecord)(this, record, buffer);
+    }
+  }, {
+    key: "materializeRecords",
+    value: function materializeRecords(buffer) {
+      return (0, _shred.materializeRecords)(this, buffer);
+    }
+  }, {
+    key: "compress",
+    value: function compress(type) {
+      setCompress(this.schema, type);
+      setCompress(this.fields, type);
+      return this;
+    }
+  }, {
+    key: "buffer",
+    value: function buffer() {
+      return (0, _shred.shredBuffer)(this);
+    }
+  }]);
+  return ParquetSchema;
+}();
 
 exports.ParquetSchema = ParquetSchema;
 
 function setCompress(schema, type) {
-  for (const name in schema) {
-    const node = schema[name];
+  for (var name in schema) {
+    var node = schema[name];
 
     if (node.fields) {
       setCompress(node.fields, type);
@@ -97,15 +110,15 @@ function setCompress(schema, type) {
 }
 
 function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
-  const fieldList = {};
+  var fieldList = {};
 
-  for (const name in schema) {
-    const opts = schema[name];
-    const required = !opts.optional;
-    const repeated = Boolean(opts.repeated);
-    let rLevelMax = rLevelParentMax;
-    let dLevelMax = dLevelParentMax;
-    let repetitionType = 'REQUIRED';
+  for (var name in schema) {
+    var opts = schema[name];
+    var required = !opts.optional;
+    var repeated = Boolean(opts.repeated);
+    var rLevelMax = rLevelParentMax;
+    var dLevelMax = dLevelParentMax;
+    var repetitionType = 'REQUIRED';
 
     if (!required) {
       repetitionType = 'OPTIONAL';
@@ -119,22 +132,23 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
     }
 
     if (opts.fields) {
-      const cpath = path.concat([name]);
+      var _cpath = path.concat([name]);
+
       fieldList[name] = {
-        name,
-        path: cpath,
-        key: cpath.join(),
-        repetitionType,
-        rLevelMax,
-        dLevelMax,
+        name: name,
+        path: _cpath,
+        key: _cpath.join(),
+        repetitionType: repetitionType,
+        rLevelMax: rLevelMax,
+        dLevelMax: dLevelMax,
         isNested: true,
         fieldCount: Object.keys(opts.fields).length,
-        fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)
+        fields: buildFields(opts.fields, rLevelMax, dLevelMax, _cpath)
       };
       continue;
     }
 
-    const typeDef = _types.PARQUET_LOGICAL_TYPES[opts.type];
+    var typeDef = _types.PARQUET_LOGICAL_TYPES[opts.type];
 
     if (!typeDef) {
       throw new Error("invalid parquet type: ".concat(opts.type));
@@ -152,21 +166,21 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
       throw new Error("unsupported compression method: ".concat(opts.compression));
     }
 
-    const cpath = path.concat([name]);
+    var cpath = path.concat([name]);
     fieldList[name] = {
-      name,
+      name: name,
       primitiveType: typeDef.primitiveType,
       originalType: typeDef.originalType,
       path: cpath,
       key: cpath.join(),
-      repetitionType,
+      repetitionType: repetitionType,
       encoding: opts.encoding,
       compression: opts.compression,
       typeLength: opts.typeLength || typeDef.typeLength,
       presision: opts.presision,
       scale: opts.scale,
-      rLevelMax,
-      dLevelMax
+      rLevelMax: rLevelMax,
+      dLevelMax: dLevelMax
     };
   }
 
@@ -174,9 +188,9 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
 }
 
 function listFields(fields) {
-  let list = [];
+  var list = [];
 
-  for (const k in fields) {
+  for (var k in fields) {
     list.push(fields[k]);
 
     if (fields[k].isNested) {
package/dist/es5/parquetjs/schema/schema.js.map
@@ -1 +1 @@
-{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"], … (regenerated source map; minified names/mappings and embedded TypeScript source omitted)
+{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"], … (regenerated source map; minified names/mappings and embedded TypeScript source omitted)
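
The shred.js changes that follow show the same down-leveling applied to `for...of` loops: each loop is wrapped in Babel's `_createForOfIteratorHelper` (defined in the first shred.js hunk), which drives the iterator protocol manually and guarantees the iterator is closed. In spirit (a sketch; it assumes that helper is in scope, and `visit` is a stand-in for the loop body):

// Original:
//   for (const field of schema.fieldList) visit(field);
//
// Transpiled shape, per the hunks below: _iterator.s() starts iteration,
// .n() advances, .e(err) records an error for rethrow, .f() closes the iterator.
var _iterator = _createForOfIteratorHelper(schema.fieldList),
    _step;
try {
  for (_iterator.s(); !(_step = _iterator.n()).done;) {
    var field = _step.value;
    visit(field); // stand-in for the loop body
  }
} catch (err) {
  _iterator.e(err);
} finally {
  _iterator.f();
}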
package/dist/es5/parquetjs/schema/shred.js
@@ -1,5 +1,7 @@
 "use strict";
 
+var _typeof = require("@babel/runtime/helpers/typeof");
+
 Object.defineProperty(exports, "__esModule", {
   value: true
 });
@@ -8,7 +10,7 @@ exports.shredRecord = shredRecord;
 exports.materializeRecords = materializeRecords;
 Object.defineProperty(exports, "ParquetBuffer", {
   enumerable: true,
-  get: function () {
+  get: function get() {
     return _declare.ParquetBuffer;
   }
 });
@@ -17,31 +19,47 @@ var _declare = require("./declare");
 
 var Types = _interopRequireWildcard(require("./types"));
 
-function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function (nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
+function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
+
+function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
+
+function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
+
+function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
 
-function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || typeof obj !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
+function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
 
 function shredBuffer(schema) {
-  const columnData = {};
-
-  for (const field of schema.fieldList) {
-    columnData[field.key] = {
-      dlevels: [],
-      rlevels: [],
-      values: [],
-      pageHeaders: [],
-      count: 0
-    };
+  var columnData = {};
+
+  var _iterator = _createForOfIteratorHelper(schema.fieldList),
+      _step;
+
+  try {
+    for (_iterator.s(); !(_step = _iterator.n()).done;) {
+      var field = _step.value;
+      columnData[field.key] = {
+        dlevels: [],
+        rlevels: [],
+        values: [],
+        pageHeaders: [],
+        count: 0
+      };
+    }
+  } catch (err) {
+    _iterator.e(err);
+  } finally {
+    _iterator.f();
   }
 
   return {
     rowCount: 0,
-    columnData
+    columnData: columnData
   };
 }
 
 function shredRecord(schema, record, buffer) {
-  const data = shredBuffer(schema).columnData;
+  var data = shredBuffer(schema).columnData;
   shredRecordFields(schema.fields, record, data, 0, 0);
 
   if (buffer.rowCount === 0) {
@@ -52,18 +70,28 @@ function shredRecord(schema, record, buffer) {
 
   buffer.rowCount += 1;
 
-  for (const field of schema.fieldList) {
-    Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
-    Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
-    Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
-    buffer.columnData[field.key].count += data[field.key].count;
+  var _iterator2 = _createForOfIteratorHelper(schema.fieldList),
+      _step2;
+
+  try {
+    for (_iterator2.s(); !(_step2 = _iterator2.n()).done;) {
+      var field = _step2.value;
+      Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);
+      Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);
+      Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);
+      buffer.columnData[field.key].count += data[field.key].count;
+    }
+  } catch (err) {
+    _iterator2.e(err);
+  } finally {
+    _iterator2.f();
   }
 }
 
 function shredRecordFields(fields, record, data, rLevel, dLevel) {
-  for (const name in fields) {
-    const field = fields[name];
-    let values = [];
+  for (var name in fields) {
+    var field = fields[name];
+    var values = [];
 
     if (record && field.name in record && record[field.name] !== undefined && record[field.name] !== null) {
       if (record[field.name].constructor === Array) {
@@ -93,8 +121,8 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
       continue;
     }
 
-    for (let i = 0; i < values.length; i++) {
-      const rlvl = i === 0 ? rLevel : field.rLevelMax;
+    for (var i = 0; i < values.length; i++) {
+      var rlvl = i === 0 ? rLevel : field.rLevelMax;
 
       if (field.isNested) {
         shredRecordFields(field.fields, values[i], data, rlvl, field.dLevelMax);
@@ -109,11 +137,13 @@
 }
 
 function materializeRecords(schema, buffer) {
-  const records = [];
+  var records = [];
 
-  for (let i = 0; i < buffer.rowCount; i++) records.push({});
+  for (var i = 0; i < buffer.rowCount; i++) {
+    records.push({});
+  }
 
-  for (const key in buffer.columnData) {
+  for (var key in buffer.columnData) {
     materializeColumn(schema, buffer, key, records);
   }
 
@@ -121,45 +151,55 @@
 }
 
 function materializeColumn(schema, buffer, key, records) {
-  const data = buffer.columnData[key];
+  var data = buffer.columnData[key];
   if (!data.count) return;
-  const field = schema.findField(key);
-  const branch = schema.findFieldBranch(key);
-  const rLevels = new Array(field.rLevelMax + 1).fill(0);
-  let vIndex = 0;
-
-  for (let i = 0; i < data.count; i++) {
-    const dLevel = data.dlevels[i];
-    const rLevel = data.rlevels[i];
+  var field = schema.findField(key);
+  var branch = schema.findFieldBranch(key);
+  var rLevels = new Array(field.rLevelMax + 1).fill(0);
+  var vIndex = 0;
+
+  for (var i = 0; i < data.count; i++) {
+    var dLevel = data.dlevels[i];
+    var rLevel = data.rlevels[i];
     rLevels[rLevel]++;
     rLevels.fill(0, rLevel + 1);
-    let rIndex = 0;
-    let record = records[rLevels[rIndex++] - 1];
+    var rIndex = 0;
+    var record = records[rLevels[rIndex++] - 1];
 
-    for (const step of branch) {
-      if (step === field) break;
-      if (dLevel < step.dLevelMax) break;
+    var _iterator3 = _createForOfIteratorHelper(branch),
+        _step3;
 
-      if (step.repetitionType === 'REPEATED') {
-        if (!(step.name in record)) {
-          record[step.name] = [];
-        }
+    try {
+      for (_iterator3.s(); !(_step3 = _iterator3.n()).done;) {
+        var step = _step3.value;
+        if (step === field) break;
+        if (dLevel < step.dLevelMax) break;
 
-        const ix = rLevels[rIndex++];
+        if (step.repetitionType === 'REPEATED') {
+          if (!(step.name in record)) {
+            record[step.name] = [];
+          }
 
-        while (record[step.name].length <= ix) {
-          record[step.name].push({});
-        }
+          var _ix = rLevels[rIndex++];
 
-        record = record[step.name][ix];
-      } else {
-        record[step.name] = record[step.name] || {};
-        record = record[step.name];
+          while (record[step.name].length <= _ix) {
+            record[step.name].push({});
+          }
+
+          record = record[step.name][_ix];
+        } else {
+          record[step.name] = record[step.name] || {};
+          record = record[step.name];
+        }
       }
+    } catch (err) {
+      _iterator3.e(err);
+    } finally {
+      _iterator3.f();
     }
 
     if (dLevel === field.dLevelMax) {
-      const value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex], field);
+      var value = Types.fromPrimitive(field.originalType || field.primitiveType, data.values[vIndex], field);
      vIndex++;
 
      if (field.repetitionType === 'REPEATED') {
@@ -167,7 +207,7 @@ function materializeColumn(schema, buffer, key, records) {
          record[field.name] = [];
        }
 
-      const ix = rLevels[rIndex];
+      var ix = rLevels[rIndex];
 
      while (record[field.name].length <= ix) {
        record[field.name].push(null);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["shredBuffer","schema","columnData","field","fieldList","key","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","Types","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":";;;;;;;;;;;;;;;AAEA;;AAEA;;;;;;AAIO,SAASA,WAAT,CAAqBC,MAArB,EAA2D;AAChE,QAAMC,UAAuC,GAAG,EAAhD;;AACA,OAAK,MAAMC,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCF,IAAAA,UAAU,CAACC,KAAK,CAACE,GAAP,CAAV,GAAwB;AACtBC,MAAAA,OAAO,EAAE,EADa;AAEtBC,MAAAA,OAAO,EAAE,EAFa;AAGtBC,MAAAA,MAAM,EAAE,EAHc;AAItBC,MAAAA,WAAW,EAAE,EAJS;AAKtBC,MAAAA,KAAK,EAAE;AALe,KAAxB;AAOD;;AACD,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcT,IAAAA;AAAd,GAAP;AACD;;AAwBM,SAASU,WAAT,CAAqBX,MAArB,EAA4CY,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,QAAMC,IAAI,GAAGf,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAc,EAAAA,iBAAiB,CAACf,MAAM,CAACgB,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACZ,UAAP,GAAoBa,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AACA,OAAK,MAAMR,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCc,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BE,OAAxD,EAAiEQ,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAjF;AACAW,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BC,OAAxD,EAAiES,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAjF;AACAY,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BG,MAAxD,EAAgEO,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhF;AACAM,IAAAA,MAAM,CAACZ,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BK,KAA7B,IAAsCK,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAtD;AACD;AACF;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,MAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,UAAMd,KAAK,GAAGc,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIhB,MAAa,GAAG,EAApB;;AACA,QACEK,MAAM,IACNV,KAAK,CAACqB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CV,QAAAA,MAAM,GAAGK,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAf;AACD,OAFD,MAEO;AACLhB,QAAAA,MAAM,CAACY,IAAP,CAAYP,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CV,KAAK,CAAC0B,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,mCAAqC3B,KAAK,CAACqB,IAA3C,EAAN;AACD;;AACD,QAAIhB,MAAM,CAACmB,MAAP,GAAgB,CAAhB,IAAqBxB,KAAK,CAAC0B,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,sCAAwC3B,KAAK,CAACqB,IAA9C,EAAN;AACD;;AAGD,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIxB,KAAK,CAAC4B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACb,KAAK,CAACc,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGx
B,MAAM,CAACmB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,YAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBnB,KAAK,CAAC+B,SAAtC;;AACA,UAAI/B,KAAK,CAAC4B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACb,KAAK,CAACc,MAAP,EAAgBT,MAAM,CAACwB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC9B,KAAK,CAACgC,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BjB,KAAK,CAACgC,SAAnC;AACApB,QAAAA,IAAI,CAACZ,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBY,IAAvB,CACEgB,KAAK,CAACC,WAAN,CAAmBlC,KAAK,CAACmC,YAAN,IAAsBnC,KAAK,CAACoC,aAA/C,EAAgE/B,MAAM,CAACwB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBM,SAASQ,kBAAT,CAA4BvC,MAA5B,EAAmDa,MAAnD,EAA2F;AAChG,QAAM2B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIT,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC,EAA0CS,OAAO,CAACrB,IAAR,CAAa,EAAb;;AAC1C,OAAK,MAAMf,GAAX,IAAkBS,MAAM,CAACZ,UAAzB,EAAqC;AACnCwC,IAAAA,iBAAiB,CAACzC,MAAD,EAASa,MAAT,EAAiBT,GAAjB,EAAsBoC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEzC,MADF,EAEEa,MAFF,EAGET,GAHF,EAIEoC,OAJF,EAKE;AACA,QAAM1B,IAAI,GAAGD,MAAM,CAACZ,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACU,IAAI,CAACL,KAAV,EAAiB;AAEjB,QAAMP,KAAK,GAAGF,MAAM,CAAC0C,SAAP,CAAiBtC,GAAjB,CAAd;AACA,QAAMuC,MAAM,GAAG3C,MAAM,CAAC4C,eAAP,CAAuBxC,GAAvB,CAAf;AAGA,QAAMyC,OAAiB,GAAG,IAAI5B,KAAJ,CAAUf,KAAK,CAAC+B,SAAN,GAAkB,CAA5B,EAA+Ba,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIhB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,UAAMT,MAAM,GAAGR,IAAI,CAACT,OAAL,CAAa0B,CAAb,CAAf;AACA,UAAMV,MAAM,GAAGP,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACAc,IAAAA,OAAO,CAACxB,MAAD,CAAP;AACAwB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBzB,MAAM,GAAG,CAAzB;AAEA,QAAI2B,MAAM,GAAG,CAAb;AACA,QAAIpC,MAAM,GAAG4B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAGA,SAAK,MAAMC,IAAX,IAAmBN,MAAnB,EAA2B;AACzB,UAAIM,IAAI,KAAK/C,KAAb,EAAoB;AACpB,UAAIoB,MAAM,GAAG2B,IAAI,CAACf,SAAlB,EAA6B;;AAC7B,UAAIe,IAAI,CAACrB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,YAAI,EAAEqB,IAAI,CAAC1B,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,eAAOpC,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBG,MAAlB,IAA4BwB,EAAnC,EAAuC;AAErCtC,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,QAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkB2B,EAAlB,CAAT;AACD,OAXD,MAWO;AACLtC,QAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoBX,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,IAAqB,EAAzC;AACAX,QAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAf;AACD;AACF;;AAGD,QAAID,MAAM,KAAKpB,KAAK,CAACgC,SAArB,EAAgC;AAC9B,YAAMiB,KAAK,GAAGhB,KAAK,CAACiB,aAAN,CAEZlD,KAAK,CAACmC,YAAN,IAAsBnC,KAAK,CAACoC,aAFhB,EAGZxB,IAAI,CAACP,MAAL,CAAYwC,MAAZ,CAHY,EAIZ7C,KAJY,CAAd;AAMA6C,MAAAA,MAAM;;AACN,UAAI7C,KAAK,CAAC0B,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAE1B,KAAK,CAACqB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,cAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOpC,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BwB,EAApC,EAAwC;AAEtCtC,UAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,CAAmB2B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLvC,QAAAA,MAAM,CAACV,KAAK,CAACqB,IAAP,CAAN,GAAqB4B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 
ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? 
rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex],\n field\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}
+ {"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["shredBuffer","schema","columnData","fieldList","field","key","dlevels","rlevels","values","pageHeaders","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","Types","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":";;;;;;;;;;;;;;;;;AAEA;;AAEA;;;;;;;;;;;;AAIO,SAASA,WAAT,CAAqBC,MAArB,EAA2D;AAChE,MAAMC,UAAuC,GAAG,EAAhD;;AADgE,6CAE5CD,MAAM,CAACE,SAFqC;AAAA;;AAAA;AAEhE,wDAAsC;AAAA,UAA3BC,KAA2B;AACpCF,MAAAA,UAAU,CAACE,KAAK,CAACC,GAAP,CAAV,GAAwB;AACtBC,QAAAA,OAAO,EAAE,EADa;AAEtBC,QAAAA,OAAO,EAAE,EAFa;AAGtBC,QAAAA,MAAM,EAAE,EAHc;AAItBC,QAAAA,WAAW,EAAE,EAJS;AAKtBC,QAAAA,KAAK,EAAE;AALe,OAAxB;AAOD;AAV+D;AAAA;AAAA;AAAA;AAAA;;AAWhE,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcT,IAAAA,UAAU,EAAVA;AAAd,GAAP;AACD;;AAwBM,SAASU,WAAT,CAAqBX,MAArB,EAA4CY,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,MAAMC,IAAI,GAAGf,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAc,EAAAA,iBAAiB,CAACf,MAAM,CAACgB,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACZ,UAAP,GAAoBa,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AAZ2F,8CAavEV,MAAM,CAACE,SAbgE;AAAA;;AAAA;AAa3F,2DAAsC;AAAA,UAA3BC,KAA2B;AACpCc,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BE,OAAxD,EAAiEQ,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAjF;AACAW,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BC,OAAxD,EAAiES,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBC,OAAjF;AACAY,MAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BG,MAAxD,EAAgEO,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBG,MAAhF;AACAM,MAAAA,MAAM,CAACZ,UAAP,CAAkBE,KAAK,CAACC,GAAxB,EAA6BK,KAA7B,IAAsCK,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAtD;AACD;AAlB0F;AAAA;AAAA;AAAA;AAAA;AAmB5F;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,IAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,QAAMb,KAAK,GAAGa,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIhB,MAAa,GAAG,EAApB;;AACA,QACEK,MAAM,IACNT,KAAK,CAACoB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CV,QAAAA,MAAM,GAAGK,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAf;AACD,OAFD,MAEO;AACLhB,QAAAA,MAAM,CAACY,IAAP,CAAYP,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CT,KAAK,CAACyB,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,mCAAqC1B,KAAK,CAACoB,IAA3C,EAAN;AACD;;AACD,QAAIhB,MAAM,CAACmB,MAAP,GAAgB,CAAhB,IAAqBvB,KAAK,CAACyB,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,sCAAwC1B,KAAK,CAACoB,IAA9C,EAAN;AACD;;AAGD,QAAIhB,MAAM,CAACmB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIvB,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACX,KAAK,C
AACC,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGxB,MAAM,CAACmB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,UAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBlB,KAAK,CAAC8B,SAAtC;;AACA,UAAI9B,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgBT,MAAM,CAACwB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC7B,KAAK,CAAC+B,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBK,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBa,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBc,IAAxB,CAA6BhB,KAAK,CAAC+B,SAAnC;AACApB,QAAAA,IAAI,CAACX,KAAK,CAACC,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBY,IAAvB,CACEgB,KAAK,CAACC,WAAN,CAAmBjC,KAAK,CAACkC,YAAN,IAAsBlC,KAAK,CAACmC,aAA/C,EAAgE/B,MAAM,CAACwB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBM,SAASQ,kBAAT,CAA4BvC,MAA5B,EAAmDa,MAAnD,EAA2F;AAChG,MAAM2B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIT,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC;AAA0CS,IAAAA,OAAO,CAACrB,IAAR,CAAa,EAAb;AAA1C;;AACA,OAAK,IAAMf,GAAX,IAAkBS,MAAM,CAACZ,UAAzB,EAAqC;AACnCwC,IAAAA,iBAAiB,CAACzC,MAAD,EAASa,MAAT,EAAiBT,GAAjB,EAAsBoC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEzC,MADF,EAEEa,MAFF,EAGET,GAHF,EAIEoC,OAJF,EAKE;AACA,MAAM1B,IAAI,GAAGD,MAAM,CAACZ,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACU,IAAI,CAACL,KAAV,EAAiB;AAEjB,MAAMN,KAAK,GAAGH,MAAM,CAAC0C,SAAP,CAAiBtC,GAAjB,CAAd;AACA,MAAMuC,MAAM,GAAG3C,MAAM,CAAC4C,eAAP,CAAuBxC,GAAvB,CAAf;AAGA,MAAMyC,OAAiB,GAAG,IAAI5B,KAAJ,CAAUd,KAAK,CAAC8B,SAAN,GAAkB,CAA5B,EAA+Ba,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIhB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,QAAMT,MAAM,GAAGR,IAAI,CAACT,OAAL,CAAa0B,CAAb,CAAf;AACA,QAAMV,MAAM,GAAGP,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACAc,IAAAA,OAAO,CAACxB,MAAD,CAAP;AACAwB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBzB,MAAM,GAAG,CAAzB;AAEA,QAAI2B,MAAM,GAAG,CAAb;AACA,QAAIpC,MAAM,GAAG4B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAPmC,gDAUhBL,MAVgB;AAAA;;AAAA;AAUnC,6DAA2B;AAAA,YAAhBM,IAAgB;AACzB,YAAIA,IAAI,KAAK9C,KAAb,EAAoB;AACpB,YAAImB,MAAM,GAAG2B,IAAI,CAACf,SAAlB,EAA6B;;AAC7B,YAAIe,IAAI,CAACrB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,cAAI,EAAEqB,IAAI,CAAC1B,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,YAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM2B,GAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,iBAAOpC,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBG,MAAlB,IAA4BwB,GAAnC,EAAuC;AAErCtC,YAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,UAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,CAAkB2B,GAAlB,CAAT;AACD,SAXD,MAWO;AACLtC,UAAAA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,GAAoBX,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAN,IAAqB,EAAzC;AACAX,UAAAA,MAAM,GAAGA,MAAM,CAACqC,IAAI,CAAC1B,IAAN,CAAf;AACD;AACF;AA5BkC;AAAA;AAAA;AAAA;AAAA;;AA+BnC,QAAID,MAAM,KAAKnB,KAAK,CAAC+B,SAArB,EAAgC;AAC9B,UAAMiB,KAAK,GAAGhB,KAAK,CAACiB,aAAN,CAEZjD,KAAK,CAACkC,YAAN,IAAsBlC,KAAK,CAACmC,aAFhB,EAGZxB,IAAI,CAACP,MAAL,CAAYwC,MAAZ,CAHY,EAIZ5C,KAJY,CAAd;AAMA4C,MAAAA,MAAM;;AACN,UAAI5C,KAAK,CAACyB,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAEzB,KAAK,CAACoB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,YAAM2B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOpC,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BwB,EAApC,EAAwC;AAEtCtC,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmB2B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLvC,
QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB4B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n pageHeaders: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? 
rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex],\n field\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}