@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
@@ -11,5 +11,6 @@ export enum CompressionCodec {
11
11
  LZO = 3,
12
12
  BROTLI = 4,
13
13
  LZ4 = 5,
14
- ZSTD = 6
14
+ ZSTD = 6,
15
+ LZ4_RAW = 7 // Added in 2.9
15
16
  }
@@ -0,0 +1,448 @@
1
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
2
+ import {
3
+ ParquetCodec,
4
+ ParquetData,
5
+ ParquetOptions,
6
+ ParquetPageData,
7
+ ParquetType,
8
+ PrimitiveType,
9
+ SchemaDefinition
10
+ } from '../schema/declare';
11
+ import {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from '../codecs';
12
+ import {
13
+ ConvertedType,
14
+ Encoding,
15
+ FieldRepetitionType,
16
+ PageHeader,
17
+ PageType,
18
+ SchemaElement,
19
+ Type
20
+ } from '../parquet-thrift';
21
+ import {decompress} from '../compression';
22
+ import {PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING} from '../../constants';
23
+ import {decodePageHeader, getThriftEnum, getBitWidth} from '../utils/read-utils';
24
+
25
+ /**
26
+ * Decode data pages
27
+ * @param buffer - input data
28
+ * @param options - decoding options; the parquet column is read from options.column
29
+ * @param options.compression - compression type
30
+ * @returns parquet data page data
31
+ */
32
+ export async function decodeDataPages(
33
+ buffer: Buffer,
34
+ options: ParquetOptions
35
+ ): Promise<ParquetData> {
36
+ const cursor: CursorBuffer = {
37
+ buffer,
38
+ offset: 0,
39
+ size: buffer.length
40
+ };
41
+
42
+ const data: ParquetData = {
43
+ rlevels: [],
44
+ dlevels: [],
45
+ values: [],
46
+ pageHeaders: [],
47
+ count: 0
48
+ };
49
+
50
+ let dictionary = options.dictionary || [];
51
+
52
+ while (
53
+ // @ts-ignore size can be undefined
54
+ cursor.offset < cursor.size &&
55
+ (!options.numValues || data.dlevels.length < Number(options.numValues))
56
+ ) {
57
+ // Pages must be decoded in sequence: decodePage advances cursor.offset, so each call depends on the offset left by the previous one.
58
+ const page = await decodePage(cursor, options);
59
+
60
+ if (page.dictionary) {
61
+ dictionary = page.dictionary;
62
+ // eslint-disable-next-line no-continue
63
+ continue;
64
+ }
65
+
66
+ if (dictionary.length) {
67
+ // eslint-disable-next-line no-loop-func
68
+ page.values = page.values.map((value) => dictionary[value]);
69
+ }
70
+
71
+ for (let index = 0; index < page.rlevels.length; index++) {
72
+ data.rlevels.push(page.rlevels[index]);
73
+ data.dlevels.push(page.dlevels[index]);
74
+ const value = page.values[index];
75
+
76
+ if (value !== undefined) {
77
+ data.values.push(value);
78
+ }
79
+ }
80
+
81
+ data.count += page.count;
82
+ data.pageHeaders.push(page.pageHeader);
83
+ }
84
+
85
+ return data;
86
+ }
87
+
88
+ /**
89
+ * Decode parquet page based on page type
90
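+ * @param cursor - buffer cursor positioned at a page header; its offset is advanced past the header
91
+ * @param options - decoding options passed on to the page-type specific decoder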
92
+ */
93
+ export async function decodePage(
94
+ cursor: CursorBuffer,
95
+ options: ParquetOptions
96
+ ): Promise<ParquetPageData> {
97
+ let page;
98
+ const {pageHeader, length} = await decodePageHeader(cursor.buffer, cursor.offset);
99
+ cursor.offset += length;
100
+
101
+ const pageType = getThriftEnum(PageType, pageHeader.type);
102
+
103
+ switch (pageType) {
104
+ case 'DATA_PAGE':
105
+ page = await decodeDataPage(cursor, pageHeader, options);
106
+ break;
107
+ case 'DATA_PAGE_V2':
108
+ page = await decodeDataPageV2(cursor, pageHeader, options);
109
+ break;
110
+ case 'DICTIONARY_PAGE':
111
+ page = {
112
+ dictionary: await decodeDictionaryPage(cursor, pageHeader, options),
113
+ pageHeader
114
+ };
115
+ break;
116
+ default:
117
+ throw new Error(`invalid page type: ${pageType}`);
118
+ }
119
+
120
+ return page;
121
+ }
122
+
123
+ /**
124
+ * Decode parquet schema
125
+ * @param schemaElements input schema elements data
126
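+ * @param offset index into schemaElements to start reading from
127
+ * @param len number of sibling elements to read at this nesting level
128
+ * @returns result.offset - the input offset, returned unchanged
129
+ * result.next - index of the first schema element after those consumed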
130
+ * result.schema - schema read from the input data
131
+ * @todo output offset is the same as input - possibly excess output field
132
+ */
133
+ export function decodeSchema(
134
+ schemaElements: SchemaElement[],
135
+ offset: number,
136
+ len: number
137
+ ): {
138
+ offset: number;
139
+ next: number;
140
+ schema: SchemaDefinition;
141
+ } {
142
+ const schema: SchemaDefinition = {};
143
+ let next = offset;
144
+ for (let i = 0; i < len; i++) {
145
+ const schemaElement = schemaElements[next];
146
+
147
+ const repetitionType =
148
+ next > 0 ? getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';
149
+
150
+ let optional = false;
151
+ let repeated = false;
152
+ switch (repetitionType) {
153
+ case 'REQUIRED':
154
+ break;
155
+ case 'OPTIONAL':
156
+ optional = true;
157
+ break;
158
+ case 'REPEATED':
159
+ repeated = true;
160
+ break;
161
+ default:
162
+ throw new Error('parquet: unknown repetition type');
163
+ }
164
+
165
+ if (schemaElement.num_children! > 0) {
166
+ const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);
167
+ next = res.next;
168
+ schema[schemaElement.name] = {
169
+ // type: undefined,
170
+ optional,
171
+ repeated,
172
+ fields: res.schema
173
+ };
174
+ } else {
175
+ const type = getThriftEnum(Type, schemaElement.type!);
176
+ let logicalType = type;
177
+
178
+ if (schemaElement.converted_type) {
179
+ logicalType = getThriftEnum(ConvertedType, schemaElement.converted_type);
180
+ }
181
+
182
+ switch (logicalType) {
183
+ case 'DECIMAL':
184
+ logicalType = `${logicalType}_${type}` as ParquetType;
185
+ break;
186
+ default:
187
+ }
188
+
189
+ schema[schemaElement.name] = {
190
+ type: logicalType as ParquetType,
191
+ typeLength: schemaElement.type_length,
192
+ presision: schemaElement.precision,
193
+ scale: schemaElement.scale,
194
+ optional,
195
+ repeated
196
+ };
197
+ next++;
198
+ }
199
+ }
200
+ return {schema, offset, next};
201
+ }
202
+
203
+ /**
204
+ * Decode a consecutive array of data using one of the parquet encodings
205
+ */
206
+ function decodeValues(
207
+ type: PrimitiveType,
208
+ encoding: ParquetCodec,
209
+ cursor: CursorBuffer,
210
+ count: number,
211
+ opts: ParquetCodecOptions
212
+ ): any[] {
213
+ if (!(encoding in PARQUET_CODECS)) {
214
+ throw new Error(`invalid encoding: ${encoding}`);
215
+ }
216
+ return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
217
+ }
218
+
219
+ /**
220
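+ * Decode a parquet data page (DATA_PAGE) from a column chunk
221
+ * @param cursor - cursor positioned at the start of the page body
222
+ * @param header - page header; page sizes, value count and encodings are read from it
223
+ * @param options - decoding options; compression and column are read from it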
224
+ */
225
+ async function decodeDataPage(
226
+ cursor: CursorBuffer,
227
+ header: PageHeader,
228
+ options: ParquetOptions
229
+ ): Promise<ParquetPageData> {
230
+ const cursorEnd = cursor.offset + header.compressed_page_size;
231
+ const valueCount = header.data_page_header?.num_values;
232
+
233
+ /* uncompress page */
234
+ let dataCursor = cursor;
235
+
236
+ if (options.compression !== 'UNCOMPRESSED') {
237
+ const valuesBuf = await decompress(
238
+ options.compression,
239
+ cursor.buffer.slice(cursor.offset, cursorEnd),
240
+ header.uncompressed_page_size
241
+ );
242
+ dataCursor = {
243
+ buffer: valuesBuf,
244
+ offset: 0,
245
+ size: valuesBuf.length
246
+ };
247
+ cursor.offset = cursorEnd;
248
+ }
249
+
250
+ /* read repetition levels */
251
+ const rLevelEncoding = getThriftEnum(
252
+ Encoding,
253
+ header.data_page_header?.repetition_level_encoding!
254
+ ) as ParquetCodec;
255
+ // tslint:disable-next-line:prefer-array-literal
256
+ let rLevels = new Array(valueCount);
257
+
258
+ if (options.column.rLevelMax > 0) {
259
+ rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {
260
+ bitWidth: getBitWidth(options.column.rLevelMax),
261
+ disableEnvelope: false
262
+ // column: opts.column
263
+ });
264
+ } else {
265
+ rLevels.fill(0);
266
+ }
267
+
268
+ /* read definition levels */
269
+ const dLevelEncoding = getThriftEnum(
270
+ Encoding,
271
+ header.data_page_header?.definition_level_encoding!
272
+ ) as ParquetCodec;
273
+ // tslint:disable-next-line:prefer-array-literal
274
+ let dLevels = new Array(valueCount);
275
+ if (options.column.dLevelMax > 0) {
276
+ dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {
277
+ bitWidth: getBitWidth(options.column.dLevelMax),
278
+ disableEnvelope: false
279
+ // column: opts.column
280
+ });
281
+ } else {
282
+ dLevels.fill(0);
283
+ }
284
+ let valueCountNonNull = 0;
285
+ for (const dlvl of dLevels) {
286
+ if (dlvl === options.column.dLevelMax) {
287
+ valueCountNonNull++;
288
+ }
289
+ }
290
+
291
+ /* read values */
292
+ const valueEncoding = getThriftEnum(Encoding, header.data_page_header?.encoding!) as ParquetCodec;
293
+ const decodeOptions = {
294
+ typeLength: options.column.typeLength,
295
+ bitWidth: options.column.typeLength
296
+ };
297
+
298
+ const values = decodeValues(
299
+ options.column.primitiveType!,
300
+ valueEncoding,
301
+ dataCursor,
302
+ valueCountNonNull,
303
+ decodeOptions
304
+ );
305
+
306
+ return {
307
+ dlevels: dLevels,
308
+ rlevels: rLevels,
309
+ values,
310
+ count: valueCount!,
311
+ pageHeader: header
312
+ };
313
+ }
314
+
315
+ /**
316
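+ * Decode a parquet data page version 2 (DATA_PAGE_V2) from a column chunk
317
+ * @param cursor - cursor positioned at the start of the page body
318
+ * @param header - page header; data_page_header_v2 supplies value count, null count and encoding
319
+ * @param opts - decoding options; compression and column are read from it
320
+ * @returns decoded repetition/definition levels, values, value count and the page header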
321
+ */
322
+ async function decodeDataPageV2(
323
+ cursor: CursorBuffer,
324
+ header: PageHeader,
325
+ opts: any
326
+ ): Promise<ParquetPageData> {
327
+ const cursorEnd = cursor.offset + header.compressed_page_size;
328
+
329
+ const valueCount = header.data_page_header_v2?.num_values;
330
+ // @ts-ignore
331
+ const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;
332
+ const valueEncoding = getThriftEnum(
333
+ Encoding,
334
+ header.data_page_header_v2?.encoding!
335
+ ) as ParquetCodec;
336
+
337
+ /* read repetition levels */
338
+ // tslint:disable-next-line:prefer-array-literal
339
+ let rLevels = new Array(valueCount);
340
+ if (opts.column.rLevelMax > 0) {
341
+ rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {
342
+ bitWidth: getBitWidth(opts.column.rLevelMax),
343
+ disableEnvelope: true
344
+ });
345
+ } else {
346
+ rLevels.fill(0);
347
+ }
348
+
349
+ /* read definition levels */
350
+ // tslint:disable-next-line:prefer-array-literal
351
+ let dLevels = new Array(valueCount);
352
+ if (opts.column.dLevelMax > 0) {
353
+ dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {
354
+ bitWidth: getBitWidth(opts.column.dLevelMax),
355
+ disableEnvelope: true
356
+ });
357
+ } else {
358
+ dLevels.fill(0);
359
+ }
360
+
361
+ /* read values */
362
+ let valuesBufCursor = cursor;
363
+
364
+ if (header.data_page_header_v2?.is_compressed) {
365
+ const valuesBuf = await decompress(
366
+ opts.compression,
367
+ cursor.buffer.slice(cursor.offset, cursorEnd),
368
+ header.uncompressed_page_size
369
+ );
370
+
371
+ valuesBufCursor = {
372
+ buffer: valuesBuf,
373
+ offset: 0,
374
+ size: valuesBuf.length
375
+ };
376
+
377
+ cursor.offset = cursorEnd;
378
+ }
379
+
380
+ const decodeOptions = {
381
+ typeLength: opts.column.typeLength,
382
+ bitWidth: opts.column.typeLength
383
+ };
384
+
385
+ const values = decodeValues(
386
+ opts.column.primitiveType!,
387
+ valueEncoding,
388
+ valuesBufCursor,
389
+ valueCountNonNull,
390
+ decodeOptions
391
+ );
392
+
393
+ return {
394
+ dlevels: dLevels,
395
+ rlevels: rLevels,
396
+ values,
397
+ count: valueCount!,
398
+ pageHeader: header
399
+ };
400
+ }
401
+
402
+ /**
403
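+ * Decode a dictionary page; data pages use its entries to look up values by index.
404
+ * @param cursor - cursor positioned at the start of the page body; its offset is advanced past the page
405
+ * @param pageHeader - page header; page sizes and dictionary_page_header.num_values are read from it
406
+ * @param options - decoding options; compression and column are read from it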
407
+ */
408
+ async function decodeDictionaryPage(
409
+ cursor: CursorBuffer,
410
+ pageHeader: PageHeader,
411
+ options: ParquetOptions
412
+ ): Promise<string[]> {
413
+ const cursorEnd = cursor.offset + pageHeader.compressed_page_size;
414
+
415
+ let dictCursor = {
416
+ offset: 0,
417
+ buffer: cursor.buffer.slice(cursor.offset, cursorEnd),
418
+ size: cursorEnd - cursor.offset
419
+ };
420
+
421
+ cursor.offset = cursorEnd;
422
+
423
+ if (options.compression !== 'UNCOMPRESSED') {
424
+ const valuesBuf = await decompress(
425
+ options.compression,
426
+ dictCursor.buffer.slice(dictCursor.offset, cursorEnd),
427
+ pageHeader.uncompressed_page_size
428
+ );
429
+
430
+ dictCursor = {
431
+ buffer: valuesBuf,
432
+ offset: 0,
433
+ size: valuesBuf.length
434
+ };
435
+
436
+ cursor.offset = cursorEnd;
437
+ }
438
+
439
+ const numValues = pageHeader?.dictionary_page_header?.num_values || 0;
440
+
441
+ return decodeValues(
442
+ options.column.primitiveType!,
443
+ options.column.encoding!,
444
+ dictCursor,
445
+ numValues,
446
+ options as ParquetCodecOptions
447
+ ).map((d) => d.toString());
448
+ }