@loaders.gl/parquet 3.4.6 → 4.0.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +27 -34
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +6 -6
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +58 -42
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/es5/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +11 -7
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js +51 -29
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/es5/lib/wasm/parse-parquet-wasm.js +6 -6
- package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/es5/parquet-loader.js +16 -4
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-wasm-writer.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +4 -4
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +7 -7
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +117 -22
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/index.js +5 -5
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js +57 -41
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -1
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +33 -31
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -1
- package/dist/esm/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +12 -8
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -1
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +14 -3
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -1
- package/dist/esm/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
- package/dist/esm/parquet-loader.js +14 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-wasm-writer.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
- package/dist/esm/parquetjs/parser/parquet-reader.js +2 -2
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -1
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +6 -6
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +108 -21
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/index.d.ts +8 -49
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -6
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +2 -2
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +4 -4
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-from-parquet.js +48 -44
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
- package/dist/lib/arrow/convert-schema-to-parquet.js +30 -31
- package/dist/lib/geo/decode-geo-metadata.js +12 -8
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +2 -2
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-columns.js +13 -7
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +3 -2
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -1
- package/dist/lib/parsers/parse-parquet-to-rows.js +16 -19
- package/dist/lib/wasm/parse-parquet-wasm.d.ts +3 -3
- package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
- package/dist/lib/wasm/parse-parquet-wasm.js +3 -3
- package/dist/parquet-loader.d.ts +3 -14
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +14 -2
- package/dist/parquet-worker.js +31 -38
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquet-writer.d.ts +2 -1
- package/dist/parquet-writer.d.ts.map +1 -1
- package/dist/parquet-writer.js +1 -0
- package/dist/parquetjs/encoder/parquet-encoder.d.ts +4 -4
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
- package/dist/parquetjs/parser/decoders.d.ts +2 -2
- package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.d.ts +6 -6
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +1 -1
- package/dist/parquetjs/schema/declare.d.ts +6 -5
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +3 -3
- package/dist/parquetjs/schema/schema.d.ts +4 -4
- package/dist/parquetjs/schema/schema.d.ts.map +1 -1
- package/dist/parquetjs/schema/schema.js +5 -5
- package/dist/parquetjs/schema/shred.d.ts +17 -111
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +127 -119
- package/package.json +8 -8
- package/src/index.ts +32 -9
- package/src/lib/arrow/convert-row-group-to-columns.ts +2 -2
- package/src/lib/arrow/convert-schema-from-parquet.ts +56 -66
- package/src/lib/arrow/convert-schema-to-parquet.ts +32 -44
- package/src/lib/geo/decode-geo-metadata.ts +17 -8
- package/src/lib/parsers/parse-parquet-to-columns.ts +22 -11
- package/src/lib/parsers/parse-parquet-to-rows.ts +28 -23
- package/src/lib/wasm/parse-parquet-wasm.ts +7 -7
- package/src/parquet-loader.ts +25 -2
- package/src/parquet-writer.ts +4 -1
- package/src/parquetjs/encoder/parquet-encoder.ts +11 -10
- package/src/parquetjs/parser/decoders.ts +3 -3
- package/src/parquetjs/parser/parquet-reader.ts +7 -7
- package/src/parquetjs/schema/declare.ts +6 -5
- package/src/parquetjs/schema/schema.ts +8 -8
- package/src/parquetjs/schema/shred.ts +142 -103
package/dist/parquet-writer.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { Writer } from '@loaders.gl/loader-utils';
|
|
2
|
+
import { Table, TableBatch } from '@loaders.gl/schema';
|
|
2
3
|
export type ParquetWriterOptions = {};
|
|
3
|
-
export declare const ParquetWriter: Writer
|
|
4
|
+
export declare const ParquetWriter: Writer<Table, TableBatch, ParquetWriterOptions>;
|
|
4
5
|
//# sourceMappingURL=parquet-writer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-writer.d.ts","sourceRoot":"","sources":["../src/parquet-writer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"parquet-writer.d.ts","sourceRoot":"","sources":["../src/parquet-writer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AACrD,OAAO,EAAC,KAAK,EAAE,UAAU,EAAC,MAAM,oBAAoB,CAAC;AAMrD,MAAM,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAItC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,oBAAoB,CAUzE,CAAC"}
|
package/dist/parquet-writer.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
2
|
/// <reference types="node" />
|
|
3
3
|
import { stream } from '@loaders.gl/loader-utils';
|
|
4
|
-
import {
|
|
4
|
+
import { ParquetRowGroup, ParquetRow } from '../schema/declare';
|
|
5
5
|
import { ParquetSchema } from '../schema/schema';
|
|
6
6
|
import { RowGroup } from '../parquet-thrift';
|
|
7
7
|
export interface ParquetEncoderOptions {
|
|
@@ -34,7 +34,7 @@ export declare class ParquetEncoder<T> {
|
|
|
34
34
|
static openStream<T>(schema: ParquetSchema, outputStream: stream.Writable, opts?: ParquetEncoderOptions): Promise<ParquetEncoder<T>>;
|
|
35
35
|
schema: ParquetSchema;
|
|
36
36
|
envelopeWriter: ParquetEnvelopeWriter;
|
|
37
|
-
rowBuffer:
|
|
37
|
+
rowBuffer: ParquetRowGroup;
|
|
38
38
|
rowGroupSize: number;
|
|
39
39
|
closed: boolean;
|
|
40
40
|
userMetadata: Record<string, string>;
|
|
@@ -47,7 +47,7 @@ export declare class ParquetEncoder<T> {
|
|
|
47
47
|
* Append a single row to the parquet file. Rows are buffered in memory until
|
|
48
48
|
* rowGroupSize rows are in the buffer or close() is called
|
|
49
49
|
*/
|
|
50
|
-
appendRow<T>(row: T): Promise<void>;
|
|
50
|
+
appendRow<T extends ParquetRow>(row: T): Promise<void>;
|
|
51
51
|
/**
|
|
52
52
|
* Finish writing the parquet file and commit the footer to disk. This method
|
|
53
53
|
* MUST be called after you are finished adding rows. You must not call this
|
|
@@ -101,7 +101,7 @@ export declare class ParquetEnvelopeWriter {
|
|
|
101
101
|
* Encode a parquet row group. The records object should be created using the
|
|
102
102
|
* shredRecord method
|
|
103
103
|
*/
|
|
104
|
-
writeRowGroup(records:
|
|
104
|
+
writeRowGroup(records: ParquetRowGroup): Promise<void>;
|
|
105
105
|
/**
|
|
106
106
|
* Write the parquet file footer
|
|
107
107
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-encoder.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/encoder/parquet-encoder.ts"],"names":[],"mappings":";;AAEA,OAAO,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AAGhD,OAAO,EACL,
|
|
1
|
+
{"version":3,"file":"parquet-encoder.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/encoder/parquet-encoder.ts"],"names":[],"mappings":";;AAEA,OAAO,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AAGhD,OAAO,EACL,eAAe,EAKf,UAAU,EACX,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAaL,QAAQ,EAGT,MAAM,mBAAmB,CAAC;AA2B3B,MAAM,WAAW,qBAAqB;IACpC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AAEH,qBAAa,cAAc,CAAC,CAAC;IAC3B;;;OAGG;WACU,QAAQ,CAAC,CAAC,EACrB,MAAM,EAAE,aAAa,EACrB,IAAI,EAAE,MAAM,EACZ,IAAI,CAAC,EAAE,qBAAqB,GAC3B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;IAK7B;;;OAGG;WACU,UAAU,CAAC,CAAC,EACvB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,MAAM,CAAC,QAAQ,EAC7B,IAAI,GAAE,qBAA0B,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;IAKtB,MAAM,EAAE,aAAa,CAAC;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,SAAS,EAAE,eAAe,CAAC;IAC3B,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE5C;;OAEG;gBAED,MAAM,EAAE,aAAa,EACrB,cAAc,EAAE,qBAAqB,EACrC,IAAI,EAAE,qBAAqB;IAcvB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAUlC;;;OAGG;IACG,SAAS,CAAC,CAAC,SAAS,UAAU,EAAE,GAAG,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAW5D;;;;;OAKG;IACG,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBjD;;OAEG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAK7C;;;;;OAKG;IACH,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIlC;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B;AAED;;;;;GAKG;AACH,qBAAa,qBAAqB;IAChC;;OAEG;WACU,UAAU,CACrB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,MAAM,CAAC,QAAQ,EAC7B,IAAI,EAAE,qBAAqB,GAC1B,OAAO,CAAC,qBAAqB,CAAC;IAM1B,MAAM,EAAE,aAAa,CAAC;IACtB,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtC,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,OAAO,CAAC;gBAG5B,MAAM,EAAE,aAAa,EACrB,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,EACvC,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,EAC5B,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,qBAAqB;IAY7B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKxC;;OAEG;IACH,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;;OAGG;IACG,aAAa,CAAC,OAAO,EAAE,eAAe,GAAG,OAAO,CAAC,IAAI,CAAC;IAY5D;;OAEG;IACH,WAAW,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAWhE;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B"}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/// <reference types="node" />
|
|
2
|
-
import {
|
|
2
|
+
import { ParquetColumnChunk, ParquetOptions, ParquetPageData, SchemaDefinition } from '../schema/declare';
|
|
3
3
|
import { CursorBuffer } from '../codecs';
|
|
4
4
|
import { SchemaElement } from '../parquet-thrift';
|
|
5
5
|
/**
|
|
@@ -9,7 +9,7 @@ import { SchemaElement } from '../parquet-thrift';
|
|
|
9
9
|
* @param compression - compression type
|
|
10
10
|
* @returns parquet data page data
|
|
11
11
|
*/
|
|
12
|
-
export declare function decodeDataPages(buffer: Buffer, options: ParquetOptions): Promise<
|
|
12
|
+
export declare function decodeDataPages(buffer: Buffer, options: ParquetOptions): Promise<ParquetColumnChunk>;
|
|
13
13
|
/**
|
|
14
14
|
* Decode parquet page based on page type
|
|
15
15
|
* @param cursor
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"decoders.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/decoders.ts"],"names":[],"mappings":";AACA,OAAO,EAEL,
|
|
1
|
+
{"version":3,"file":"decoders.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/decoders.ts"],"names":[],"mappings":";AACA,OAAO,EAEL,kBAAkB,EAClB,cAAc,EACd,eAAe,EAGf,gBAAgB,EACjB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAC,YAAY,EAAsC,MAAM,WAAW,CAAC;AAC5E,OAAO,EAML,aAAa,EAEd,MAAM,mBAAmB,CAAC;AAK3B;;;;;;GAMG;AACH,wBAAsB,eAAe,CACnC,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,kBAAkB,CAAC,CAmD7B;AAED;;;;GAIG;AACH,wBAAsB,UAAU,CAC9B,MAAM,EAAE,YAAY,EACpB,OAAO,EAAE,cAAc,GACtB,OAAO,CAAC,eAAe,CAAC,CAyB1B;AAED;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAC1B,cAAc,EAAE,aAAa,EAAE,EAC/B,MAAM,EAAE,MAAM,EACd,GAAG,EAAE,MAAM,GACV;IACD,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,gBAAgB,CAAC;CAC1B,CA4DA"}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import type { ReadableFile } from '@loaders.gl/loader-utils';
|
|
2
2
|
import { ParquetSchema } from '../schema/schema';
|
|
3
3
|
import { ColumnChunk, FileMetaData, RowGroup } from '../parquet-thrift';
|
|
4
|
-
import {
|
|
4
|
+
import { ParquetRowGroup, ParquetColumnChunk, ParquetOptions } from '../schema/declare';
|
|
5
5
|
export type ParquetReaderProps = {
|
|
6
6
|
defaultDictionarySize?: number;
|
|
7
7
|
};
|
|
@@ -23,11 +23,11 @@ export declare class ParquetReader {
|
|
|
23
23
|
constructor(file: ReadableFile, props?: ParquetReaderProps);
|
|
24
24
|
close(): void;
|
|
25
25
|
/** Yield one row at a time */
|
|
26
|
-
rowIterator(props?: ParquetIterationProps): AsyncGenerator<import("../schema/declare").
|
|
26
|
+
rowIterator(props?: ParquetIterationProps): AsyncGenerator<import("../schema/declare").ParquetRow, void, unknown>;
|
|
27
27
|
/** Yield one batch of rows at a time */
|
|
28
|
-
rowBatchIterator(props?: ParquetIterationProps): AsyncGenerator<import("../schema/declare").
|
|
28
|
+
rowBatchIterator(props?: ParquetIterationProps): AsyncGenerator<import("../schema/declare").ParquetRow[], void, unknown>;
|
|
29
29
|
/** Iterate over the raw row groups */
|
|
30
|
-
rowGroupIterator(props?: ParquetIterationProps): AsyncGenerator<
|
|
30
|
+
rowGroupIterator(props?: ParquetIterationProps): AsyncGenerator<ParquetRowGroup, void, unknown>;
|
|
31
31
|
getRowCount(): Promise<number>;
|
|
32
32
|
getSchema(): Promise<ParquetSchema>;
|
|
33
33
|
/**
|
|
@@ -41,11 +41,11 @@ export declare class ParquetReader {
|
|
|
41
41
|
/** Metadata is stored in the footer */
|
|
42
42
|
readFooter(): Promise<FileMetaData>;
|
|
43
43
|
/** Data is stored in row groups (similar to Apache Arrow record batches) */
|
|
44
|
-
readRowGroup(schema: ParquetSchema, rowGroup: RowGroup, columnList: string[][]): Promise<
|
|
44
|
+
readRowGroup(schema: ParquetSchema, rowGroup: RowGroup, columnList: string[][]): Promise<ParquetRowGroup>;
|
|
45
45
|
/**
|
|
46
46
|
* Each row group contains column chunks for all the columns.
|
|
47
47
|
*/
|
|
48
|
-
readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<
|
|
48
|
+
readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetColumnChunk>;
|
|
49
49
|
/**
|
|
50
50
|
* Getting dictionary for allows to flatten values by indices.
|
|
51
51
|
* @param dictionaryPageOffset
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,MAAM,mBAAmB,CAAC;AAC9F,OAAO,EACL,
|
|
1
|
+
{"version":3,"file":"parquet-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-reader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,YAAY,EAAC,MAAM,0BAA0B,CAAC;AAE3D,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAK/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,MAAM,mBAAmB,CAAC;AAC9F,OAAO,EACL,eAAe,EAEf,kBAAkB,EAElB,cAAc,EACf,MAAM,mBAAmB,CAAC;AAI3B,MAAM,MAAM,kBAAkB,GAAG;IAC/B,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC,CAAC;AAEF,0DAA0D;AAC1D,MAAM,MAAM,qBAAqB,GAAG;IAClC,iDAAiD;IACjD,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;CACpC,CAAC;AAMF;;;;;GAKG;AACH,qBAAa,aAAa;IACxB,KAAK,EAAE,QAAQ,CAAC,kBAAkB,CAAC,CAAC;IACpC,IAAI,EAAE,YAAY,CAAC;IACnB,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAQ;gBAElC,IAAI,EAAE,YAAY,EAAE,KAAK,CAAC,EAAE,kBAAkB;IAK1D,KAAK,IAAI,IAAI;IAOb,8BAA8B;IACvB,WAAW,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAShD,wCAAwC;IACjC,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAOrD,sCAAsC;IAC/B,gBAAgB,CAAC,KAAK,CAAC,EAAE,qBAAqB;IAqB/C,WAAW,IAAI,OAAO,CAAC,MAAM,CAAC;IAK9B,SAAS,IAAI,OAAO,CAAC,aAAa,CAAC;IAQzC;;;OAGG;IACG,iBAAiB,IAAI,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IASpD,eAAe,IAAI,OAAO,CAAC,YAAY,CAAC;IAU9C,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAajC,uCAAuC;IACjC,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;IAsBzC,4EAA4E;IACtE,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,eAAe,CAAC;IAgB3B;;OAEG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAoDhG;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,cAAc,EACvB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;CAuBrB"}
|
|
@@ -41,7 +41,7 @@ class ParquetReader {
|
|
|
41
41
|
async *rowBatchIterator(props) {
|
|
42
42
|
const schema = await this.getSchema();
|
|
43
43
|
for await (const rowGroup of this.rowGroupIterator(props)) {
|
|
44
|
-
yield (0, shred_1.
|
|
44
|
+
yield (0, shred_1.materializeRows)(schema, rowGroup);
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
47
|
/** Iterate over the raw row groups */
|
|
@@ -65,19 +65,20 @@ export interface ParquetPageData {
|
|
|
65
65
|
/** The "raw" page header from the file */
|
|
66
66
|
pageHeader: PageHeader;
|
|
67
67
|
}
|
|
68
|
-
export interface
|
|
68
|
+
export interface ParquetRow {
|
|
69
69
|
[key: string]: any;
|
|
70
70
|
}
|
|
71
71
|
/** @
|
|
72
72
|
* Holds data for one row group (column chunks) */
|
|
73
|
-
export declare class
|
|
73
|
+
export declare class ParquetRowGroup {
|
|
74
74
|
/** Number of rows in this page */
|
|
75
75
|
rowCount: number;
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
/** Map of Column chunks */
|
|
77
|
+
columnData: Record<string, ParquetColumnChunk>;
|
|
78
|
+
constructor(rowCount?: number, columnData?: Record<string, ParquetColumnChunk>);
|
|
78
79
|
}
|
|
79
80
|
/** Holds the data for one column chunk */
|
|
80
|
-
export interface
|
|
81
|
+
export interface ParquetColumnChunk {
|
|
81
82
|
dlevels: number[];
|
|
82
83
|
rlevels: number[];
|
|
83
84
|
values: any[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"declare.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/declare.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,OAAO,GAAG,KAAK,GAAG,kBAAkB,CAAC;AAChE,MAAM,MAAM,kBAAkB,GAC1B,cAAc,GACd,MAAM,GACN,QAAQ,GACR,KAAK,GACL,QAAQ,GACR,KAAK,GACL,SAAS,GACT,MAAM,CAAC;AACX,MAAM,MAAM,cAAc,GAAG,UAAU,GAAG,UAAU,GAAG,UAAU,CAAC;AAClE,MAAM,MAAM,WAAW,GAAG,aAAa,GAAG,YAAY,CAAC;AAEvD;;GAEG;AACH,MAAM,MAAM,aAAa,GAErB,SAAS,GACT,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,QAAQ,GACR,YAAY,GACZ,sBAAsB,CAAC;AAE3B;;GAEG;AACH,MAAM,MAAM,YAAY,GAEpB,MAAM,GAMN,eAAe,GACf,eAAe,GACf,oBAAoB,GACpB,8BAA8B,GAC9B,MAAM,GACN,aAAa,GACb,aAAa,GACb,kBAAkB,GAClB,kBAAkB,GAClB,QAAQ,GACR,SAAS,GACT,SAAS,GACT,SAAS,GACT,OAAO,GACP,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,MAAM,GACN,MAAM,GACN,UAAU,CAAC;AAEf,MAAM,MAAM,iBAAiB,GAAG,MAAM,EAAE,CAAC;AAEzC,MAAM,WAAW,gBAAgB;IAC/B,CAAC,MAAM,EAAE,MAAM,GAAG,eAAe,CAAC;CACnC;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,CAAC,EAAE,WAAW,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,gBAAgB,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,cAAc,EAAE,cAAc,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;CACvC;AAED,mDAAmD;AACnD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,kBAAkB,CAAC;IAChC,MAAM,EAAE,YAAY,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;IAClB,UAAU,CAAC,EAAE,iBAAiB,CAAC;CAChC;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,2BAA2B;IAC3B,MAAM,EAAE,GAAG,EAAE,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,iBAAiB,CAAC;IAC/B,0CAA0C;IAC1C,UAAU,EAAE,UAAU,CAAC;CACxB;AAED,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"declare.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/declare.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,KAAK,EAAC,UAAU,EAAC,MAAM,mBAAmB,CAAC;AAElD,MAAM,MAAM,YAAY,GAAG,OAAO,GAAG,KAAK,GAAG,kBAAkB,CAAC;AAChE,MAAM,MAAM,kBAAkB,GAC1B,cAAc,GACd,MAAM,GACN,QAAQ,GACR,KAAK,GACL,QAAQ,GACR,KAAK,GACL,SAAS,GACT,MAAM,CAAC;AACX,MAAM,MAAM,cAAc,GAAG,UAAU,GAAG,UAAU,GAAG,UAAU,CAAC;AAClE,MAAM,MAAM,WAAW,GAAG,aAAa,GAAG,YAAY,CAAC;AAEvD;;GAEG;AACH,MAAM,MAAM,aAAa,GAErB,SAAS,GACT,OAAO,GACP,OAAO,GACP,OAAO,GACP,OAAO,GACP,QAAQ,GACR,YAAY,GACZ,sBAAsB,CAAC;AAE3B;;GAEG;AACH,MAAM,MAAM,YAAY,GAEpB,MAAM,GAMN,eAAe,GACf,eAAe,GACf,oBAAoB,GACpB,8BAA8B,GAC9B,MAAM,GACN,aAAa,GACb,aAAa,GACb,kBAAkB,GAClB,kBAAkB,GAClB,QAAQ,GACR,SAAS,GACT,SAAS,GACT,SAAS,GACT,OAAO,GACP,QAAQ,GACR,QAAQ,GACR,QAAQ,GACR,MAAM,GACN,MAAM,GACN,UAAU,CAAC;AAEf,MAAM,MAAM,iBAAiB,GAAG,MAAM,EAAE,CAAC;AAEzC,MAAM,WAAW,gBAAgB;IAC/B,CAAC,MAAM,EAAE,MAAM,GAAG,eAAe,CAAC;CACnC;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,CAAC,EAAE,WAAW,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,MAAM,CAAC,EAAE,gBAAgB,CAAC;CAC3B;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,GAAG,EAAE,MAAM,CAAC;IACZ,aAAa,CAAC,EAAE,aAAa,CAAC;IAC9B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,cAAc,EAAE,cAAc,CAAC;IAC/B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,YAAY,CAAC;IACxB,WAAW,CAAC,EAAE,kBAAkB,CAAC;IACjC,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;CACvC;AAED,mDAAmD;AACnD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,WAAW,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,kBAAkB,CAAC;IAChC,MAAM,EAAE,YAAY,CAAC;IACrB,SAAS,CAAC,EAAE,KAAK,CAAC;IAClB,UAAU,CAAC,EAAE,iBAAiB,CAAC;CAChC;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,2BAA2B;IAC3B,MAAM,EAAE,GAAG,EAAE,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,CAAC,EAAE,iBAAiB,CAAC;IAC/B,0CAA0C;IAC1C,UAAU,EAAE,UAAU,CAAC;CACxB;AAED,MAAM,WAAW,UAAU;IACzB,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAC;CACpB;AAED;kDACkD;AAClD,qBAAa,eAAe;IAC1B,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,2BAA2B;IAC3B,UAAU,EAAE,MAAM,CAAC,MAAM,EAAE,kBAAkB,CAAC,CAAC;gBAEnC,QAAQ,GAAE,MAAU,EAAE,UAAU,GAAE,MAAM,CAAC,MAAM,EAAE,kBAAkB,CAAM;CAItF;AAED,0CAA0C;AAC1C,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,MAAM,EAAE,GAAG,EAAE,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,UAAU,EAAE,CAAC;CAC3B"}
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.ParquetRowGroup = void 0;
|
|
4
4
|
/** @
|
|
5
5
|
* Holds data for one row group (column chunks) */
|
|
6
|
-
class
|
|
6
|
+
class ParquetRowGroup {
|
|
7
7
|
constructor(rowCount = 0, columnData = {}) {
|
|
8
8
|
this.rowCount = rowCount;
|
|
9
9
|
this.columnData = columnData;
|
|
10
10
|
}
|
|
11
11
|
}
|
|
12
|
-
exports.
|
|
12
|
+
exports.ParquetRowGroup = ParquetRowGroup;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { FieldDefinition,
|
|
1
|
+
import { FieldDefinition, ParquetRowGroup, ParquetCompression, ParquetField, ParquetRow, SchemaDefinition } from './declare';
|
|
2
2
|
/**
|
|
3
3
|
* A parquet file schema
|
|
4
4
|
*/
|
|
@@ -18,9 +18,9 @@ export declare class ParquetSchema {
|
|
|
18
18
|
* Retrieve a field definition and all the field's ancestors
|
|
19
19
|
*/
|
|
20
20
|
findFieldBranch(path: string | string[]): ParquetField[];
|
|
21
|
-
shredRecord(
|
|
22
|
-
|
|
21
|
+
shredRecord(row: ParquetRow, rowGroup: ParquetRowGroup): void;
|
|
22
|
+
materializeRows(rowGroup: ParquetRowGroup): ParquetRow[];
|
|
23
23
|
compress(type: ParquetCompression): this;
|
|
24
|
-
|
|
24
|
+
rowGroup(): ParquetRowGroup;
|
|
25
25
|
}
|
|
26
26
|
//# sourceMappingURL=schema.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/schema.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,eAAe,EACf,
|
|
1
|
+
{"version":3,"file":"schema.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/schema.ts"],"names":[],"mappings":"AAIA,OAAO,EACL,eAAe,EACf,eAAe,EACf,kBAAkB,EAClB,YAAY,EACZ,UAAU,EAEV,gBAAgB,EACjB,MAAM,WAAW,CAAC;AAInB;;GAEG;AACH,qBAAa,aAAa;IACjB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,CAAC;IACxC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,YAAY,CAAC,CAAC;IACrC,SAAS,EAAE,YAAY,EAAE,CAAC;IAEjC;;OAEG;gBACS,MAAM,EAAE,gBAAgB;IAMpC;;OAEG;IACH,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,YAAY;IAiBhD;;OAEG;IACH,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,GAAG,YAAY,EAAE;IAgBxD,WAAW,CAAC,GAAG,EAAE,UAAU,EAAE,QAAQ,EAAE,eAAe,GAAG,IAAI;IAI7D,eAAe,CAAC,QAAQ,EAAE,eAAe,GAAG,UAAU,EAAE;IAIxD,QAAQ,CAAC,IAAI,EAAE,kBAAkB,GAAG,IAAI;IAMxC,QAAQ,IAAI,eAAe;CAG5B"}
|
|
@@ -54,18 +54,18 @@ class ParquetSchema {
|
|
|
54
54
|
}
|
|
55
55
|
return branch;
|
|
56
56
|
}
|
|
57
|
-
shredRecord(
|
|
58
|
-
(0, shred_1.shredRecord)(this,
|
|
57
|
+
shredRecord(row, rowGroup) {
|
|
58
|
+
(0, shred_1.shredRecord)(this, row, rowGroup);
|
|
59
59
|
}
|
|
60
|
-
|
|
61
|
-
return (0, shred_1.
|
|
60
|
+
materializeRows(rowGroup) {
|
|
61
|
+
return (0, shred_1.materializeRows)(this, rowGroup);
|
|
62
62
|
}
|
|
63
63
|
compress(type) {
|
|
64
64
|
setCompress(this.schema, type);
|
|
65
65
|
setCompress(this.fields, type);
|
|
66
66
|
return this;
|
|
67
67
|
}
|
|
68
|
-
|
|
68
|
+
rowGroup() {
|
|
69
69
|
return (0, shred_1.shredBuffer)(this);
|
|
70
70
|
}
|
|
71
71
|
}
|
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { ArrayType } from '@loaders.gl/schema';
|
|
2
|
+
import { ParquetRowGroup, ParquetRow } from './declare';
|
|
2
3
|
import { ParquetSchema } from './schema';
|
|
3
|
-
export {
|
|
4
|
-
export declare function shredBuffer(schema: ParquetSchema):
|
|
4
|
+
export { ParquetRowGroup };
|
|
5
|
+
export declare function shredBuffer(schema: ParquetSchema): ParquetRowGroup;
|
|
5
6
|
/**
|
|
6
7
|
* 'Shred' a record into a list of <value, repetition_level, definition_level>
|
|
7
8
|
* tuples per column using the Google Dremel Algorithm..
|
|
8
9
|
*
|
|
9
|
-
* The
|
|
10
|
-
* will be returned. You may re-use the
|
|
11
|
-
* to append to an existing
|
|
10
|
+
* The rowGroup argument must point to an object into which the shredded record
|
|
11
|
+
* will be returned. You may re-use the rowGroup for repeated calls to this function
|
|
12
|
+
* to append to an existing rowGroup, as long as the schema is unchanged.
|
|
12
13
|
*
|
|
13
|
-
* The format in which the shredded records will be stored in the
|
|
14
|
+
* The format in which the shredded records will be stored in the rowGroup is as
|
|
14
15
|
* follows:
|
|
15
16
|
*
|
|
16
|
-
*
|
|
17
|
+
* rowGroup = {
|
|
17
18
|
* columnData: [
|
|
18
19
|
* 'my_col': {
|
|
19
20
|
* dlevels: [d1, d2, .. dN],
|
|
@@ -24,16 +25,16 @@ export declare function shredBuffer(schema: ParquetSchema): ParquetBuffer;
|
|
|
24
25
|
* rowCount: X,
|
|
25
26
|
* }
|
|
26
27
|
*/
|
|
27
|
-
export declare function shredRecord(schema: ParquetSchema, record:
|
|
28
|
+
export declare function shredRecord(schema: ParquetSchema, record: ParquetRow, rowGroup: ParquetRowGroup): void;
|
|
28
29
|
/**
|
|
29
30
|
* 'Materialize' a list of <value, repetition_level, definition_level>
|
|
30
31
|
* tuples back to nested records (objects/arrays) using the Google Dremel
|
|
31
32
|
* Algorithm..
|
|
32
33
|
*
|
|
33
|
-
* The
|
|
34
|
+
* The rowGroup argument must point to an object with the following structure (i.e.
|
|
34
35
|
* the same structure that is returned by shredRecords):
|
|
35
36
|
*
|
|
36
|
-
*
|
|
37
|
+
* rowGroup = {
|
|
37
38
|
* columnData: [
|
|
38
39
|
* 'my_col': {
|
|
39
40
|
* dlevels: [d1, d2, .. dN],
|
|
@@ -44,16 +45,16 @@ export declare function shredRecord(schema: ParquetSchema, record: any, buffer:
|
|
|
44
45
|
* rowCount: X,
|
|
45
46
|
* }
|
|
46
47
|
*/
|
|
47
|
-
export declare function
|
|
48
|
+
export declare function materializeRows(schema: ParquetSchema, rowGroup: ParquetRowGroup): ParquetRow[];
|
|
48
49
|
/**
|
|
49
50
|
* 'Materialize' a list of <value, repetition_level, definition_level>
|
|
50
51
|
* tuples back to nested records (objects/arrays) using the Google Dremel
|
|
51
52
|
* Algorithm..
|
|
52
53
|
*
|
|
53
|
-
* The
|
|
54
|
+
* The rowGroup argument must point to an object with the following structure (i.e.
|
|
54
55
|
* the same structure that is returned by shredRecords):
|
|
55
56
|
*
|
|
56
|
-
*
|
|
57
|
+
* rowGroup = {
|
|
57
58
|
* columnData: [
|
|
58
59
|
* 'my_col': {
|
|
59
60
|
* dlevels: [d1, d2, .. dN],
|
|
@@ -63,101 +64,6 @@ export declare function materializeRecords(schema: ParquetSchema, buffer: Parque
|
|
|
63
64
|
* ],
|
|
64
65
|
* rowCount: X,
|
|
65
66
|
* }
|
|
66
|
-
|
|
67
|
-
export function
|
|
68
|
-
const columns: ParquetRecord = {};
|
|
69
|
-
for (const key in buffer.columnData) {
|
|
70
|
-
const columnData = buffer.columnData[key];
|
|
71
|
-
if (columnData.count) {
|
|
72
|
-
extractColumn(schema, columnData, key, columns);
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
return columns;
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
// eslint-disable-next-line max-statements, complexity
|
|
79
|
-
function extractColumn(
|
|
80
|
-
schema: ParquetSchema,
|
|
81
|
-
columnData: ParquetData,
|
|
82
|
-
key: string,
|
|
83
|
-
columns: Record<string, unknown>
|
|
84
|
-
) {
|
|
85
|
-
if (columnData.count <= 0) {
|
|
86
|
-
return;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
const record = columns;
|
|
90
|
-
|
|
91
|
-
const field = schema.findField(key);
|
|
92
|
-
const branch = schema.findFieldBranch(key);
|
|
93
|
-
|
|
94
|
-
// tslint:disable-next-line:prefer-array-literal
|
|
95
|
-
const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);
|
|
96
|
-
let vIndex = 0;
|
|
97
|
-
|
|
98
|
-
let i = 0;
|
|
99
|
-
const dLevel = columnData.dlevels[i];
|
|
100
|
-
const rLevel = columnData.rlevels[i];
|
|
101
|
-
rLevels[rLevel]++;
|
|
102
|
-
rLevels.fill(0, rLevel + 1);
|
|
103
|
-
|
|
104
|
-
let rIndex = 0;
|
|
105
|
-
let record = records[rLevels[rIndex++] - 1];
|
|
106
|
-
|
|
107
|
-
// Internal nodes
|
|
108
|
-
for (const step of branch) {
|
|
109
|
-
if (step === field || dLevel < step.dLevelMax) {
|
|
110
|
-
break;
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
switch (step.repetitionType) {
|
|
114
|
-
case 'REPEATED':
|
|
115
|
-
if (!(step.name in record)) {
|
|
116
|
-
// eslint-disable max-depth
|
|
117
|
-
record[step.name] = [];
|
|
118
|
-
}
|
|
119
|
-
const ix = rLevels[rIndex++];
|
|
120
|
-
while (record[step.name].length <= ix) {
|
|
121
|
-
// eslint-disable max-depth
|
|
122
|
-
record[step.name].push({});
|
|
123
|
-
}
|
|
124
|
-
record = record[step.name][ix];
|
|
125
|
-
break;
|
|
126
|
-
|
|
127
|
-
default:
|
|
128
|
-
record[step.name] = record[step.name] || {};
|
|
129
|
-
record = record[step.name];
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
// Leaf node
|
|
134
|
-
if (dLevel === field.dLevelMax) {
|
|
135
|
-
const value = Types.fromPrimitive(
|
|
136
|
-
// @ts-ignore
|
|
137
|
-
field.originalType || field.primitiveType,
|
|
138
|
-
columnData.values[vIndex],
|
|
139
|
-
field
|
|
140
|
-
);
|
|
141
|
-
vIndex++;
|
|
142
|
-
|
|
143
|
-
switch (field.repetitionType) {
|
|
144
|
-
case 'REPEATED':
|
|
145
|
-
if (!(field.name in record)) {
|
|
146
|
-
// eslint-disable max-depth
|
|
147
|
-
record[field.name] = [];
|
|
148
|
-
}
|
|
149
|
-
const ix = rLevels[rIndex];
|
|
150
|
-
while (record[field.name].length <= ix) {
|
|
151
|
-
// eslint-disable max-depth
|
|
152
|
-
record[field.name].push(null);
|
|
153
|
-
}
|
|
154
|
-
record[field.name][ix] = value;
|
|
155
|
-
break;
|
|
156
|
-
|
|
157
|
-
default:
|
|
158
|
-
record[field.name] = value;
|
|
159
|
-
}
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
*/
|
|
67
|
+
*/
|
|
68
|
+
export declare function materializeColumns(schema: ParquetSchema, rowGroup: ParquetRowGroup): Record<string, ArrayType>;
|
|
163
69
|
//# sourceMappingURL=shred.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,
|
|
1
|
+
{"version":3,"file":"shred.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/schema/shred.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,SAAS,EAAC,MAAM,oBAAoB,CAAC;AAC7C,OAAO,EAAC,eAAe,EAAoC,UAAU,EAAC,MAAM,WAAW,CAAC;AACxF,OAAO,EAAC,aAAa,EAAC,MAAM,UAAU,CAAC;AAGvC,OAAO,EAAC,eAAe,EAAC,CAAC;AAEzB,wBAAgB,WAAW,CAAC,MAAM,EAAE,aAAa,GAAG,eAAe,CAYlE;AAED;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,wBAAgB,WAAW,CACzB,MAAM,EAAE,aAAa,EACrB,MAAM,EAAE,UAAU,EAClB,QAAQ,EAAE,eAAe,GACxB,IAAI,CAmBN;AAgED;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,eAAe,GAAG,UAAU,EAAE,CAa9F;AAoFD;;;;;;;;;;;;;;;;;;GAkBG;AACH,wBAAgB,kBAAkB,CAChC,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,eAAe,GACxB,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAS3B"}
|