@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +7 -18
- package/dist/dist.min.js.map +1 -1
- package/dist/es5/bundle.js +2 -4
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +17 -0
- package/dist/es5/constants.js.map +1 -0
- package/dist/es5/index.js +53 -21
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +82 -0
- package/dist/es5/lib/convert-schema.js.map +1 -0
- package/dist/es5/lib/parse-parquet.js +173 -0
- package/dist/es5/lib/parse-parquet.js.map +1 -0
- package/dist/es5/lib/read-array-buffer.js +53 -0
- package/dist/es5/lib/read-array-buffer.js.map +1 -0
- package/dist/es5/parquet-loader.js +6 -79
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/es5/parquetjs/codecs/index.js +10 -0
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +2 -2
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +138 -104
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +495 -0
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +2 -0
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +2 -1
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +79 -4
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +108 -0
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/esm/bundle.js +2 -4
- package/dist/esm/bundle.js.map +1 -1
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/index.js +14 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/convert-schema.js +71 -0
- package/dist/esm/lib/convert-schema.js.map +1 -0
- package/dist/esm/lib/parse-parquet.js +28 -0
- package/dist/esm/lib/parse-parquet.js.map +1 -0
- package/dist/esm/lib/read-array-buffer.js +9 -0
- package/dist/esm/lib/read-array-buffer.js.map +1 -0
- package/dist/esm/parquet-loader.js +4 -24
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/esm/parquetjs/codecs/index.js +9 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js +2 -2
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +54 -105
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
- package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js +300 -0
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +2 -0
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +2 -1
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js +78 -4
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +79 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/parquet-worker.js +7 -18
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +10 -10
- package/src/bundle.ts +2 -3
- package/src/constants.ts +17 -0
- package/src/index.ts +30 -4
- package/src/lib/convert-schema.ts +95 -0
- package/src/lib/parse-parquet.ts +27 -0
- package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
- package/src/parquet-loader.ts +4 -24
- package/src/parquetjs/codecs/dictionary.ts +11 -0
- package/src/parquetjs/codecs/index.ts +13 -0
- package/src/parquetjs/codecs/rle.ts +4 -2
- package/src/parquetjs/compression.ts +89 -50
- package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
- package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
- package/src/parquetjs/parser/decoders.ts +448 -0
- package/src/parquetjs/parser/parquet-cursor.ts +94 -0
- package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
- package/src/parquetjs/parser/parquet-reader.ts +179 -0
- package/src/parquetjs/schema/declare.ts +48 -2
- package/src/parquetjs/schema/schema.ts +2 -0
- package/src/parquetjs/schema/shred.ts +3 -1
- package/src/parquetjs/schema/types.ts +82 -5
- package/src/parquetjs/utils/buffer-utils.ts +18 -0
- package/src/parquetjs/utils/file-utils.ts +96 -0
- package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
- package/dist/dist.es5.min.js +0 -51
- package/dist/dist.es5.min.js.map +0 -1
- package/dist/es5/parquetjs/compression.ts.disabled +0 -105
- package/dist/es5/parquetjs/reader.js +0 -1078
- package/dist/es5/parquetjs/reader.js.map +0 -1
- package/dist/es5/parquetjs/util.js.map +0 -1
- package/dist/es5/parquetjs/writer.js.map +0 -1
- package/dist/esm/libs/read-array-buffer.ts +0 -31
- package/dist/esm/parquetjs/compression.ts.disabled +0 -105
- package/dist/esm/parquetjs/reader.js +0 -524
- package/dist/esm/parquetjs/reader.js.map +0 -1
- package/dist/esm/parquetjs/util.js.map +0 -1
- package/dist/esm/parquetjs/writer.js.map +0 -1
- package/src/libs/read-array-buffer.ts +0 -31
- package/src/parquetjs/compression.ts.disabled +0 -105
- package/src/parquetjs/reader.ts +0 -707
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/encoder/writer.ts"],"names":["PARQUET_MAGIC","PARQUET_VERSION","PARQUET_DEFAULT_PAGE_SIZE","PARQUET_DEFAULT_ROW_GROUP_SIZE","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","ParquetWriter","schema","envelopeWriter","opts","rowBuffer","rowGroupSize","closed","userMetadata","writeHeader","close","row","Error","Shred","shredRecord","rowCount","callback","writeFooter","key","value","String","cnt","setPageSize","path","outputStream","openStream","ParquetEnvelopeWriter","writeFn","closeFn","fileOffset","write","offset","rowGroups","pageSize","useDataPageV2","Boolean","buf","length","writeSection","Buffer","from","records","encodeRowGroup","baseOffset","rgroup","push","metadata","body","encodeFooter","oswrite","bind","undefined","osclose","ParquetTransformer","objectMode","writeProxy","t","b","writer","encoding","appendRow","then","Promise","resolve","Transform","encodeValues","type","values","PARQUET_CODECS","encodeDataPage","column","data","rLevelsBuf","alloc","rLevelMax","rlevels","bitWidth","dLevelsBuf","dLevelMax","dlevels","valuesBuf","primitiveType","typeLength","dataBuf","concat","Compression","deflate","compression","compressedBuf","header","PageHeader","PageType","DATA_PAGE","data_page_header","DataPageHeader","num_values","count","Encoding","definition_level_encoding","repetition_level_encoding","uncompressed_page_size","compressed_page_size","headerBuf","page","headerSize","encodeDataPageV2","disableEnvelope","DATA_PAGE_V2","data_page_header_v2","DataPageHeaderV2","num_nulls","num_rows","definition_levels_byte_length","repetition_levels_byte_length","is_compressed","encodeColumnChunk","buffer","columnData","join","total_uncompressed_size","total_compressed_size","result","pageBuf","ColumnMetaData","path_in_schema","data_page_offset","encodings","Type","codec","CompressionCodec","metadataOffset","RowGroup","columns","total_byte_size","fieldList","field","isNested","cchunkData","cchunk","ColumnChunk","file_offset","meta_data","Int64","Number","FileMetaData","version","created_by","row_groups","key_value_metadata","kv","KeyValue","schemaRoot","SchemaElement","name","num_children","Object","keys","fields","relt","FieldRepetitionType","repetitionType","schemaElem","repetition_type","fieldCount","originalType","converted_type","ConvertedType","type_length","metadataEncoded","footerEncoded","copy","writeUInt32LE"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA;;AACA;;AACA;;AASA;;AACA;;AAiBA;;AACA;;AACA;;;;;;;;;;;;;;;;AAKA,IAAMA,aAAa,GAAG,MAAtB;AAKA,IAAMC,eAAe,GAAG,CAAxB;AAKA,IAAMC,yBAAyB,GAAG,IAAlC;AACA,IAAMC,8BAA8B,GAAG,IAAvC;AAKA,IAAMC,kBAAkB,GAAG,OAA3B;AACA,IAAMC,sBAAsB,GAAG,KAA/B;;IAuBaC,a;AA2CX,yBACEC,MADF,EAEEC,cAFF,EAGEC,IAHF,EAIE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AAEA,SAAKE,SAAL,GAAiB,EAAjB;AACA,SAAKC,YAAL,GAAoBF,IAAI,CAACE,YAAL,IAAqBR,8BAAzC;AACA,SAAKS,MAAL,GAAc,KAAd;AACA,SAAKC,YAAL,GAAoB,EAApB;AAGA,SAAKC,WAAL;AACD;;;;;mFAED;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,uBAGU,KAAKN,cAAL,CAAoBM,WAApB,EAHV;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA,uBAKU,KAAKN,cAAL,CAAoBO,KAApB,EALV;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;iFAcA,kBAAmBC,GAAnB;AAAA;AAAA;AAAA;AAAA;AAAA,qBACM,KAAKJ,MADX;AAAA;AAAA;AAAA;;AAAA,sBAEU,IAAIK,KAAJ,CAAU,mBAAV,CAFV;;AAAA;AAIEC,gBAAAA,KAAK,CAACC,WAAN,CAAkB,KAAKZ,MAAvB,EAA+BS,GAA/B,EAAoC,KAAKN,SAAzC;;AACA,oBAAI,KAAKA,SAAL,CAAeU,QAAf,IAA2B,KAAKT,YAApC,EAAkD;AAEhD,uBAAKD,SAAL,GAAiB,EAAjB;AACD;;AARH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;6EAiBA,kBAAYW,QAAZ;AAAA;AAAA;AAAA;AAAA;AAAA,qBACM,KAAKT,MADX;AAAA;AAAA;AAAA;;AAAA,sBAEU,IAAIK,KAAJ,CAAU,mBAAV,CAFV;;AAAA;AAKE,qBAAKL,MAAL,GAAc,IAAd;;AAEA,oBAAI,KAAKF,SAAL,CAAeU,QAAf,GAA0B,CAA1B,IAA+B,KAAKV,SAAL,CAAeU,QAAf,IAA2B,KAAKT,YAAnE,EAAiF;AAE/E,uBAAKD,SAAL,GAAiB,EAAjB;AACD;;AAVH;AAAA,uBAYQ,KAAKF,cAAL,CAAoBc,WAApB,CAAgC,KAAKT,YAArC,CAZR;;AAAA;AAAA;AAAA,uBAaQ,KAAKL,cAAL,CAAoBO,KAApB,EAbR;;AAAA;AAgBE,oBAAIM,QAAJ,EAAc;AACZA,kBAAAA,QAAQ;AACT;;AAlBH;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;WAwBA,qBAAYE,GAAZ,EAAyBC,KAAzB,EAA8C;AAE5C,WAAKX,YAAL,CAAkBY,MAAM,CAACF,GAAD,CAAxB,IAAiCE,MAAM,CAACD,KAAD,CAAvC;AACD;;;WAQD,yBAAgBE,GAAhB,EAAmC;AACjC,WAAKf,YAAL,GAAoBe,GAApB;AACD;;;WAMD,qBAAYA,GAAZ,EAA+B;AAC7B,WAAKlB,cAAL,CAAoBmB,WAApB,CAAgCD,GAAhC;AACD;;;;gFAnID,kBACEnB,MADF,EAEEqB,IAFF,EAGEnB,IAHF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,uBAK6B,uBAAOmB,IAAP,EAAanB,IAAb,CAL7B;;AAAA;AAKQoB,gBAAAA,YALR;AAAA,kDAMSvB,aAAa,CAACwB,UAAd,CAAyBvB,MAAzB,EAAiCsB,YAAjC,EAA+CpB,IAA/C,CANT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;kFAaA,kBACEF,MADF,EAEEsB,YAFF,EAGEpB,IAHF;AAAA;AAAA;AAAA;AAAA;AAAA;AAKE,oBAAI,CAACA,IAAL,EAAW;AAETA,kBAAAA,IAAI,GAAG,EAAP;AACD;;AARH;AAAA,uBAU+BsB,qBAAqB,CAACD,UAAtB,CAAiCvB,MAAjC,EAAyCsB,YAAzC,EAAuDpB,IAAvD,CAV/B;;AAAA;AAUQD,gBAAAA,cAVR;AAAA,kDAYS,IAAIF,aAAJ,CAAkBC,MAAlB,EAA0BC,cAA1B,EAA0CC,IAA1C,CAZT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;;;;IA+HWsB,qB;AAuBX,iCACExB,MADF,EAEEyB,OAFF,EAGEC,OAHF,EAIEC,UAJF,EAKEzB,IALF,EAME;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AACA,SAAKF,MAAL,GAAcA,MAAd;AACA,SAAK4B,KAAL,GAAaH,OAAb;AACA,SAAKjB,KAAL,GAAakB,OAAb;AACA,SAAKG,MAAL,GAAcF,UAAd;AACA,SAAKd,QAAL,GAAgB,CAAhB;AACA,SAAKiB,SAAL,GAAiB,EAAjB;AACA,SAAKC,QAAL,GAAgB7B,IAAI,CAAC6B,QAAL,IAAiBpC,yBAAjC;AACA,SAAKqC,aAAL,GAAqB,mBAAmB9B,IAAnB,GAA0B+B,OAAO,CAAC/B,IAAI,CAAC8B,aAAN,CAAjC,GAAwD,KAA7E;AACD;;;;WAED,sBAAaE,GAAb,EAAyC;AACvC,WAAKL,MAAL,IAAeK,GAAG,CAACC,MAAnB;AACA,aAAO,KAAKP,KAAL,CAAWM,GAAX,CAAP;AACD;;;WAKD,uBAA6B;AAC3B,aAAO,KAAKE,YAAL,CAAkBC,MAAM,CAACC,IAAP,CAAY7C,aAAZ,CAAlB,CAAP;AACD;;;;qFAMD,kBAAoB8C,OAApB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,uBACuBC,cAAc,CAAC,KAAKxC,MAAN,EAAcuC,OAAd,EAAuB;AACxDE,kBAAAA,UAAU,EAAE,KAAKZ,MADuC;AAExDE,kBAAAA,QAAQ,EAAE,KAAKA,QAFyC;AAGxDC,kBAAAA,aAAa,EAAE,KAAKA;AAHoC,iBAAvB,CADrC;;AAAA;AACQU,gBAAAA,MADR;AAOE,qBAAK7B,QAAL,IAAiB0B,OAAO,CAAC1B,QAAzB;AACA,qBAAKiB,SAAL,CAAea,IAAf,CAAoBD,MAAM,CAACE,QAA3B;AARF;AAAA,uBASe,KAAKR,YAAL,CAAkBM,MAAM,CAACG,IAAzB,CATf;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;WAeA,qBAAYvC,YAAZ,EAAiE;AAC/D,UAAI,CAACA,YAAL,EAAmB;AAEjBA,QAAAA,YAAY,GAAG,EAAf;AACD;;AAED,aAAO,KAAK8B,YAAL,CACLU,YAAY,CAAC,KAAK9C,MAAN,EAAc,KAAKa,QAAnB,EAA6B,KAAKiB,SAAlC,EAA6CxB,YAA7C,CADP,CAAP;AAGD;;;WAMD,qBAAYa,GAAZ,EAA+B;AAC7B,WAAKY,QAAL,GAAgBZ,GAAhB;AACD;;;;mFApFD,kBACEnB,MADF,EAEEsB,YAFF,EAGEpB,IAHF;AAAA;AAAA;AAAA;AAAA;AAAA;AAKQuB,gBAAAA,OALR,GAKkBsB,mBAAQC,IAAR,CAAaC,SAAb,EAAwB3B,YAAxB,CALlB;AAMQI,gBAAAA,OANR,GAMkBwB,mBAAQF,IAAR,CAAaC,SAAb,EAAwB3B,YAAxB,CANlB;AAAA,kDAOS,IAAIE,qBAAJ,CAA0BxB,MAA1B,EAAkCyB,OAAlC,EAA2CC,OAA3C,EAAoD,CAApD,EAAuDxB,IAAvD,CAPT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;;;;;IA0FWiD,kB;;;;;AAGX,8BAAYnD,MAAZ,EAAoE;AAAA;;AAAA,QAAjCE,IAAiC,uEAAJ,EAAI;AAAA;AAClE,8BAAM;AAACkD,MAAAA,UAAU,EAAE;AAAb,KAAN;AADkE;;AAGlE,QAAMC,UAAU,GAAI,UAAUC,CAAV,EAAsC;AACxD;AAAA,6EAAO,kBAAgBC,CAAhB;AAAA;AAAA;AAAA;AAAA;AACLD,kBAAAA,CAAC,CAACX,IAAF,CAAOY,CAAP;;AADK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,SAAP;;AAAA;AAAA;AAAA;AAAA;AAGD,KAJkB,6CAAnB;;AAMA,UAAKC,MAAL,GAAc,IAAIzD,aAAJ,CACZC,MADY,EAEZ,IAAIwB,qBAAJ,CAA0BxB,MAA1B,EAAkCqD,UAAlC,4DAA8C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,KAA9C,IAA8D,CAA9D,EAAiEnD,IAAjE,CAFY,EAGZA,IAHY,CAAd;AATkE;AAcnE;;;;WAGD,oBAAWO,GAAX,EAAqBgD,QAArB,EAAuC3C,QAAvC,EAAqF;AACnF,UAAIL,GAAJ,EAAS;AACP,eAAO,KAAK+C,MAAL,CAAYE,SAAZ,CAAsBjD,GAAtB,EAA2BkD,IAA3B,CAAgC7C,QAAhC,CAAP;AACD;;AACDA,MAAAA,QAAQ;AACR,aAAO8C,OAAO,CAACC,OAAR,EAAP;AACD;;;;8EAGD,mBAAa/C,QAAb;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,uBACQ,KAAK0C,MAAL,CAAYhD,KAAZ,CAAkBM,QAAlB,CADR;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,O;;;;;;;;;;EA7ByCgD,iB;;;;AAqC3C,SAASC,YAAT,CACEC,IADF,EAEEP,QAFF,EAGEQ,MAHF,EAIE/D,IAJF,EAKE;AACA,MAAI,EAAEuD,QAAQ,IAAIS,sBAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIxD,KAAJ,6BAA+B+C,QAA/B,EAAN;AACD;;AACD,SAAOS,uBAAeT,QAAf,EAAyBM,YAAzB,CAAsCC,IAAtC,EAA4CC,MAA5C,EAAoD/D,IAApD,CAAP;AACD;;SAKciE,c;;;;;8EAAf,mBACEC,MADF,EAEEC,IAFF;AAAA;AAAA;AAAA;AAAA;AAAA;AASMC,YAAAA,UATN,GASmBjC,MAAM,CAACkC,KAAP,CAAa,CAAb,CATnB;;AAUE,gBAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,cAAAA,UAAU,GAAGP,YAAY,CAAClE,kBAAD,EAAqBC,sBAArB,EAA6CuE,IAAI,CAACI,OAAlD,EAA2D;AAClFC,gBAAAA,QAAQ,EAAE,4BAAYN,MAAM,CAACI,SAAnB;AADwE,eAA3D,CAAzB;AAID;;AAEGG,YAAAA,UAjBN,GAiBmBtC,MAAM,CAACkC,KAAP,CAAa,CAAb,CAjBnB;;AAkBE,gBAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,cAAAA,UAAU,GAAGZ,YAAY,CAAClE,kBAAD,EAAqBC,sBAArB,EAA6CuE,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,gBAAAA,QAAQ,EAAE,4BAAYN,MAAM,CAACQ,SAAnB;AADwE,eAA3D,CAAzB;AAID;;AAGKE,YAAAA,SA1BR,GA0BoBf,YAAY,CAACK,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACX,QAA/B,EAA0CY,IAAI,CAACJ,MAA/C,EAAuD;AACnFe,cAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,cAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,aAAvD,CA1BhC;AA+BQC,YAAAA,OA/BR,GA+BkB5C,MAAM,CAAC6C,MAAP,CAAc,CAACZ,UAAD,EAAaK,UAAb,EAAyBG,SAAzB,CAAd,CA/BlB;AAAA;AAAA,mBAkC8BK,WAAW,CAACC,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCJ,OAAzC,CAlC9B;;AAAA;AAkCQK,YAAAA,aAlCR;AAqCQC,YAAAA,MArCR,GAqCiB,IAAIC,yBAAJ,CAAe;AAC5BxB,cAAAA,IAAI,EAAEyB,wBAASC,SADa;AAE5BC,cAAAA,gBAAgB,EAAE,IAAIC,6BAAJ,CAAmB;AACnCC,gBAAAA,UAAU,EAAExB,IAAI,CAACyB,KADkB;AAEnCrC,gBAAAA,QAAQ,EAAEsC,wBAAS3B,MAAM,CAACX,QAAhB,CAFyB;AAGnCuC,gBAAAA,yBAAyB,EAAED,wBAASjG,sBAAT,CAHQ;AAInCmG,gBAAAA,yBAAyB,EAAEF,wBAASjG,sBAAT;AAJQ,eAAnB,CAFU;AAQ5BoG,cAAAA,sBAAsB,EAAEjB,OAAO,CAAC9C,MARJ;AAS5BgE,cAAAA,oBAAoB,EAAEb,aAAa,CAACnD;AATR,aAAf,CArCjB;AAkDQiE,YAAAA,SAlDR,GAkDoB,gCAAgBb,MAAhB,CAlDpB;AAmDQc,YAAAA,IAnDR,GAmDehE,MAAM,CAAC6C,MAAP,CAAc,CAACkB,SAAD,EAAYd,aAAZ,CAAd,CAnDf;AAAA,+CAqDS;AAACC,cAAAA,MAAM,EAANA,MAAD;AAASe,cAAAA,UAAU,EAAEF,SAAS,CAACjE,MAA/B;AAAuCkE,cAAAA,IAAI,EAAJA;AAAvC,aArDT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SA2DeE,gB;;;;;+EAAf,mBACEnC,MADF,EAEEC,IAFF,EAGExD,QAHF;AAAA;AAAA;AAAA;AAAA;AAAA;AAUQiE,YAAAA,SAVR,GAUoBf,YAAY,CAACK,MAAM,CAACW,aAAR,EAAwBX,MAAM,CAACX,QAA/B,EAA0CY,IAAI,CAACJ,MAA/C,EAAuD;AACnFe,cAAAA,UAAU,EAAEZ,MAAM,CAACY,UADgE;AAEnFN,cAAAA,QAAQ,EAAEN,MAAM,CAACY;AAFkE,aAAvD,CAVhC;AAAA;AAAA,mBAgB8BG,WAAW,CAACC,OAAZ,CAAoBhB,MAAM,CAACiB,WAA3B,EAAyCP,SAAzC,CAhB9B;;AAAA;AAgBQQ,YAAAA,aAhBR;AAmBMhB,YAAAA,UAnBN,GAmBmBjC,MAAM,CAACkC,KAAP,CAAa,CAAb,CAnBnB;;AAoBE,gBAAIH,MAAM,CAACI,SAAP,GAAmB,CAAvB,EAA0B;AACxBF,cAAAA,UAAU,GAAGP,YAAY,CAAClE,kBAAD,EAAqBC,sBAArB,EAA6CuE,IAAI,CAACI,OAAlD,EAA2D;AAClFC,gBAAAA,QAAQ,EAAE,4BAAYN,MAAM,CAACI,SAAnB,CADwE;AAElFgC,gBAAAA,eAAe,EAAE;AAFiE,eAA3D,CAAzB;AAID;;AAEG7B,YAAAA,UA3BN,GA2BmBtC,MAAM,CAACkC,KAAP,CAAa,CAAb,CA3BnB;;AA4BE,gBAAIH,MAAM,CAACQ,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,cAAAA,UAAU,GAAGZ,YAAY,CAAClE,kBAAD,EAAqBC,sBAArB,EAA6CuE,IAAI,CAACQ,OAAlD,EAA2D;AAClFH,gBAAAA,QAAQ,EAAE,4BAAYN,MAAM,CAACQ,SAAnB,CADwE;AAElF4B,gBAAAA,eAAe,EAAE;AAFiE,eAA3D,CAAzB;AAID;;AAGKjB,YAAAA,MApCR,GAoCiB,IAAIC,yBAAJ,CAAe;AAC5BxB,cAAAA,IAAI,EAAEyB,wBAASgB,YADa;AAE5BC,cAAAA,mBAAmB,EAAE,IAAIC,+BAAJ,CAAqB;AACxCd,gBAAAA,UAAU,EAAExB,IAAI,CAACyB,KADuB;AAExCc,gBAAAA,SAAS,EAAEvC,IAAI,CAACyB,KAAL,GAAazB,IAAI,CAACJ,MAAL,CAAY9B,MAFI;AAGxC0E,gBAAAA,QAAQ,EAAEhG,QAH8B;AAIxC4C,gBAAAA,QAAQ,EAAEsC,wBAAS3B,MAAM,CAACX,QAAhB,CAJ8B;AAKxCqD,gBAAAA,6BAA6B,EAAEnC,UAAU,CAACxC,MALF;AAMxC4E,gBAAAA,6BAA6B,EAAEzC,UAAU,CAACnC,MANF;AAOxC6E,gBAAAA,aAAa,EAAE5C,MAAM,CAACiB,WAAP,KAAuB;AAPE,eAArB,CAFO;AAW5Ba,cAAAA,sBAAsB,EAAE5B,UAAU,CAACnC,MAAX,GAAoBwC,UAAU,CAACxC,MAA/B,GAAwC2C,SAAS,CAAC3C,MAX9C;AAY5BgE,cAAAA,oBAAoB,EAAE7B,UAAU,CAACnC,MAAX,GAAoBwC,UAAU,CAACxC,MAA/B,GAAwCmD,aAAa,CAACnD;AAZhD,aAAf,CApCjB;AAoDQiE,YAAAA,SApDR,GAoDoB,gCAAgBb,MAAhB,CApDpB;AAqDQc,YAAAA,IArDR,GAqDehE,MAAM,CAAC6C,MAAP,CAAc,CAACkB,SAAD,EAAY9B,UAAZ,EAAwBK,UAAxB,EAAoCW,aAApC,CAAd,CArDf;AAAA,+CAsDS;AAACC,cAAAA,MAAM,EAANA,MAAD;AAASe,cAAAA,UAAU,EAAEF,SAAS,CAACjE,MAA/B;AAAuCkE,cAAAA,IAAI,EAAJA;AAAvC,aAtDT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SA4DeY,iB;;;;;iFAAf,mBACE7C,MADF,EAEE8C,MAFF,EAGErF,MAHF,EAIE3B,IAJF;AAAA;AAAA;AAAA;AAAA;AAAA;AAUQmE,YAAAA,IAVR,GAUe6C,MAAM,CAACC,UAAP,CAAkB/C,MAAM,CAAC/C,IAAP,CAAY+F,IAAZ,EAAlB,CAVf;AAWQ3E,YAAAA,UAXR,GAWqB,CAACvC,IAAI,CAACuC,UAAL,IAAmB,CAApB,IAAyBZ,MAX9C;AAgBMwF,YAAAA,uBAhBN,GAgBgC,CAhBhC;AAkBMC,YAAAA,qBAlBN,GAkB8B,CAlB9B;;AAAA,iBAoBmBpH,IAAI,CAAC8B,aApBxB;AAAA;AAAA;AAAA;;AAAA;AAAA,mBAqBcuE,gBAAgB,CAACnC,MAAD,EAASC,IAAT,EAAe6C,MAAM,CAACrG,QAAtB,CArB9B;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA,mBAsBcsD,cAAc,CAACC,MAAD,EAASC,IAAT,CAtB5B;;AAAA;AAAA;;AAAA;AAoBUkD,YAAAA,MApBV;AAwBIC,YAAAA,OAAO,GAAGD,MAAM,CAAClB,IAAjB;AACAgB,YAAAA,uBAAuB,IAAIE,MAAM,CAAChC,MAAP,CAAcW,sBAAd,GAAuCqB,MAAM,CAACjB,UAAzE;AACAgB,YAAAA,qBAAqB,IAAIC,MAAM,CAAChC,MAAP,CAAcY,oBAAd,GAAqCoB,MAAM,CAACjB,UAArE;AAOI1D,YAAAA,QAjCR,GAiCmB,IAAI6E,6BAAJ,CAAmB;AAClCC,cAAAA,cAAc,EAAEtD,MAAM,CAAC/C,IADW;AAElCwE,cAAAA,UAAU,EAAExB,IAAI,CAACyB,KAFiB;AAGlC6B,cAAAA,gBAAgB,EAAElF,UAHgB;AAIlCmF,cAAAA,SAAS,EAAE,EAJuB;AAKlCP,cAAAA,uBAAuB,EAAvBA,uBALkC;AAMlCC,cAAAA,qBAAqB,EAArBA,qBANkC;AAOlCtD,cAAAA,IAAI,EAAE6D,oBAAKzD,MAAM,CAACW,aAAZ,CAP4B;AAQlC+C,cAAAA,KAAK,EAAEC,gCAAiB3D,MAAM,CAACiB,WAAxB;AAR2B,aAAnB,CAjCnB;AA6CEzC,YAAAA,QAAQ,CAACgF,SAAT,CAAmBjF,IAAnB,CAAwBoD,wBAASjG,sBAAT,CAAxB;AACA8C,YAAAA,QAAQ,CAACgF,SAAT,CAAmBjF,IAAnB,CAAwBoD,wBAAS3B,MAAM,CAACX,QAAhB,CAAxB;AAGMuE,YAAAA,cAjDR,GAiDyBvF,UAAU,GAAG+E,OAAO,CAACrF,MAjD9C;AAkDQU,YAAAA,IAlDR,GAkDeR,MAAM,CAAC6C,MAAP,CAAc,CAACsC,OAAD,EAAU,gCAAgB5E,QAAhB,CAAV,CAAd,CAlDf;AAAA,+CAmDS;AAACC,cAAAA,IAAI,EAAJA,IAAD;AAAOD,cAAAA,QAAQ,EAARA,QAAP;AAAiBoF,cAAAA,cAAc,EAAdA;AAAjB,aAnDT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SAyDexF,c;;;;;8EAAf,mBACExC,MADF,EAEEqE,IAFF,EAGEnE,IAHF;AAAA;;AAAA;AAAA;AAAA;AAAA;AAQQ0C,YAAAA,QARR,GAQmB,IAAIqF,uBAAJ,CAAa;AAC5BpB,cAAAA,QAAQ,EAAExC,IAAI,CAACxD,QADa;AAE5BqH,cAAAA,OAAO,EAAE,EAFmB;AAG5BC,cAAAA,eAAe,EAAE;AAHW,aAAb,CARnB;AAcMtF,YAAAA,IAdN,GAcaR,MAAM,CAACkC,KAAP,CAAa,CAAb,CAdb;AAAA,oDAesBvE,MAAM,CAACoI,SAf7B;AAAA;;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAeaC,YAAAA,KAfb;;AAAA,iBAgBQA,KAAK,CAACC,QAhBd;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;AAAA,mBAoB6BrB,iBAAiB,CAACoB,KAAD,EAAQhE,IAAR,EAAcxB,IAAI,CAACV,MAAnB,EAA2BjC,IAA3B,CApB9C;;AAAA;AAoBUqI,YAAAA,UApBV;AAsBUC,YAAAA,MAtBV,GAsBmB,IAAIC,0BAAJ,CAAgB;AAC7BC,cAAAA,WAAW,EAAEH,UAAU,CAACP,cADK;AAE7BW,cAAAA,SAAS,EAAEJ,UAAU,CAAC3F;AAFO,aAAhB,CAtBnB;AA2BIA,YAAAA,QAAQ,CAACsF,OAAT,CAAiBvF,IAAjB,CAAsB6F,MAAtB;AACA5F,YAAAA,QAAQ,CAACuF,eAAT,GAA2B,IAAIS,gBAAJ,CAAUC,MAAM,CAACjG,QAAQ,CAACuF,eAAV,CAAN,GAAmCI,UAAU,CAAC1F,IAAX,CAAgBV,MAA7D,CAA3B;AAEAU,YAAAA,IAAI,GAAGR,MAAM,CAAC6C,MAAP,CAAc,CAACrC,IAAD,EAAO0F,UAAU,CAAC1F,IAAlB,CAAd,CAAP;;AA9BJ;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;;AAAA;;AAAA;;AAAA;AAAA,+CAiCS;AAACA,cAAAA,IAAI,EAAJA,IAAD;AAAOD,cAAAA,QAAQ,EAARA;AAAP,aAjCT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;AAuCA,SAASE,YAAT,CACE9C,MADF,EAEEa,QAFF,EAGEiB,SAHF,EAIExB,YAJF,EAKU;AACR,MAAMsC,QAAQ,GAAG,IAAIkG,2BAAJ,CAAiB;AAChCC,IAAAA,OAAO,EAAErJ,eADuB;AAEhCsJ,IAAAA,UAAU,EAAE,UAFoB;AAGhCnC,IAAAA,QAAQ,EAAEhG,QAHsB;AAIhCoI,IAAAA,UAAU,EAAEnH,SAJoB;AAKhC9B,IAAAA,MAAM,EAAE,EALwB;AAMhCkJ,IAAAA,kBAAkB,EAAE;AANY,GAAjB,CAAjB;;AASA,OAAK,IAAMlI,GAAX,IAAkBV,YAAlB,EAAgC;AAAA;;AAC9B,QAAM6I,EAAE,GAAG,IAAIC,uBAAJ,CAAa;AACtBpI,MAAAA,GAAG,EAAHA,GADsB;AAEtBC,MAAAA,KAAK,EAAEX,YAAY,CAACU,GAAD;AAFG,KAAb,CAAX;AAIA,6BAAA4B,QAAQ,CAACsG,kBAAT,0GAA6BvG,IAA7B,8GAAoCwG,EAApC;AACD;;AAED;AACE,QAAME,UAAU,GAAG,IAAIC,4BAAJ,CAAkB;AACnCC,MAAAA,IAAI,EAAE,MAD6B;AAEnCC,MAAAA,YAAY,EAAEC,MAAM,CAACC,IAAP,CAAY1J,MAAM,CAAC2J,MAAnB,EAA2BxH;AAFN,KAAlB,CAAnB;AAIAS,IAAAA,QAAQ,CAAC5C,MAAT,CAAgB2C,IAAhB,CAAqB0G,UAArB;AACD;;AAxBO,6CA0BYrJ,MAAM,CAACoI,SA1BnB;AAAA;;AAAA;AA0BR,wDAAsC;AAAA,UAA3BC,KAA2B;AACpC,UAAMuB,IAAI,GAAGC,mCAAoBxB,KAAK,CAACyB,cAA1B,CAAb;AACA,UAAMC,UAAU,GAAG,IAAIT,4BAAJ,CAAkB;AACnCC,QAAAA,IAAI,EAAElB,KAAK,CAACkB,IADuB;AAEnCS,QAAAA,eAAe,EAAEJ;AAFkB,OAAlB,CAAnB;;AAKA,UAAIvB,KAAK,CAACC,QAAV,EAAoB;AAClByB,QAAAA,UAAU,CAACP,YAAX,GAA0BnB,KAAK,CAAC4B,UAAhC;AACD,OAFD,MAEO;AACLF,QAAAA,UAAU,CAAC/F,IAAX,GAAkB6D,oBAAKQ,KAAK,CAACtD,aAAX,CAAlB;AACD;;AAED,UAAIsD,KAAK,CAAC6B,YAAV,EAAwB;AACtBH,QAAAA,UAAU,CAACI,cAAX,GAA4BC,6BAAc/B,KAAK,CAAC6B,YAApB,CAA5B;AACD;;AAEDH,MAAAA,UAAU,CAACM,WAAX,GAAyBhC,KAAK,CAACrD,UAA/B;AAEApC,MAAAA,QAAQ,CAAC5C,MAAT,CAAgB2C,IAAhB,CAAqBoH,UAArB;AACD;AA9CO;AAAA;AAAA;AAAA;AAAA;;AAgDR,MAAMO,eAAe,GAAG,gCAAgB1H,QAAhB,CAAxB;AACA,MAAM2H,aAAa,GAAGlI,MAAM,CAACkC,KAAP,CAAa+F,eAAe,CAACnI,MAAhB,GAAyB,CAAtC,CAAtB;AACAmI,EAAAA,eAAe,CAACE,IAAhB,CAAqBD,aAArB;AACAA,EAAAA,aAAa,CAACE,aAAd,CAA4BH,eAAe,CAACnI,MAA5C,EAAoDmI,eAAe,CAACnI,MAApE;AACAoI,EAAAA,aAAa,CAAC3I,KAAd,CAAoBnC,aAApB,EAAmC6K,eAAe,CAACnI,MAAhB,GAAyB,CAA5D;AACA,SAAOoI,aAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\nimport {Transform, Writable} from 'stream';\nimport {ParquetCodecOptions, PARQUET_CODECS} from '../codecs';\nimport * as Compression from '../compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetData,\n ParquetField,\n PrimitiveType\n} from '../schema/declare';\nimport {ParquetSchema} from '../schema/schema';\nimport * as Shred from '../schema/shred';\nimport {\n ColumnChunk,\n ColumnMetaData,\n CompressionCodec,\n ConvertedType,\n DataPageHeader,\n DataPageHeaderV2,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n KeyValue,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from '../parquet-thrift';\nimport {osopen, oswrite, osclose} from '../utils/file-utils';\nimport {getBitWidth, serializeThrift} from '../utils/read-utils';\nimport Int64 from 'node-int64';\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Default Page and Row Group sizes\n */\nconst PARQUET_DEFAULT_PAGE_SIZE = 8192;\nconst PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;\n\n/**\n * Repetition and Definition Level Encoding\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\nexport interface ParquetWriterOptions {\n baseOffset?: number;\n rowGroupSize?: number;\n pageSize?: number;\n useDataPageV2?: boolean;\n\n // Write Stream Options\n flags?: string;\n encoding?: string;\n fd?: number;\n mode?: number;\n autoClose?: boolean;\n start?: number;\n}\n\n/**\n * Write a parquet file to an output stream. The ParquetWriter will perform\n * buffering/batching for performance, so close() must be called after all rows\n * are written.\n */\n// eslint-disable-next-line @typescript-eslint/no-unused-vars\nexport class ParquetWriter<T> {\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified file\n */\n static async openFile<T>(\n schema: ParquetSchema,\n path: string,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n const outputStream = await osopen(path, opts);\n return ParquetWriter.openStream(schema, outputStream, opts);\n }\n\n /**\n * Convenience method to create a new buffered parquet writer that writes to\n * the specified stream\n */\n static async openStream<T>(\n schema: ParquetSchema,\n outputStream: Writable,\n opts?: ParquetWriterOptions\n ): Promise<ParquetWriter<T>> {\n if (!opts) {\n // tslint:disable-next-line:no-parameter-reassignment\n opts = {};\n }\n\n const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);\n\n return new ParquetWriter(schema, envelopeWriter, opts);\n }\n\n public schema: ParquetSchema;\n public envelopeWriter: ParquetEnvelopeWriter;\n public rowBuffer: ParquetBuffer;\n public rowGroupSize: number;\n public closed: boolean;\n public userMetadata: Record<string, string>;\n\n /**\n * Create a new buffered parquet writer for a given envelope writer\n */\n constructor(\n schema: ParquetSchema,\n envelopeWriter: ParquetEnvelopeWriter,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.envelopeWriter = envelopeWriter;\n // @ts-ignore Row buffer typings...\n this.rowBuffer = {};\n this.rowGroupSize = opts.rowGroupSize || PARQUET_DEFAULT_ROW_GROUP_SIZE;\n this.closed = false;\n this.userMetadata = {};\n\n // eslint-disable-next-line @typescript-eslint/no-floating-promises\n this.writeHeader();\n }\n\n async writeHeader(): Promise<void> {\n // TODO - better not mess with promises in the constructor\n try {\n await this.envelopeWriter.writeHeader();\n } catch (err) {\n await this.envelopeWriter.close();\n throw err;\n }\n }\n\n /**\n * Append a single row to the parquet file. Rows are buffered in memory until\n * rowGroupSize rows are in the buffer or close() is called\n */\n async appendRow<T>(row: T): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n Shred.shredRecord(this.schema, row, this.rowBuffer);\n if (this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n }\n\n /**\n * Finish writing the parquet file and commit the footer to disk. This method\n * MUST be called after you are finished adding rows. You must not call this\n * method twice on the same object or add any rows after the close() method has\n * been called\n */\n async close(callback?: () => void): Promise<void> {\n if (this.closed) {\n throw new Error('writer was closed');\n }\n\n this.closed = true;\n\n if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) {\n // @ts-ignore\n this.rowBuffer = {};\n }\n\n await this.envelopeWriter.writeFooter(this.userMetadata);\n await this.envelopeWriter.close();\n // this.envelopeWriter = null;\n\n if (callback) {\n callback();\n }\n }\n\n /**\n * Add key<>value metadata to the file\n */\n setMetadata(key: string, value: string): void {\n // TODO: value to be any, obj -> JSON\n this.userMetadata[String(key)] = String(value);\n }\n\n /**\n * Set the parquet row group size. This values controls the maximum number\n * of rows that are buffered in memory at any given time as well as the number\n * of rows that are co-located on disk. A higher value is generally better for\n * read-time I/O performance at the tradeoff of write-time memory usage.\n */\n setRowGroupSize(cnt: number): void {\n this.rowGroupSize = cnt;\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.envelopeWriter.setPageSize(cnt);\n }\n}\n\n/**\n * Create a parquet file from a schema and a number of row groups. This class\n * performs direct, unbuffered writes to the underlying output stream and is\n * intendend for advanced and internal users; the writeXXX methods must be\n * called in the correct order to produce a valid file.\n */\nexport class ParquetEnvelopeWriter {\n /**\n * Create a new parquet envelope writer that writes to the specified stream\n */\n static async openStream(\n schema: ParquetSchema,\n outputStream: Writable,\n opts: ParquetWriterOptions\n ): Promise<ParquetEnvelopeWriter> {\n const writeFn = oswrite.bind(undefined, outputStream);\n const closeFn = osclose.bind(undefined, outputStream);\n return new ParquetEnvelopeWriter(schema, writeFn, closeFn, 0, opts);\n }\n\n public schema: ParquetSchema;\n public write: (buf: Buffer) => Promise<void>;\n public close: () => Promise<void>;\n public offset: number;\n public rowCount: number;\n public rowGroups: RowGroup[];\n public pageSize: number;\n public useDataPageV2: boolean;\n\n constructor(\n schema: ParquetSchema,\n writeFn: (buf: Buffer) => Promise<void>,\n closeFn: () => Promise<void>,\n fileOffset: number,\n opts: ParquetWriterOptions\n ) {\n this.schema = schema;\n this.write = writeFn;\n this.close = closeFn;\n this.offset = fileOffset;\n this.rowCount = 0;\n this.rowGroups = [];\n this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE;\n this.useDataPageV2 = 'useDataPageV2' in opts ? Boolean(opts.useDataPageV2) : false;\n }\n\n writeSection(buf: Buffer): Promise<void> {\n this.offset += buf.length;\n return this.write(buf);\n }\n\n /**\n * Encode the parquet file header\n */\n writeHeader(): Promise<void> {\n return this.writeSection(Buffer.from(PARQUET_MAGIC));\n }\n\n /**\n * Encode a parquet row group. The records object should be created using the\n * shredRecord method\n */\n async writeRowGroup(records: ParquetBuffer): Promise<void> {\n const rgroup = await encodeRowGroup(this.schema, records, {\n baseOffset: this.offset,\n pageSize: this.pageSize,\n useDataPageV2: this.useDataPageV2\n });\n\n this.rowCount += records.rowCount;\n this.rowGroups.push(rgroup.metadata);\n return await this.writeSection(rgroup.body);\n }\n\n /**\n * Write the parquet file footer\n */\n writeFooter(userMetadata: Record<string, string>): Promise<void> {\n if (!userMetadata) {\n // tslint:disable-next-line:no-parameter-reassignment\n userMetadata = {};\n }\n\n return this.writeSection(\n encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)\n );\n }\n\n /**\n * Set the parquet data page size. The data page size controls the maximum\n * number of column values that are written to disk as a consecutive array\n */\n setPageSize(cnt: number): void {\n this.pageSize = cnt;\n }\n}\n\n/**\n * Create a parquet transform stream\n */\nexport class ParquetTransformer<T> extends Transform {\n public writer: ParquetWriter<T>;\n\n constructor(schema: ParquetSchema, opts: ParquetWriterOptions = {}) {\n super({objectMode: true});\n\n const writeProxy = (function (t: ParquetTransformer<any>) {\n return async function (b: any): Promise<void> {\n t.push(b);\n };\n })(this);\n\n this.writer = new ParquetWriter(\n schema,\n new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),\n opts\n );\n }\n\n // tslint:disable-next-line:function-name\n _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {\n if (row) {\n return this.writer.appendRow(row).then(callback);\n }\n callback();\n return Promise.resolve();\n }\n\n // tslint:disable-next-line:function-name\n async _flush(callback: (val?: any) => void) {\n await this.writer.close(callback);\n }\n}\n\n/**\n * Encode a consecutive array of data using one of the parquet encodings\n */\nfunction encodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n values: any[],\n opts: ParquetCodecOptions\n) {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].encodeValues(type, values, opts);\n}\n\n/**\n * Encode a parquet data page\n */\nasync function encodeDataPage(\n column: ParquetField,\n data: ParquetData\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax)\n // disableEnvelope: false\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax)\n // disableEnvelope: false\n });\n }\n\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n const dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, dataBuf);\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE,\n data_page_header: new DataPageHeader({\n num_values: data.count,\n encoding: Encoding[column.encoding!] as any,\n definition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING], // [PARQUET_RDLVL_ENCODING],\n repetition_level_encoding: Encoding[PARQUET_RDLVL_ENCODING] // [PARQUET_RDLVL_ENCODING]\n }),\n uncompressed_page_size: dataBuf.length,\n compressed_page_size: compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, compressedBuf]);\n\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode a parquet data page (v2)\n */\nasync function encodeDataPageV2(\n column: ParquetField,\n data: ParquetData,\n rowCount: number\n): Promise<{\n header: PageHeader;\n headerSize: number;\n page: Buffer;\n}> {\n /* encode values */\n const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, data.values, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // compression = column.compression === 'UNCOMPRESSED' ? (compression || 'UNCOMPRESSED') : column.compression;\n const compressedBuf = await Compression.deflate(column.compression!, valuesBuf);\n\n /* encode repetition and definition levels */\n let rLevelsBuf = Buffer.alloc(0);\n if (column.rLevelMax > 0) {\n rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {\n bitWidth: getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n }\n\n let dLevelsBuf = Buffer.alloc(0);\n if (column.dLevelMax > 0) {\n dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.dlevels, {\n bitWidth: getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n }\n\n /* build page header */\n const header = new PageHeader({\n type: PageType.DATA_PAGE_V2,\n data_page_header_v2: new DataPageHeaderV2({\n num_values: data.count,\n num_nulls: data.count - data.values.length,\n num_rows: rowCount,\n encoding: Encoding[column.encoding!] as any,\n definition_levels_byte_length: dLevelsBuf.length,\n repetition_levels_byte_length: rLevelsBuf.length,\n is_compressed: column.compression !== 'UNCOMPRESSED'\n }),\n uncompressed_page_size: rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length,\n compressed_page_size: rLevelsBuf.length + dLevelsBuf.length + compressedBuf.length\n });\n\n /* concat page header, repetition and definition levels and values */\n const headerBuf = serializeThrift(header);\n const page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);\n return {header, headerSize: headerBuf.length, page};\n}\n\n/**\n * Encode an array of values into a parquet column chunk\n */\nasync function encodeColumnChunk(\n column: ParquetField,\n buffer: ParquetBuffer,\n offset: number,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: ColumnMetaData;\n metadataOffset: number;\n}> {\n const data = buffer.columnData[column.path.join()];\n const baseOffset = (opts.baseOffset || 0) + offset;\n /* encode data page(s) */\n // const pages: Buffer[] = [];\n let pageBuf: Buffer;\n // tslint:disable-next-line:variable-name\n let total_uncompressed_size = 0;\n // tslint:disable-next-line:variable-name\n let total_compressed_size = 0;\n {\n const result = opts.useDataPageV2\n ? await encodeDataPageV2(column, data, buffer.rowCount)\n : await encodeDataPage(column, data);\n // pages.push(result.page);\n pageBuf = result.page;\n total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;\n total_compressed_size += result.header.compressed_page_size + result.headerSize;\n }\n\n // const pagesBuf = Buffer.concat(pages);\n // const compression = column.compression === 'UNCOMPRESSED' ? (opts.compression || 'UNCOMPRESSED') : column.compression;\n\n /* prepare metadata header */\n const metadata = new ColumnMetaData({\n path_in_schema: column.path,\n num_values: data.count,\n data_page_offset: baseOffset,\n encodings: [],\n total_uncompressed_size, // : pagesBuf.length,\n total_compressed_size,\n type: Type[column.primitiveType!],\n codec: CompressionCodec[column.compression!]\n });\n\n /* list encodings */\n metadata.encodings.push(Encoding[PARQUET_RDLVL_ENCODING]);\n metadata.encodings.push(Encoding[column.encoding!]);\n\n /* concat metadata header and data pages */\n const metadataOffset = baseOffset + pageBuf.length;\n const body = Buffer.concat([pageBuf, serializeThrift(metadata)]);\n return {body, metadata, metadataOffset};\n}\n\n/**\n * Encode a list of column values into a parquet row group\n */\nasync function encodeRowGroup(\n schema: ParquetSchema,\n data: ParquetBuffer,\n opts: ParquetWriterOptions\n): Promise<{\n body: Buffer;\n metadata: RowGroup;\n}> {\n const metadata = new RowGroup({\n num_rows: data.rowCount,\n columns: [],\n total_byte_size: 0\n });\n\n let body = Buffer.alloc(0);\n for (const field of schema.fieldList) {\n if (field.isNested) {\n continue; // eslint-disable-line no-continue\n }\n\n const cchunkData = await encodeColumnChunk(field, data, body.length, opts);\n\n const cchunk = new ColumnChunk({\n file_offset: cchunkData.metadataOffset,\n meta_data: cchunkData.metadata\n });\n\n metadata.columns.push(cchunk);\n metadata.total_byte_size = new Int64(Number(metadata.total_byte_size) + cchunkData.body.length);\n\n body = Buffer.concat([body, cchunkData.body]);\n }\n\n return {body, metadata};\n}\n\n/**\n * Encode a parquet file metadata footer\n */\nfunction encodeFooter(\n schema: ParquetSchema,\n rowCount: number,\n rowGroups: RowGroup[],\n userMetadata: Record<string, string>\n): Buffer {\n const metadata = new FileMetaData({\n version: PARQUET_VERSION,\n created_by: 'parquets',\n num_rows: rowCount,\n row_groups: rowGroups,\n schema: [],\n key_value_metadata: []\n });\n\n for (const key in userMetadata) {\n const kv = new KeyValue({\n key,\n value: userMetadata[key]\n });\n metadata.key_value_metadata?.push?.(kv);\n }\n\n {\n const schemaRoot = new SchemaElement({\n name: 'root',\n num_children: Object.keys(schema.fields).length\n });\n metadata.schema.push(schemaRoot);\n }\n\n for (const field of schema.fieldList) {\n const relt = FieldRepetitionType[field.repetitionType];\n const schemaElem = new SchemaElement({\n name: field.name,\n repetition_type: relt as any\n });\n\n if (field.isNested) {\n schemaElem.num_children = field.fieldCount;\n } else {\n schemaElem.type = Type[field.primitiveType!] as Type;\n }\n\n if (field.originalType) {\n schemaElem.converted_type = ConvertedType[field.originalType] as ConvertedType;\n }\n\n schemaElem.type_length = field.typeLength;\n\n metadata.schema.push(schemaElem);\n }\n\n const metadataEncoded = serializeThrift(metadata);\n const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);\n metadataEncoded.copy(footerEncoded);\n footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);\n footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);\n return footerEncoded;\n}\n"],"file":"writer.js"}
|
|
@@ -15,5 +15,6 @@ exports.CompressionCodec = CompressionCodec;
|
|
|
15
15
|
CompressionCodec[CompressionCodec["BROTLI"] = 4] = "BROTLI";
|
|
16
16
|
CompressionCodec[CompressionCodec["LZ4"] = 5] = "LZ4";
|
|
17
17
|
CompressionCodec[CompressionCodec["ZSTD"] = 6] = "ZSTD";
|
|
18
|
+
CompressionCodec[CompressionCodec["LZ4_RAW"] = 7] = "LZ4_RAW";
|
|
18
19
|
})(CompressionCodec || (exports.CompressionCodec = CompressionCodec = {}));
|
|
19
20
|
//# sourceMappingURL=CompressionCodec.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/parquet-thrift/CompressionCodec.ts"],"names":["CompressionCodec"],"mappings":";;;;;;IAMYA,gB;;;WAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;GAAAA,gB,gCAAAA,gB","sourcesContent":["/* tslint:disable */\n/* eslint-disable */\n/*\n * Autogenerated by @creditkarma/thrift-typescript v3.7.2\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n */\nexport enum CompressionCodec {\n UNCOMPRESSED = 0,\n SNAPPY = 1,\n GZIP = 2,\n LZO = 3,\n BROTLI = 4,\n LZ4 = 5,\n ZSTD = 6\n}\n"],"file":"CompressionCodec.js"}
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/parquet-thrift/CompressionCodec.ts"],"names":["CompressionCodec"],"mappings":";;;;;;IAMYA,gB;;;WAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;AAAAA,EAAAA,gB,CAAAA,gB;GAAAA,gB,gCAAAA,gB","sourcesContent":["/* tslint:disable */\n/* eslint-disable */\n/*\n * Autogenerated by @creditkarma/thrift-typescript v3.7.2\n * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING\n */\nexport enum CompressionCodec {\n UNCOMPRESSED = 0,\n SNAPPY = 1,\n GZIP = 2,\n LZO = 3,\n BROTLI = 4,\n LZ4 = 5,\n ZSTD = 6,\n LZ4_RAW = 7 // Added in 2.9\n}\n"],"file":"CompressionCodec.js"}
|
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
|
|
4
|
+
|
|
5
|
+
Object.defineProperty(exports, "__esModule", {
|
|
6
|
+
value: true
|
|
7
|
+
});
|
|
8
|
+
exports.decodeDataPages = decodeDataPages;
|
|
9
|
+
exports.decodePage = decodePage;
|
|
10
|
+
exports.decodeSchema = decodeSchema;
|
|
11
|
+
|
|
12
|
+
var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
|
|
13
|
+
|
|
14
|
+
var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
|
|
15
|
+
|
|
16
|
+
var _codecs = require("../codecs");
|
|
17
|
+
|
|
18
|
+
var _parquetThrift = require("../parquet-thrift");
|
|
19
|
+
|
|
20
|
+
var _compression = require("../compression");
|
|
21
|
+
|
|
22
|
+
var _constants = require("../../constants");
|
|
23
|
+
|
|
24
|
+
var _readUtils = require("../utils/read-utils");
|
|
25
|
+
|
|
26
|
+
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
|
|
27
|
+
|
|
28
|
+
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
|
|
29
|
+
|
|
30
|
+
function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
|
|
31
|
+
|
|
32
|
+
function decodeDataPages(_x, _x2) {
|
|
33
|
+
return _decodeDataPages.apply(this, arguments);
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
function _decodeDataPages() {
|
|
37
|
+
_decodeDataPages = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(buffer, options) {
|
|
38
|
+
var cursor, data, dictionary, page, index, value;
|
|
39
|
+
return _regenerator.default.wrap(function _callee$(_context) {
|
|
40
|
+
while (1) {
|
|
41
|
+
switch (_context.prev = _context.next) {
|
|
42
|
+
case 0:
|
|
43
|
+
cursor = {
|
|
44
|
+
buffer: buffer,
|
|
45
|
+
offset: 0,
|
|
46
|
+
size: buffer.length
|
|
47
|
+
};
|
|
48
|
+
data = {
|
|
49
|
+
rlevels: [],
|
|
50
|
+
dlevels: [],
|
|
51
|
+
values: [],
|
|
52
|
+
pageHeaders: [],
|
|
53
|
+
count: 0
|
|
54
|
+
};
|
|
55
|
+
dictionary = options.dictionary || [];
|
|
56
|
+
|
|
57
|
+
case 3:
|
|
58
|
+
if (!(cursor.offset < cursor.size && (!options.numValues || data.dlevels.length < Number(options.numValues)))) {
|
|
59
|
+
_context.next = 16;
|
|
60
|
+
break;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
_context.next = 6;
|
|
64
|
+
return decodePage(cursor, options);
|
|
65
|
+
|
|
66
|
+
case 6:
|
|
67
|
+
page = _context.sent;
|
|
68
|
+
|
|
69
|
+
if (!page.dictionary) {
|
|
70
|
+
_context.next = 10;
|
|
71
|
+
break;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
dictionary = page.dictionary;
|
|
75
|
+
return _context.abrupt("continue", 3);
|
|
76
|
+
|
|
77
|
+
case 10:
|
|
78
|
+
if (dictionary.length) {
|
|
79
|
+
page.values = page.values.map(function (value) {
|
|
80
|
+
return dictionary[value];
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
for (index = 0; index < page.rlevels.length; index++) {
|
|
85
|
+
data.rlevels.push(page.rlevels[index]);
|
|
86
|
+
data.dlevels.push(page.dlevels[index]);
|
|
87
|
+
value = page.values[index];
|
|
88
|
+
|
|
89
|
+
if (value !== undefined) {
|
|
90
|
+
data.values.push(value);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
data.count += page.count;
|
|
95
|
+
data.pageHeaders.push(page.pageHeader);
|
|
96
|
+
_context.next = 3;
|
|
97
|
+
break;
|
|
98
|
+
|
|
99
|
+
case 16:
|
|
100
|
+
return _context.abrupt("return", data);
|
|
101
|
+
|
|
102
|
+
case 17:
|
|
103
|
+
case "end":
|
|
104
|
+
return _context.stop();
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}, _callee);
|
|
108
|
+
}));
|
|
109
|
+
return _decodeDataPages.apply(this, arguments);
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
function decodePage(_x3, _x4) {
|
|
113
|
+
return _decodePage.apply(this, arguments);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
function _decodePage() {
|
|
117
|
+
_decodePage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(cursor, options) {
|
|
118
|
+
var page, _yield$decodePageHead, pageHeader, length, pageType;
|
|
119
|
+
|
|
120
|
+
return _regenerator.default.wrap(function _callee2$(_context2) {
|
|
121
|
+
while (1) {
|
|
122
|
+
switch (_context2.prev = _context2.next) {
|
|
123
|
+
case 0:
|
|
124
|
+
_context2.next = 2;
|
|
125
|
+
return (0, _readUtils.decodePageHeader)(cursor.buffer, cursor.offset);
|
|
126
|
+
|
|
127
|
+
case 2:
|
|
128
|
+
_yield$decodePageHead = _context2.sent;
|
|
129
|
+
pageHeader = _yield$decodePageHead.pageHeader;
|
|
130
|
+
length = _yield$decodePageHead.length;
|
|
131
|
+
cursor.offset += length;
|
|
132
|
+
pageType = (0, _readUtils.getThriftEnum)(_parquetThrift.PageType, pageHeader.type);
|
|
133
|
+
_context2.t0 = pageType;
|
|
134
|
+
_context2.next = _context2.t0 === 'DATA_PAGE' ? 10 : _context2.t0 === 'DATA_PAGE_V2' ? 14 : _context2.t0 === 'DICTIONARY_PAGE' ? 18 : 24;
|
|
135
|
+
break;
|
|
136
|
+
|
|
137
|
+
case 10:
|
|
138
|
+
_context2.next = 12;
|
|
139
|
+
return decodeDataPage(cursor, pageHeader, options);
|
|
140
|
+
|
|
141
|
+
case 12:
|
|
142
|
+
page = _context2.sent;
|
|
143
|
+
return _context2.abrupt("break", 25);
|
|
144
|
+
|
|
145
|
+
case 14:
|
|
146
|
+
_context2.next = 16;
|
|
147
|
+
return decodeDataPageV2(cursor, pageHeader, options);
|
|
148
|
+
|
|
149
|
+
case 16:
|
|
150
|
+
page = _context2.sent;
|
|
151
|
+
return _context2.abrupt("break", 25);
|
|
152
|
+
|
|
153
|
+
case 18:
|
|
154
|
+
_context2.next = 20;
|
|
155
|
+
return decodeDictionaryPage(cursor, pageHeader, options);
|
|
156
|
+
|
|
157
|
+
case 20:
|
|
158
|
+
_context2.t1 = _context2.sent;
|
|
159
|
+
_context2.t2 = pageHeader;
|
|
160
|
+
page = {
|
|
161
|
+
dictionary: _context2.t1,
|
|
162
|
+
pageHeader: _context2.t2
|
|
163
|
+
};
|
|
164
|
+
return _context2.abrupt("break", 25);
|
|
165
|
+
|
|
166
|
+
case 24:
|
|
167
|
+
throw new Error("invalid page type: ".concat(pageType));
|
|
168
|
+
|
|
169
|
+
case 25:
|
|
170
|
+
return _context2.abrupt("return", page);
|
|
171
|
+
|
|
172
|
+
case 26:
|
|
173
|
+
case "end":
|
|
174
|
+
return _context2.stop();
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}, _callee2);
|
|
178
|
+
}));
|
|
179
|
+
return _decodePage.apply(this, arguments);
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
function decodeSchema(schemaElements, offset, len) {
|
|
183
|
+
var schema = {};
|
|
184
|
+
var next = offset;
|
|
185
|
+
|
|
186
|
+
for (var i = 0; i < len; i++) {
|
|
187
|
+
var schemaElement = schemaElements[next];
|
|
188
|
+
var repetitionType = next > 0 ? (0, _readUtils.getThriftEnum)(_parquetThrift.FieldRepetitionType, schemaElement.repetition_type) : 'ROOT';
|
|
189
|
+
var optional = false;
|
|
190
|
+
var repeated = false;
|
|
191
|
+
|
|
192
|
+
switch (repetitionType) {
|
|
193
|
+
case 'REQUIRED':
|
|
194
|
+
break;
|
|
195
|
+
|
|
196
|
+
case 'OPTIONAL':
|
|
197
|
+
optional = true;
|
|
198
|
+
break;
|
|
199
|
+
|
|
200
|
+
case 'REPEATED':
|
|
201
|
+
repeated = true;
|
|
202
|
+
break;
|
|
203
|
+
|
|
204
|
+
default:
|
|
205
|
+
throw new Error('parquet: unknown repetition type');
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
if (schemaElement.num_children > 0) {
|
|
209
|
+
var res = decodeSchema(schemaElements, next + 1, schemaElement.num_children);
|
|
210
|
+
next = res.next;
|
|
211
|
+
schema[schemaElement.name] = {
|
|
212
|
+
optional: optional,
|
|
213
|
+
repeated: repeated,
|
|
214
|
+
fields: res.schema
|
|
215
|
+
};
|
|
216
|
+
} else {
|
|
217
|
+
var type = (0, _readUtils.getThriftEnum)(_parquetThrift.Type, schemaElement.type);
|
|
218
|
+
var logicalType = type;
|
|
219
|
+
|
|
220
|
+
if (schemaElement.converted_type) {
|
|
221
|
+
logicalType = (0, _readUtils.getThriftEnum)(_parquetThrift.ConvertedType, schemaElement.converted_type);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
switch (logicalType) {
|
|
225
|
+
case 'DECIMAL':
|
|
226
|
+
logicalType = "".concat(logicalType, "_").concat(type);
|
|
227
|
+
break;
|
|
228
|
+
|
|
229
|
+
default:
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
schema[schemaElement.name] = {
|
|
233
|
+
type: logicalType,
|
|
234
|
+
typeLength: schemaElement.type_length,
|
|
235
|
+
presision: schemaElement.precision,
|
|
236
|
+
scale: schemaElement.scale,
|
|
237
|
+
optional: optional,
|
|
238
|
+
repeated: repeated
|
|
239
|
+
};
|
|
240
|
+
next++;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
return {
|
|
245
|
+
schema: schema,
|
|
246
|
+
offset: offset,
|
|
247
|
+
next: next
|
|
248
|
+
};
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
function decodeValues(type, encoding, cursor, count, opts) {
|
|
252
|
+
if (!(encoding in _codecs.PARQUET_CODECS)) {
|
|
253
|
+
throw new Error("invalid encoding: ".concat(encoding));
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
return _codecs.PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
function decodeDataPage(_x5, _x6, _x7) {
|
|
260
|
+
return _decodeDataPage.apply(this, arguments);
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
function _decodeDataPage() {
|
|
264
|
+
_decodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(cursor, header, options) {
|
|
265
|
+
var _header$data_page_hea, _header$data_page_hea2, _header$data_page_hea3, _header$data_page_hea4;
|
|
266
|
+
|
|
267
|
+
var cursorEnd, valueCount, dataCursor, valuesBuf, rLevelEncoding, rLevels, dLevelEncoding, dLevels, valueCountNonNull, _iterator, _step, dlvl, valueEncoding, decodeOptions, values;
|
|
268
|
+
|
|
269
|
+
return _regenerator.default.wrap(function _callee3$(_context3) {
|
|
270
|
+
while (1) {
|
|
271
|
+
switch (_context3.prev = _context3.next) {
|
|
272
|
+
case 0:
|
|
273
|
+
cursorEnd = cursor.offset + header.compressed_page_size;
|
|
274
|
+
valueCount = (_header$data_page_hea = header.data_page_header) === null || _header$data_page_hea === void 0 ? void 0 : _header$data_page_hea.num_values;
|
|
275
|
+
dataCursor = cursor;
|
|
276
|
+
|
|
277
|
+
if (!(options.compression !== 'UNCOMPRESSED')) {
|
|
278
|
+
_context3.next = 9;
|
|
279
|
+
break;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
_context3.next = 6;
|
|
283
|
+
return (0, _compression.decompress)(options.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
284
|
+
|
|
285
|
+
case 6:
|
|
286
|
+
valuesBuf = _context3.sent;
|
|
287
|
+
dataCursor = {
|
|
288
|
+
buffer: valuesBuf,
|
|
289
|
+
offset: 0,
|
|
290
|
+
size: valuesBuf.length
|
|
291
|
+
};
|
|
292
|
+
cursor.offset = cursorEnd;
|
|
293
|
+
|
|
294
|
+
case 9:
|
|
295
|
+
rLevelEncoding = (0, _readUtils.getThriftEnum)(_parquetThrift.Encoding, (_header$data_page_hea2 = header.data_page_header) === null || _header$data_page_hea2 === void 0 ? void 0 : _header$data_page_hea2.repetition_level_encoding);
|
|
296
|
+
rLevels = new Array(valueCount);
|
|
297
|
+
|
|
298
|
+
if (options.column.rLevelMax > 0) {
|
|
299
|
+
rLevels = decodeValues(_constants.PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount, {
|
|
300
|
+
bitWidth: (0, _readUtils.getBitWidth)(options.column.rLevelMax),
|
|
301
|
+
disableEnvelope: false
|
|
302
|
+
});
|
|
303
|
+
} else {
|
|
304
|
+
rLevels.fill(0);
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
dLevelEncoding = (0, _readUtils.getThriftEnum)(_parquetThrift.Encoding, (_header$data_page_hea3 = header.data_page_header) === null || _header$data_page_hea3 === void 0 ? void 0 : _header$data_page_hea3.definition_level_encoding);
|
|
308
|
+
dLevels = new Array(valueCount);
|
|
309
|
+
|
|
310
|
+
if (options.column.dLevelMax > 0) {
|
|
311
|
+
dLevels = decodeValues(_constants.PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount, {
|
|
312
|
+
bitWidth: (0, _readUtils.getBitWidth)(options.column.dLevelMax),
|
|
313
|
+
disableEnvelope: false
|
|
314
|
+
});
|
|
315
|
+
} else {
|
|
316
|
+
dLevels.fill(0);
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
valueCountNonNull = 0;
|
|
320
|
+
_iterator = _createForOfIteratorHelper(dLevels);
|
|
321
|
+
|
|
322
|
+
try {
|
|
323
|
+
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
324
|
+
dlvl = _step.value;
|
|
325
|
+
|
|
326
|
+
if (dlvl === options.column.dLevelMax) {
|
|
327
|
+
valueCountNonNull++;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
} catch (err) {
|
|
331
|
+
_iterator.e(err);
|
|
332
|
+
} finally {
|
|
333
|
+
_iterator.f();
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
valueEncoding = (0, _readUtils.getThriftEnum)(_parquetThrift.Encoding, (_header$data_page_hea4 = header.data_page_header) === null || _header$data_page_hea4 === void 0 ? void 0 : _header$data_page_hea4.encoding);
|
|
337
|
+
decodeOptions = {
|
|
338
|
+
typeLength: options.column.typeLength,
|
|
339
|
+
bitWidth: options.column.typeLength
|
|
340
|
+
};
|
|
341
|
+
values = decodeValues(options.column.primitiveType, valueEncoding, dataCursor, valueCountNonNull, decodeOptions);
|
|
342
|
+
return _context3.abrupt("return", {
|
|
343
|
+
dlevels: dLevels,
|
|
344
|
+
rlevels: rLevels,
|
|
345
|
+
values: values,
|
|
346
|
+
count: valueCount,
|
|
347
|
+
pageHeader: header
|
|
348
|
+
});
|
|
349
|
+
|
|
350
|
+
case 22:
|
|
351
|
+
case "end":
|
|
352
|
+
return _context3.stop();
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
}, _callee3);
|
|
356
|
+
}));
|
|
357
|
+
return _decodeDataPage.apply(this, arguments);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
function decodeDataPageV2(_x8, _x9, _x10) {
|
|
361
|
+
return _decodeDataPageV.apply(this, arguments);
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
function _decodeDataPageV() {
|
|
365
|
+
_decodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee4(cursor, header, opts) {
|
|
366
|
+
var _header$data_page_hea5, _header$data_page_hea6, _header$data_page_hea7, _header$data_page_hea8;
|
|
367
|
+
|
|
368
|
+
var cursorEnd, valueCount, valueCountNonNull, valueEncoding, rLevels, dLevels, valuesBufCursor, valuesBuf, decodeOptions, values;
|
|
369
|
+
return _regenerator.default.wrap(function _callee4$(_context4) {
|
|
370
|
+
while (1) {
|
|
371
|
+
switch (_context4.prev = _context4.next) {
|
|
372
|
+
case 0:
|
|
373
|
+
cursorEnd = cursor.offset + header.compressed_page_size;
|
|
374
|
+
valueCount = (_header$data_page_hea5 = header.data_page_header_v2) === null || _header$data_page_hea5 === void 0 ? void 0 : _header$data_page_hea5.num_values;
|
|
375
|
+
valueCountNonNull = valueCount - ((_header$data_page_hea6 = header.data_page_header_v2) === null || _header$data_page_hea6 === void 0 ? void 0 : _header$data_page_hea6.num_nulls);
|
|
376
|
+
valueEncoding = (0, _readUtils.getThriftEnum)(_parquetThrift.Encoding, (_header$data_page_hea7 = header.data_page_header_v2) === null || _header$data_page_hea7 === void 0 ? void 0 : _header$data_page_hea7.encoding);
|
|
377
|
+
rLevels = new Array(valueCount);
|
|
378
|
+
|
|
379
|
+
if (opts.column.rLevelMax > 0) {
|
|
380
|
+
rLevels = decodeValues(_constants.PARQUET_RDLVL_TYPE, _constants.PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
381
|
+
bitWidth: (0, _readUtils.getBitWidth)(opts.column.rLevelMax),
|
|
382
|
+
disableEnvelope: true
|
|
383
|
+
});
|
|
384
|
+
} else {
|
|
385
|
+
rLevels.fill(0);
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
dLevels = new Array(valueCount);
|
|
389
|
+
|
|
390
|
+
if (opts.column.dLevelMax > 0) {
|
|
391
|
+
dLevels = decodeValues(_constants.PARQUET_RDLVL_TYPE, _constants.PARQUET_RDLVL_ENCODING, cursor, valueCount, {
|
|
392
|
+
bitWidth: (0, _readUtils.getBitWidth)(opts.column.dLevelMax),
|
|
393
|
+
disableEnvelope: true
|
|
394
|
+
});
|
|
395
|
+
} else {
|
|
396
|
+
dLevels.fill(0);
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
valuesBufCursor = cursor;
|
|
400
|
+
|
|
401
|
+
if (!((_header$data_page_hea8 = header.data_page_header_v2) !== null && _header$data_page_hea8 !== void 0 && _header$data_page_hea8.is_compressed)) {
|
|
402
|
+
_context4.next = 15;
|
|
403
|
+
break;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
_context4.next = 12;
|
|
407
|
+
return (0, _compression.decompress)(opts.compression, cursor.buffer.slice(cursor.offset, cursorEnd), header.uncompressed_page_size);
|
|
408
|
+
|
|
409
|
+
case 12:
|
|
410
|
+
valuesBuf = _context4.sent;
|
|
411
|
+
valuesBufCursor = {
|
|
412
|
+
buffer: valuesBuf,
|
|
413
|
+
offset: 0,
|
|
414
|
+
size: valuesBuf.length
|
|
415
|
+
};
|
|
416
|
+
cursor.offset = cursorEnd;
|
|
417
|
+
|
|
418
|
+
case 15:
|
|
419
|
+
decodeOptions = {
|
|
420
|
+
typeLength: opts.column.typeLength,
|
|
421
|
+
bitWidth: opts.column.typeLength
|
|
422
|
+
};
|
|
423
|
+
values = decodeValues(opts.column.primitiveType, valueEncoding, valuesBufCursor, valueCountNonNull, decodeOptions);
|
|
424
|
+
return _context4.abrupt("return", {
|
|
425
|
+
dlevels: dLevels,
|
|
426
|
+
rlevels: rLevels,
|
|
427
|
+
values: values,
|
|
428
|
+
count: valueCount,
|
|
429
|
+
pageHeader: header
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
case 18:
|
|
433
|
+
case "end":
|
|
434
|
+
return _context4.stop();
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}, _callee4);
|
|
438
|
+
}));
|
|
439
|
+
return _decodeDataPageV.apply(this, arguments);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
function decodeDictionaryPage(_x11, _x12, _x13) {
|
|
443
|
+
return _decodeDictionaryPage.apply(this, arguments);
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
function _decodeDictionaryPage() {
|
|
447
|
+
_decodeDictionaryPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(cursor, pageHeader, options) {
|
|
448
|
+
var _pageHeader$dictionar;
|
|
449
|
+
|
|
450
|
+
var cursorEnd, dictCursor, valuesBuf, numValues;
|
|
451
|
+
return _regenerator.default.wrap(function _callee5$(_context5) {
|
|
452
|
+
while (1) {
|
|
453
|
+
switch (_context5.prev = _context5.next) {
|
|
454
|
+
case 0:
|
|
455
|
+
cursorEnd = cursor.offset + pageHeader.compressed_page_size;
|
|
456
|
+
dictCursor = {
|
|
457
|
+
offset: 0,
|
|
458
|
+
buffer: cursor.buffer.slice(cursor.offset, cursorEnd),
|
|
459
|
+
size: cursorEnd - cursor.offset
|
|
460
|
+
};
|
|
461
|
+
cursor.offset = cursorEnd;
|
|
462
|
+
|
|
463
|
+
if (!(options.compression !== 'UNCOMPRESSED')) {
|
|
464
|
+
_context5.next = 9;
|
|
465
|
+
break;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
_context5.next = 6;
|
|
469
|
+
return (0, _compression.decompress)(options.compression, dictCursor.buffer.slice(dictCursor.offset, cursorEnd), pageHeader.uncompressed_page_size);
|
|
470
|
+
|
|
471
|
+
case 6:
|
|
472
|
+
valuesBuf = _context5.sent;
|
|
473
|
+
dictCursor = {
|
|
474
|
+
buffer: valuesBuf,
|
|
475
|
+
offset: 0,
|
|
476
|
+
size: valuesBuf.length
|
|
477
|
+
};
|
|
478
|
+
cursor.offset = cursorEnd;
|
|
479
|
+
|
|
480
|
+
case 9:
|
|
481
|
+
numValues = (pageHeader === null || pageHeader === void 0 ? void 0 : (_pageHeader$dictionar = pageHeader.dictionary_page_header) === null || _pageHeader$dictionar === void 0 ? void 0 : _pageHeader$dictionar.num_values) || 0;
|
|
482
|
+
return _context5.abrupt("return", decodeValues(options.column.primitiveType, options.column.encoding, dictCursor, numValues, options).map(function (d) {
|
|
483
|
+
return d.toString();
|
|
484
|
+
}));
|
|
485
|
+
|
|
486
|
+
case 11:
|
|
487
|
+
case "end":
|
|
488
|
+
return _context5.stop();
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}, _callee5);
|
|
492
|
+
}));
|
|
493
|
+
return _decodeDictionaryPage.apply(this, arguments);
|
|
494
|
+
}
|
|
495
|
+
//# sourceMappingURL=decoders.js.map
|