@loaders.gl/parquet 3.0.10 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.es5.min.js +1 -1
- package/dist/dist.min.js +1 -1
- package/dist/es5/parquet-loader.js +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/esm/parquet-loader.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquetjs/codecs/plain.js +3 -3
- package/dist/esm/parquetjs/codecs/plain.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js +1 -1
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +2 -2
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/reader.js +3 -3
- package/dist/esm/parquetjs/reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +3 -3
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +2 -2
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js +18 -18
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/writer.js +1 -1
- package/dist/esm/parquetjs/writer.js.map +1 -1
- package/dist/parquet-worker.js +1 -1
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +4 -4
|
@@ -265,7 +265,7 @@ export class ParquetEnvelopeReader {
|
|
|
265
265
|
|
|
266
266
|
const field = schema.findField((_colChunk$meta_data = colChunk.meta_data) === null || _colChunk$meta_data === void 0 ? void 0 : _colChunk$meta_data.path_in_schema);
|
|
267
267
|
const type = Util.getThriftEnum(Type, (_colChunk$meta_data2 = colChunk.meta_data) === null || _colChunk$meta_data2 === void 0 ? void 0 : _colChunk$meta_data2.type);
|
|
268
|
-
if (type !== field.primitiveType) throw new Error(
|
|
268
|
+
if (type !== field.primitiveType) throw new Error("chunk type not matching schema: ".concat(type));
|
|
269
269
|
const compression = Util.getThriftEnum(CompressionCodec, (_colChunk$meta_data3 = colChunk.meta_data) === null || _colChunk$meta_data3 === void 0 ? void 0 : _colChunk$meta_data3.codec);
|
|
270
270
|
const pagesOffset = Number((_colChunk$meta_data4 = colChunk.meta_data) === null || _colChunk$meta_data4 === void 0 ? void 0 : _colChunk$meta_data4.data_page_offset);
|
|
271
271
|
const pagesSize = Number((_colChunk$meta_data5 = colChunk.meta_data) === null || _colChunk$meta_data5 === void 0 ? void 0 : _colChunk$meta_data5.total_compressed_size);
|
|
@@ -299,7 +299,7 @@ export class ParquetEnvelopeReader {
|
|
|
299
299
|
|
|
300
300
|
function decodeValues(type, encoding, cursor, count, opts) {
|
|
301
301
|
if (!(encoding in PARQUET_CODECS)) {
|
|
302
|
-
throw new Error(
|
|
302
|
+
throw new Error("invalid encoding: ".concat(encoding));
|
|
303
303
|
}
|
|
304
304
|
|
|
305
305
|
return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);
|
|
@@ -337,7 +337,7 @@ function decodeDataPages(buffer, column, compression) {
|
|
|
337
337
|
break;
|
|
338
338
|
|
|
339
339
|
default:
|
|
340
|
-
throw new Error(
|
|
340
|
+
throw new Error("invalid page type: ".concat(pageType));
|
|
341
341
|
}
|
|
342
342
|
|
|
343
343
|
Array.prototype.push.apply(data.rlevels, pageData.rlevels);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../src/parquetjs/reader.ts"],"names":["PARQUET_CODECS","Compression","ParquetSchema","Shred","CompressionCodec","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","Util","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","Symbol","asyncIterator","ParquetCursor","constructor","metadata","envelopeReader","schema","columnList","rowGroup","rowGroupIndex","next","length","row_groups","rowBuffer","readRowGroup","materializeRecords","shift","rewind","done","value","return","throw","ParquetReader","openFile","filePath","ParquetEnvelopeReader","readHeader","readFooter","err","close","openBuffer","buffer","openArrayBuffer","arrayBuffer","readFn","start","Buffer","from","closeFn","size","byteLength","version","Error","root","decodeSchema","num_children","getCursor","map","x","Array","isArray","getRowCount","Number","num_rows","getSchema","getMetadata","md","kv","key_value_metadata","key","fileStat","fstat","fileDescriptor","fopen","fread","bind","undefined","fclose","position","Promise","resolve","slice","read","fileSize","buf","toString","rowCount","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","fieldIndexOf","join","readColumnChunk","file_path","field","findField","type","getThriftEnum","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","pagesBuf","decodeDataPages","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","decodeFileMetadata","decodeValues","encoding","cursor","count","opts","column","offset","data","rlevels","dlevels","values","pageHeader","decodePageHeader","pageType","pageData","decodeDataPage","decodeDataPageV2","prototype","push","apply","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","valuesBuf","inflate","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","rLevelMax","bitWidth","getBitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","typeLength","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","schemaElements","len","i","schemaElement","repetitionType","repetition_type","optional","repeated","res","name","fields","logicalType","converted_type","type_length"],"mappings":";;;;AACA,SAA2CA,cAA3C,QAAgE,UAAhE;AACA,OAAO,KAAKC,WAAZ,MAA6B,eAA7B;AAYA,SAAQC,aAAR,QAA4B,iBAA5B;AACA,OAAO,KAAKC,KAAZ,MAAuB,gBAAvB;AAEA,SAEEC,gBAFF,EAGEC,aAHF,EAIEC,QAJF,EAKEC,mBALF,EAQEC,QARF,EAWEC,IAXF,QAYO,kBAZP;AAaA,OAAO,KAAKC,IAAZ,MAAsB,QAAtB;AAMA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;wBAkEGC,MAAM,CAACC,a;AA7DV,OAAO,MAAMC,aAAN,CAAmD;AAcxDC,EAAAA,WAAW,CACTC,QADS,EAETC,cAFS,EAGTC,MAHS,EAITC,UAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMS,QAAJC,IAAI,GAAwB;AAChC,QAAI,KAAKF,QAAL,CAAcG,MAAd,KAAyB,CAA7B,EAAgC;AAC9B,UAAI,KAAKF,aAAL,IAAsB,KAAKL,QAAL,CAAcQ,UAAd,CAAyBD,MAAnD,EAA2D;AAEzD,eAAO,IAAP;AACD;;AACD,YAAME,SAAS,GAAG,MAAM,KAAKR,cAAL,CAAoBS,YAApB,CACtB,KAAKR,MADiB,EAEtB,KAAKF,QAAL,CAAcQ,UAAd,CAAyB,KAAKH,aAA9B,CAFsB,EAGtB,KAAKF,UAHiB,CAAxB;AAKA,WAAKC,QAAL,GAAgBpB,KAAK,CAAC2B,kBAAN,CAAyB,KAAKT,MAA9B,EAAsCO,SAAtC,CAAhB;AACA,WAAKJ,aAAL;AACD;;AACD,WAAO,KAAKD,QAAL,CAAcQ,KAAd,EAAP;AACD;;AAKDC,EAAAA,MAAM,GAAS;AACb,SAAKT,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMD,4BAA2C;AACzC,QAAIS,IAAI,GAAG,KAAX;AACA,WAAO;AACLR,MAAAA,IAAI,EAAE,YAAY;AAChB,YAAIQ,IAAJ,EAAU;AACR,iBAAO;AAACA,YAAAA,IAAD;AAAOC,YAAAA,KAAK,EAAE;AAAd,WAAP;AACD;;AACD,cAAMA,KAAK,GAAG,MAAM,KAAKT,IAAL,EAApB;;AACA,YAAIS,KAAK,KAAK,IAAd,EAAoB;AAClB,iBAAO;AAACD,YAAAA,IAAI,EAAE,IAAP;AAAaC,YAAAA;AAAb,WAAP;AACD;;AACD,eAAO;AAACD,UAAAA,IAAI,EAAE,KAAP;AAAcC,UAAAA;AAAd,SAAP;AACD,OAVI;AAWLC,MAAAA,MAAM,EAAE,YAAY;AAClBF,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAD;AAAOC,UAAAA,KAAK,EAAE;AAAd,SAAP;AACD,OAdI;AAeLE,MAAAA,KAAK,EAAE,YAAY;AACjBH,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAI,EAAE,IAAP;AAAaC,UAAAA,KAAK,EAAE;AAApB,SAAP;AACD;AAlBI,KAAP;AAoBD;;AAnFuD;yBAwOvDnB,MAAM,CAACC,a;AA3IV,OAAO,MAAMqB,aAAN,CAAmD;AAKnC,eAARC,QAAQ,CAAIC,QAAJ,EAAiD;AACpE,UAAMnB,cAAc,GAAG,MAAMoB,qBAAqB,CAACF,QAAtB,CAA+BC,QAA/B,CAA7B;;AACA,QAAI;AACF,YAAMnB,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAEsB,eAAVE,UAAU,CAAIC,MAAJ,EAA+C;AACpE,UAAM1B,cAAc,GAAG,MAAMoB,qBAAqB,CAACK,UAAtB,CAAiCC,MAAjC,CAA7B;;AACA,QAAI;AACF,YAAM1B,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAK2B,eAAfI,eAAe,CAAIC,WAAJ,EAAyD;AACnF,UAAMC,MAAM,GAAG,OAAOC,KAAP,EAAsBxB,MAAtB,KAAyCyB,MAAM,CAACC,IAAP,CAAYJ,WAAZ,EAAyBE,KAAzB,EAAgCxB,MAAhC,CAAxD;;AACA,UAAM2B,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGN,WAAW,CAACO,UAAzB;AACA,UAAMnC,cAAc,GAAG,IAAIoB,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMlC,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAkBlB,QAAlB,EAA4BC,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAYDzB,EAAAA,WAAW,CAACC,QAAD,EAAyBC,cAAzB,EAAgE;AAAA;;AAAA;;AAAA;;AACzE,QAAID,QAAQ,CAACqC,OAAT,KAAqB5C,eAAzB,EAA0C;AACxC,YAAM,IAAI6C,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAED,SAAKtC,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,UAAMsC,IAAI,GAAG,KAAKvC,QAAL,CAAcE,MAAd,CAAqB,CAArB,CAAb;AACA,UAAM;AAACA,MAAAA;AAAD,QAAWsC,YAAY,CAAC,KAAKxC,QAAL,CAAcE,MAAf,EAAuB,CAAvB,EAA0BqC,IAAI,CAACE,YAA/B,CAA7B;AACA,SAAKvC,MAAL,GAAc,IAAInB,aAAJ,CAAkBmB,MAAlB,CAAd;AACD;;AAMU,QAALuB,KAAK,GAAkB;AAC3B,UAAM,KAAKxB,cAAL,CAAoBwB,KAApB,EAAN;AAGD;;AAeDiB,EAAAA,SAAS,CAACvC,UAAD,EAAgE;AACvE,QAAI,CAACA,UAAL,EAAiB;AAEfA,MAAAA,UAAU,GAAG,EAAb;AACD;;AAGDA,IAAAA,UAAU,GAAGA,UAAU,CAACwC,GAAX,CAAgBC,CAAD,IAAQC,KAAK,CAACC,OAAN,CAAcF,CAAd,IAAmBA,CAAnB,GAAuB,CAACA,CAAD,CAA9C,CAAb;AAEA,WAAO,IAAI9C,aAAJ,CACL,KAAKE,QADA,EAEL,KAAKC,cAFA,EAGL,KAAKC,MAHA,EAILC,UAJK,CAAP;AAMD;;AAMD4C,EAAAA,WAAW,GAAW;AACpB,WAAOC,MAAM,CAAC,KAAKhD,QAAL,CAAciD,QAAf,CAAb;AACD;;AAKDC,EAAAA,SAAS,GAAkB;AACzB,WAAO,KAAKhD,MAAZ;AACD;;AAKDiD,EAAAA,WAAW,GAA2B;AACpC,UAAMC,EAA0B,GAAG,EAAnC;;AACA,SAAK,MAAMC,EAAX,IAAiB,KAAKrD,QAAL,CAAcsD,kBAA/B,EAAoD;AAClDF,MAAAA,EAAE,CAACC,EAAE,CAACE,GAAJ,CAAF,GAAaF,EAAE,CAACtC,KAAhB;AACD;;AACD,WAAOqC,EAAP;AACD;;AAMD,6BAA2C;AACzC,WAAO,KAAKV,SAAL,GAAiB9C,MAAM,CAACC,aAAxB,GAAP;AACD;;AA7IuD;AAsJ1D,OAAO,MAAMwB,qBAAN,CAA4B;AASZ,eAARF,QAAQ,CAACC,QAAD,EAAmD;AACtE,UAAMoC,QAAQ,GAAG,MAAMjE,IAAI,CAACkE,KAAL,CAAWrC,QAAX,CAAvB;AACA,UAAMsC,cAAc,GAAG,MAAMnE,IAAI,CAACoE,KAAL,CAAWvC,QAAX,CAA7B;AAEA,UAAMU,MAAM,GAAGvC,IAAI,CAACqE,KAAL,CAAWC,IAAX,CAAgBC,SAAhB,EAA2BJ,cAA3B,CAAf;AACA,UAAMxB,OAAO,GAAG3C,IAAI,CAACwE,MAAL,CAAYF,IAAZ,CAAiBC,SAAjB,EAA4BJ,cAA5B,CAAhB;AAEA,WAAO,IAAIrC,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CsB,QAAQ,CAACrB,IAApD,CAAP;AACD;;AAEsB,eAAVT,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMG,MAAM,GAAG,CAACkC,QAAD,EAAmBzD,MAAnB,KACb0D,OAAO,CAACC,OAAR,CAAgBvC,MAAM,CAACwC,KAAP,CAAaH,QAAb,EAAuBA,QAAQ,GAAGzD,MAAlC,CAAhB,CADF;;AAEA,UAAM2B,OAAO,GAAG,MAAM+B,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAI7C,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CP,MAAM,CAACpB,MAAlD,CAAP;AACD;;AAEDR,EAAAA,WAAW,CACTqE,IADS,EAET3C,KAFS,EAGT4C,QAHS,EAIT;AAAA;;AAAA;;AAAA;;AACA,SAAKD,IAAL,GAAYA,IAAZ;AACA,SAAK3C,KAAL,GAAaA,KAAb;AACA,SAAK4C,QAAL,GAAgBA,QAAhB;AACD;;AAEe,QAAV/C,UAAU,GAAkB;AAChC,UAAMgD,GAAG,GAAG,MAAM,KAAKF,IAAL,CAAU,CAAV,EAAa5E,aAAa,CAACe,MAA3B,CAAlB;;AAEA,QAAI+D,GAAG,CAACC,QAAJ,OAAmB/E,aAAvB,EAAsC;AACpC,YAAM,IAAI8C,KAAJ,CAAU,wBAAV,CAAN;AACD;AACF;;AAEiB,QAAZ5B,YAAY,CAChBR,MADgB,EAEhBE,QAFgB,EAGhBD,UAHgB,EAIQ;AACxB,UAAMwB,MAAqB,GAAG;AAC5B6C,MAAAA,QAAQ,EAAExB,MAAM,CAAC5C,QAAQ,CAAC6C,QAAV,CADY;AAE5BwB,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBtE,QAAQ,CAACuE,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAI5E,UAAU,CAACI,MAAX,GAAoB,CAApB,IAAyBhB,IAAI,CAACyF,YAAL,CAAkB7E,UAAlB,EAA8B2E,MAA9B,IAAyC,CAAtE,EAAyE;AACvE;AACD;;AACDnD,MAAAA,MAAM,CAAC8C,UAAP,CAAkBK,MAAM,CAAEG,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBhF,MAArB,EAA6BwE,QAA7B,CAA1C;AACD;;AACD,WAAO/C,MAAP;AACD;;AAEoB,QAAfuD,eAAe,CAAChF,MAAD,EAAwBwE,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACS,SAAT,KAAuBrB,SAAvB,IAAoCY,QAAQ,CAACS,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAI7C,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAM8C,KAAK,GAAGlF,MAAM,CAACmF,SAAP,wBAAiBX,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMO,IAAmB,GAAG/F,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,0BAAyBoF,QAAQ,CAACG,SAAlC,yDAAyB,qBAAoBS,IAA7C,CAA5B;AACA,QAAIA,IAAI,KAAKF,KAAK,CAACI,aAAnB,EAAkC,MAAM,IAAIlD,KAAJ,CAAW,mCAAkCgD,IAAK,EAAlD,CAAN;AAElC,UAAMG,WAA+B,GAAGlG,IAAI,CAACgG,aAAL,CACtCtG,gBADsC,0BAEtCyF,QAAQ,CAACG,SAF6B,yDAEtC,qBAAoBa,KAFkB,CAAxC;AAKA,UAAMC,WAAW,GAAG3C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBe,gBAArB,CAA1B;AACA,UAAMC,SAAS,GAAG7C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBiB,qBAArB,CAAxB;AACA,UAAMC,QAAQ,GAAG,MAAM,KAAK3B,IAAL,CAAUuB,WAAV,EAAuBE,SAAvB,CAAvB;AAEA,WAAOG,eAAe,CAACD,QAAD,EAAWX,KAAX,EAAkBK,WAAlB,CAAtB;AACD;;AAEe,QAAVlE,UAAU,GAA0B;AACxC,UAAM0E,UAAU,GAAGzG,aAAa,CAACe,MAAd,GAAuB,CAA1C;AACA,UAAM2F,UAAU,GAAG,MAAM,KAAK9B,IAAL,CAAU,KAAKC,QAAL,GAAgB4B,UAA1B,EAAsCA,UAAtC,CAAzB;;AAEA,QAAIC,UAAU,CAAC/B,KAAX,CAAiB,CAAjB,EAAoBI,QAApB,OAAmC/E,aAAvC,EAAsD;AACpD,YAAM,IAAI8C,KAAJ,CAAU,0BAAV,CAAN;AACD;;AAED,UAAM6D,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAKhC,QAAL,GAAgB8B,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAG7G,aAAa,CAACe,MAAnC,EAA2C;AACzC,YAAM,IAAI+B,KAAJ,CAAU,uBAAV,CAAN;AACD;;AAED,UAAMgE,WAAW,GAAG,MAAM,KAAKlC,IAAL,CAAUiC,cAAV,EAA0BF,YAA1B,CAA1B;AAGA,UAAM;AAACnG,MAAAA;AAAD,QAAaT,IAAI,CAACgH,kBAAL,CAAwBD,WAAxB,CAAnB;AACA,WAAOtG,QAAP;AACD;;AAxGgC;;AA8GnC,SAASwG,YAAT,CACElB,IADF,EAEEmB,QAFF,EAGEC,MAHF,EAIEC,KAJF,EAKEC,IALF,EAMS;AACP,MAAI,EAAEH,QAAQ,IAAI5H,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyD,KAAJ,CAAW,qBAAoBmE,QAAS,EAAxC,CAAN;AACD;;AACD,SAAO5H,cAAc,CAAC4H,QAAD,CAAd,CAAyBD,YAAzB,CAAsClB,IAAtC,EAA4CoB,MAA5C,EAAoDC,KAApD,EAA2DC,IAA3D,CAAP;AACD;;AAED,SAASZ,eAAT,CACErE,MADF,EAEEkF,MAFF,EAGEpB,WAHF,EAIe;AACb,QAAMiB,MAAoB,GAAG;AAC3B/E,IAAAA,MAD2B;AAE3BmF,IAAAA,MAAM,EAAE,CAFmB;AAG3B3E,IAAAA,IAAI,EAAER,MAAM,CAACpB;AAHc,GAA7B;AAMA,QAAMwG,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBP,IAAAA,KAAK,EAAE;AAJiB,GAA1B;;AAQA,SAAOD,MAAM,CAACI,MAAP,GAAgBJ,MAAM,CAACvE,IAA9B,EAAoC;AAIlC,UAAM;AAACgF,MAAAA,UAAD;AAAa5G,MAAAA;AAAb,QAAuBhB,IAAI,CAAC6H,gBAAL,CAAsBV,MAAM,CAAC/E,MAA7B,CAA7B;AACA+E,IAAAA,MAAM,CAACI,MAAP,IAAiBvG,MAAjB;AAEA,UAAM8G,QAAQ,GAAG9H,IAAI,CAACgG,aAAL,CAAmBlG,QAAnB,EAA6B8H,UAAU,CAAC7B,IAAxC,CAAjB;AAEA,QAAIgC,QAA4B,GAAG,IAAnC;;AACA,YAAQD,QAAR;AACE,WAAK,WAAL;AACEC,QAAAA,QAAQ,GAAGC,cAAc,CAACb,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAAzB;AACA;;AACF,WAAK,cAAL;AACE6B,QAAAA,QAAQ,GAAGE,gBAAgB,CAACd,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAA3B;AACA;;AACF;AACE,cAAM,IAAInD,KAAJ,CAAW,sBAAqB+E,QAAS,EAAzC,CAAN;AARJ;;AAWAxE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACC,OAAhC,EAAyCM,QAAQ,CAACN,OAAlD;AACAnE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACE,OAAhC,EAAyCK,QAAQ,CAACL,OAAlD;AACApE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACG,MAAhC,EAAwCI,QAAQ,CAACJ,MAAjD;AACAH,IAAAA,IAAI,CAACJ,KAAL,IAAcW,QAAQ,CAACX,KAAvB;AACD;;AAED,SAAOI,IAAP;AACD;;AAED,SAASQ,cAAT,CACEb,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAmBA,MAAIC,UAAU,GAAGxB,MAAjB;;AACA,MAAIjB,WAAW,KAAK,cAApB,EAAoC;AAClC,UAAM0C,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAKAH,IAAAA,UAAU,GAAG;AACXvG,MAAAA,MAAM,EAAEwG,SADG;AAEXrB,MAAAA,MAAM,EAAE,CAFG;AAGX3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHL,KAAb;AAKAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAGD,QAAMS,cAAc,GAAG/I,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBO,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqB4I,cAArB,EAAqCJ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CADwE;AAElFG,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAGvJ,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBe,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBoJ,cAArB,EAAqCZ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CADwE;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAKtC,MAAM,CAACoC,SAApB,EAA+B;AAC7BC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAACI,gBAFa,2DAEpB,uBAAyBvB,QAFL,CAAtB;AAIA,QAAMS,MAAM,GAAGV,YAAY,CAACK,MAAM,CAACrB,aAAR,EAAwB4D,aAAxB,EAAuClB,UAAvC,EAAmDgB,iBAAnD,EAAsE;AAC/FG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UAD4E;AAE/FX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAF8E,GAAtE,CAA3B;AASA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASP,gBAAT,CACEd,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AAEA,QAAMC,UAAU,6BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BrB,UAA/C;AAEA,QAAMiB,iBAAiB,GAAGnB,UAAU,8BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMH,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAAC0B,mBAFa,2DAEpB,uBAA4B7C,QAFR,CAAtB;AAOA,MAAI+B,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CAD4E;AAEtFG,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CAD4E;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIW,eAAe,GAAG9C,MAAtB;;AAEA,gCAAIkB,MAAM,CAAC0B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMtB,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAMAmB,IAAAA,eAAe,GAAG;AAChB7H,MAAAA,MAAM,EAAEwG,SADQ;AAEhBrB,MAAAA,MAAM,EAAE,CAFQ;AAGhB3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHA,KAAlB;AAMAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAED,QAAMX,MAAM,GAAGV,YAAY,CACzBK,MAAM,CAACrB,aADkB,EAEzB4D,aAFyB,EAGzBI,eAHyB,EAIzBN,iBAJyB,EAKzB;AACEG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UADrB;AAEEX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAFnB,GALyB,CAA3B;AAWA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASvF,YAAT,CACEkH,cADF,EAEE5C,MAFF,EAGE6C,GAHF,EAQE;AACA,QAAMzJ,MAAwB,GAAG,EAAjC;AACA,MAAII,IAAI,GAAGwG,MAAX;;AACA,OAAK,IAAI8C,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGD,GAApB,EAAyBC,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGH,cAAc,CAACpJ,IAAD,CAApC;AAEA,UAAMwJ,cAAc,GAClBxJ,IAAI,GAAG,CAAP,GAAWf,IAAI,CAACgG,aAAL,CAAmBnG,mBAAnB,EAAwCyK,aAAa,CAACE,eAAtD,CAAX,GAAqF,MADvF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAI3H,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIuH,aAAa,CAACpH,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMyH,GAAG,GAAG1H,YAAY,CAACkH,cAAD,EAAiBpJ,IAAI,GAAG,CAAxB,EAA2BuJ,aAAa,CAACpH,YAAzC,CAAxB;AACAnC,MAAAA,IAAI,GAAG4J,GAAG,CAAC5J,IAAX;AACAJ,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAE3BH,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BG,QAAAA,MAAM,EAAEF,GAAG,CAAChK;AAJe,OAA7B;AAMD,KATD,MASO;AACL,UAAImK,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,EAAyBuK,aAAa,CAACvE,IAAvC,CAAlB;;AAEA,UAAIuE,aAAa,CAACS,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBrG,aAAnB,EAAkC2K,aAAa,CAACS,cAAhD,CAAd;AACD;;AAEDpK,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAC3B7E,QAAAA,IAAI,EAAE+E,WADqB;AAE3BhB,QAAAA,UAAU,EAAEQ,aAAa,CAACU,WAFC;AAG3BP,QAAAA,QAH2B;AAI3BC,QAAAA;AAJ2B,OAA7B;AAMA3J,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACJ,IAAAA,MAAD;AAAS4G,IAAAA,MAAT;AAAiBxG,IAAAA;AAAjB,GAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from './codecs';\nimport * as Compression from './compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetCompression,\n ParquetData,\n ParquetField,\n ParquetRecord,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from './schema/declare';\nimport {ParquetSchema} from './schema/schema';\nimport * as Shred from './schema/shred';\n// tslint:disable-next-line:max-line-length\nimport {\n ColumnChunk,\n CompressionCodec,\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from './parquet-thrift';\nimport * as Util from './util';\n// import Fs = require('fs');\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Internal type used for repetition/definition levels\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n\n/**\n * A parquet reader allows retrieving the rows from a parquet file in order.\n * The basic usage is to create a reader and then retrieve a cursor/iterator\n * which allows you to consume row after row until all rows have been read. It is\n * important that you call close() after you are finished reading the file to\n * avoid leaking file descriptors.\n */\nexport class ParquetReader<T> implements AsyncIterable<T> {\n /**\n * Open the parquet file pointed to by the specified path and return a new\n * parquet reader\n */\n static async openFile<T>(filePath: string): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);\n const closeFn = async () => {};\n const size = arrayBuffer.byteLength;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is not recommended to call this constructor directly except for advanced\n * and internal use cases. Consider using one of the open{File,Buffer} methods\n * instead\n */\n constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {\n if (metadata.version !== PARQUET_VERSION) {\n throw new Error('invalid parquet version');\n }\n\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n const root = this.metadata.schema[0];\n const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);\n this.schema = new ParquetSchema(schema);\n }\n\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n async close(): Promise<void> {\n await this.envelopeReader.close();\n // this.envelopeReader = null;\n // this.metadata = null;\n }\n\n /**\n * Return a cursor to the file. You may open more than one cursor and use\n * them concurrently. All cursors become invalid once close() is called on\n * the reader object.\n *\n * The required_columns parameter controls which columns are actually read\n * from disk. An empty array or no value implies all columns. A list of column\n * names means that only those columns should be loaded from disk.\n */\n getCursor(): ParquetCursor<T>;\n // @ts-ignore\n getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;\n getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;\n getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {\n if (!columnList) {\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = [];\n }\n\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));\n\n return new ParquetCursor<T>(\n this.metadata,\n this.envelopeReader,\n this.schema,\n columnList as string[][]\n );\n }\n\n /**\n * Return the number of rows in this file. Note that the number of rows is\n * not neccessarily equal to the number of rows in each column.\n */\n getRowCount(): number {\n return Number(this.metadata.num_rows);\n }\n\n /**\n * Returns the ParquetSchema for this file\n */\n getSchema(): ParquetSchema {\n return this.schema;\n }\n\n /**\n * Returns the user (key/value) metadata for this file\n */\n getMetadata(): Record<string, string> {\n const md: Record<string, string> = {};\n for (const kv of this.metadata.key_value_metadata!) {\n md[kv.key] = kv.value!;\n }\n return md;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.getCursor()[Symbol.asyncIterator]();\n }\n}\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n\n static async openFile(filePath: string): Promise<ParquetEnvelopeReader> {\n const fileStat = await Util.fstat(filePath);\n const fileDescriptor = await Util.fopen(filePath);\n\n const readFn = Util.fread.bind(undefined, fileDescriptor);\n const closeFn = Util.fclose.bind(undefined, fileDescriptor);\n\n return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);\n }\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n }\n\n async readHeader(): Promise<void> {\n const buf = await this.read(0, PARQUET_MAGIC.length);\n\n if (buf.toString() !== PARQUET_MAGIC) {\n throw new Error('not valid parquet file');\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && Util.fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = Util.getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n if (type !== field.primitiveType) throw new Error(`chunk type not matching schema: ${type}`);\n\n const compression: ParquetCompression = Util.getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n const pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n\n return decodeDataPages(pagesBuf, field, compression);\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n if (trailerBuf.slice(4).toString() !== PARQUET_MAGIC) {\n throw new Error('not a valid parquet file');\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error('invalid metadata size');\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = Util.decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\nfunction decodeDataPages(\n buffer: Buffer,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n count: 0\n };\n\n // @ts-ignore size can be undefined\n while (cursor.offset < cursor.size) {\n // const pageHeader = new parquet_thrift.PageHeader();\n // cursor.offset += parquet_util.decodeThrift(pageHeader, cursor.buffer);\n\n const {pageHeader, length} = Util.decodePageHeader(cursor.buffer);\n cursor.offset += length;\n\n const pageType = Util.getThriftEnum(PageType, pageHeader.type);\n\n let pageData: ParquetData | null = null;\n switch (pageType) {\n case 'DATA_PAGE':\n pageData = decodeDataPage(cursor, pageHeader, column, compression);\n break;\n case 'DATA_PAGE_V2':\n pageData = decodeDataPageV2(cursor, pageHeader, column, compression);\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n Array.prototype.push.apply(data.rlevels, pageData.rlevels);\n Array.prototype.push.apply(data.dlevels, pageData.dlevels);\n Array.prototype.push.apply(data.values, pageData.values);\n data.count += pageData.count;\n }\n\n return data;\n}\n\nfunction decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n // const info = {\n // path: opts.column.path.join('.'),\n // valueEncoding,\n // dLevelEncoding,\n // rLevelEncoding,\n // cursorOffset: cursor.offset,\n // cursorEnd,\n // cusrorSize: cursor.size,\n // header,\n // opts,\n // buffer: cursor.buffer.toJSON(),\n // values: null as any[],\n // valBuf: null as any\n // };\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n /* uncompress page */\n let dataCursor = cursor;\n if (compression !== 'UNCOMPRESSED') {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.encoding!\n ) as ParquetCodec;\n const values = decodeValues(column.primitiveType!, valueEncoding, dataCursor, valueCountNonNull, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // info.valBuf = uncursor.buffer.toJSON();\n // info.values = values;\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const values = decodeValues(\n column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n }\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? Util.getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n let logicalType = Util.getThriftEnum(Type, schemaElement.type!);\n\n if (schemaElement.converted_type) {\n logicalType = Util.getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n"],"file":"reader.js"}
|
|
1
|
+
{"version":3,"sources":["../../../src/parquetjs/reader.ts"],"names":["PARQUET_CODECS","Compression","ParquetSchema","Shred","CompressionCodec","ConvertedType","Encoding","FieldRepetitionType","PageType","Type","Util","PARQUET_MAGIC","PARQUET_VERSION","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING","Symbol","asyncIterator","ParquetCursor","constructor","metadata","envelopeReader","schema","columnList","rowGroup","rowGroupIndex","next","length","row_groups","rowBuffer","readRowGroup","materializeRecords","shift","rewind","done","value","return","throw","ParquetReader","openFile","filePath","ParquetEnvelopeReader","readHeader","readFooter","err","close","openBuffer","buffer","openArrayBuffer","arrayBuffer","readFn","start","Buffer","from","closeFn","size","byteLength","version","Error","root","decodeSchema","num_children","getCursor","map","x","Array","isArray","getRowCount","Number","num_rows","getSchema","getMetadata","md","kv","key_value_metadata","key","fileStat","fstat","fileDescriptor","fopen","fread","bind","undefined","fclose","position","Promise","resolve","slice","read","fileSize","buf","toString","rowCount","columnData","colChunk","columns","colMetadata","meta_data","colKey","path_in_schema","fieldIndexOf","join","readColumnChunk","file_path","field","findField","type","getThriftEnum","primitiveType","compression","codec","pagesOffset","data_page_offset","pagesSize","total_compressed_size","pagesBuf","decodeDataPages","trailerLen","trailerBuf","metadataSize","readUInt32LE","metadataOffset","metadataBuf","decodeFileMetadata","decodeValues","encoding","cursor","count","opts","column","offset","data","rlevels","dlevels","values","pageHeader","decodePageHeader","pageType","pageData","decodeDataPage","decodeDataPageV2","prototype","push","apply","header","cursorEnd","compressed_page_size","valueCount","data_page_header","num_values","dataCursor","valuesBuf","inflate","uncompressed_page_size","rLevelEncoding","repetition_level_encoding","rLevels","rLevelMax","bitWidth","getBitWidth","disableEnvelope","fill","dLevelEncoding","definition_level_encoding","dLevels","dLevelMax","valueCountNonNull","dlvl","valueEncoding","typeLength","data_page_header_v2","num_nulls","valuesBufCursor","is_compressed","schemaElements","len","i","schemaElement","repetitionType","repetition_type","optional","repeated","res","name","fields","logicalType","converted_type","type_length"],"mappings":";;;;AACA,SAA2CA,cAA3C,QAAgE,UAAhE;AACA,OAAO,KAAKC,WAAZ,MAA6B,eAA7B;AAYA,SAAQC,aAAR,QAA4B,iBAA5B;AACA,OAAO,KAAKC,KAAZ,MAAuB,gBAAvB;AAEA,SAEEC,gBAFF,EAGEC,aAHF,EAIEC,QAJF,EAKEC,mBALF,EAQEC,QARF,EAWEC,IAXF,QAYO,kBAZP;AAaA,OAAO,KAAKC,IAAZ,MAAsB,QAAtB;AAMA,MAAMC,aAAa,GAAG,MAAtB;AAKA,MAAMC,eAAe,GAAG,CAAxB;AAKA,MAAMC,kBAAkB,GAAG,OAA3B;AACA,MAAMC,sBAAsB,GAAG,KAA/B;wBAkEGC,MAAM,CAACC,a;AA7DV,OAAO,MAAMC,aAAN,CAAmD;AAcxDC,EAAAA,WAAW,CACTC,QADS,EAETC,cAFS,EAGTC,MAHS,EAITC,UAJS,EAKT;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AAAA;;AACA,SAAKH,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,SAAKC,MAAL,GAAcA,MAAd;AACA,SAAKC,UAAL,GAAkBA,UAAlB;AACA,SAAKC,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMS,QAAJC,IAAI,GAAwB;AAChC,QAAI,KAAKF,QAAL,CAAcG,MAAd,KAAyB,CAA7B,EAAgC;AAC9B,UAAI,KAAKF,aAAL,IAAsB,KAAKL,QAAL,CAAcQ,UAAd,CAAyBD,MAAnD,EAA2D;AAEzD,eAAO,IAAP;AACD;;AACD,YAAME,SAAS,GAAG,MAAM,KAAKR,cAAL,CAAoBS,YAApB,CACtB,KAAKR,MADiB,EAEtB,KAAKF,QAAL,CAAcQ,UAAd,CAAyB,KAAKH,aAA9B,CAFsB,EAGtB,KAAKF,UAHiB,CAAxB;AAKA,WAAKC,QAAL,GAAgBpB,KAAK,CAAC2B,kBAAN,CAAyB,KAAKT,MAA9B,EAAsCO,SAAtC,CAAhB;AACA,WAAKJ,aAAL;AACD;;AACD,WAAO,KAAKD,QAAL,CAAcQ,KAAd,EAAP;AACD;;AAKDC,EAAAA,MAAM,GAAS;AACb,SAAKT,QAAL,GAAgB,EAAhB;AACA,SAAKC,aAAL,GAAqB,CAArB;AACD;;AAMD,4BAA2C;AACzC,QAAIS,IAAI,GAAG,KAAX;AACA,WAAO;AACLR,MAAAA,IAAI,EAAE,YAAY;AAChB,YAAIQ,IAAJ,EAAU;AACR,iBAAO;AAACA,YAAAA,IAAD;AAAOC,YAAAA,KAAK,EAAE;AAAd,WAAP;AACD;;AACD,cAAMA,KAAK,GAAG,MAAM,KAAKT,IAAL,EAApB;;AACA,YAAIS,KAAK,KAAK,IAAd,EAAoB;AAClB,iBAAO;AAACD,YAAAA,IAAI,EAAE,IAAP;AAAaC,YAAAA;AAAb,WAAP;AACD;;AACD,eAAO;AAACD,UAAAA,IAAI,EAAE,KAAP;AAAcC,UAAAA;AAAd,SAAP;AACD,OAVI;AAWLC,MAAAA,MAAM,EAAE,YAAY;AAClBF,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAD;AAAOC,UAAAA,KAAK,EAAE;AAAd,SAAP;AACD,OAdI;AAeLE,MAAAA,KAAK,EAAE,YAAY;AACjBH,QAAAA,IAAI,GAAG,IAAP;AACA,eAAO;AAACA,UAAAA,IAAI,EAAE,IAAP;AAAaC,UAAAA,KAAK,EAAE;AAApB,SAAP;AACD;AAlBI,KAAP;AAoBD;;AAnFuD;yBAwOvDnB,MAAM,CAACC,a;AA3IV,OAAO,MAAMqB,aAAN,CAAmD;AAKnC,eAARC,QAAQ,CAAIC,QAAJ,EAAiD;AACpE,UAAMnB,cAAc,GAAG,MAAMoB,qBAAqB,CAACF,QAAtB,CAA+BC,QAA/B,CAA7B;;AACA,QAAI;AACF,YAAMnB,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAEsB,eAAVE,UAAU,CAAIC,MAAJ,EAA+C;AACpE,UAAM1B,cAAc,GAAG,MAAMoB,qBAAqB,CAACK,UAAtB,CAAiCC,MAAjC,CAA7B;;AACA,QAAI;AACF,YAAM1B,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAqBlB,QAArB,EAA+BC,cAA/B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAK2B,eAAfI,eAAe,CAAIC,WAAJ,EAAyD;AACnF,UAAMC,MAAM,GAAG,OAAOC,KAAP,EAAsBxB,MAAtB,KAAyCyB,MAAM,CAACC,IAAP,CAAYJ,WAAZ,EAAyBE,KAAzB,EAAgCxB,MAAhC,CAAxD;;AACA,UAAM2B,OAAO,GAAG,YAAY,CAAE,CAA9B;;AACA,UAAMC,IAAI,GAAGN,WAAW,CAACO,UAAzB;AACA,UAAMnC,cAAc,GAAG,IAAIoB,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CC,IAA3C,CAAvB;;AACA,QAAI;AACF,YAAMlC,cAAc,CAACqB,UAAf,EAAN;AACA,YAAMtB,QAAQ,GAAG,MAAMC,cAAc,CAACsB,UAAf,EAAvB;AACA,aAAO,IAAIL,aAAJ,CAAkBlB,QAAlB,EAA4BC,cAA5B,CAAP;AACD,KAJD,CAIE,OAAOuB,GAAP,EAAY;AACZ,YAAMvB,cAAc,CAACwB,KAAf,EAAN;AACA,YAAMD,GAAN;AACD;AACF;;AAYDzB,EAAAA,WAAW,CAACC,QAAD,EAAyBC,cAAzB,EAAgE;AAAA;;AAAA;;AAAA;;AACzE,QAAID,QAAQ,CAACqC,OAAT,KAAqB5C,eAAzB,EAA0C;AACxC,YAAM,IAAI6C,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAED,SAAKtC,QAAL,GAAgBA,QAAhB;AACA,SAAKC,cAAL,GAAsBA,cAAtB;AACA,UAAMsC,IAAI,GAAG,KAAKvC,QAAL,CAAcE,MAAd,CAAqB,CAArB,CAAb;AACA,UAAM;AAACA,MAAAA;AAAD,QAAWsC,YAAY,CAAC,KAAKxC,QAAL,CAAcE,MAAf,EAAuB,CAAvB,EAA0BqC,IAAI,CAACE,YAA/B,CAA7B;AACA,SAAKvC,MAAL,GAAc,IAAInB,aAAJ,CAAkBmB,MAAlB,CAAd;AACD;;AAMU,QAALuB,KAAK,GAAkB;AAC3B,UAAM,KAAKxB,cAAL,CAAoBwB,KAApB,EAAN;AAGD;;AAeDiB,EAAAA,SAAS,CAACvC,UAAD,EAAgE;AACvE,QAAI,CAACA,UAAL,EAAiB;AAEfA,MAAAA,UAAU,GAAG,EAAb;AACD;;AAGDA,IAAAA,UAAU,GAAGA,UAAU,CAACwC,GAAX,CAAgBC,CAAD,IAAQC,KAAK,CAACC,OAAN,CAAcF,CAAd,IAAmBA,CAAnB,GAAuB,CAACA,CAAD,CAA9C,CAAb;AAEA,WAAO,IAAI9C,aAAJ,CACL,KAAKE,QADA,EAEL,KAAKC,cAFA,EAGL,KAAKC,MAHA,EAILC,UAJK,CAAP;AAMD;;AAMD4C,EAAAA,WAAW,GAAW;AACpB,WAAOC,MAAM,CAAC,KAAKhD,QAAL,CAAciD,QAAf,CAAb;AACD;;AAKDC,EAAAA,SAAS,GAAkB;AACzB,WAAO,KAAKhD,MAAZ;AACD;;AAKDiD,EAAAA,WAAW,GAA2B;AACpC,UAAMC,EAA0B,GAAG,EAAnC;;AACA,SAAK,MAAMC,EAAX,IAAiB,KAAKrD,QAAL,CAAcsD,kBAA/B,EAAoD;AAClDF,MAAAA,EAAE,CAACC,EAAE,CAACE,GAAJ,CAAF,GAAaF,EAAE,CAACtC,KAAhB;AACD;;AACD,WAAOqC,EAAP;AACD;;AAMD,6BAA2C;AACzC,WAAO,KAAKV,SAAL,GAAiB9C,MAAM,CAACC,aAAxB,GAAP;AACD;;AA7IuD;AAsJ1D,OAAO,MAAMwB,qBAAN,CAA4B;AASZ,eAARF,QAAQ,CAACC,QAAD,EAAmD;AACtE,UAAMoC,QAAQ,GAAG,MAAMjE,IAAI,CAACkE,KAAL,CAAWrC,QAAX,CAAvB;AACA,UAAMsC,cAAc,GAAG,MAAMnE,IAAI,CAACoE,KAAL,CAAWvC,QAAX,CAA7B;AAEA,UAAMU,MAAM,GAAGvC,IAAI,CAACqE,KAAL,CAAWC,IAAX,CAAgBC,SAAhB,EAA2BJ,cAA3B,CAAf;AACA,UAAMxB,OAAO,GAAG3C,IAAI,CAACwE,MAAL,CAAYF,IAAZ,CAAiBC,SAAjB,EAA4BJ,cAA5B,CAAhB;AAEA,WAAO,IAAIrC,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CsB,QAAQ,CAACrB,IAApD,CAAP;AACD;;AAEsB,eAAVT,UAAU,CAACC,MAAD,EAAiD;AACtE,UAAMG,MAAM,GAAG,CAACkC,QAAD,EAAmBzD,MAAnB,KACb0D,OAAO,CAACC,OAAR,CAAgBvC,MAAM,CAACwC,KAAP,CAAaH,QAAb,EAAuBA,QAAQ,GAAGzD,MAAlC,CAAhB,CADF;;AAEA,UAAM2B,OAAO,GAAG,MAAM+B,OAAO,CAACC,OAAR,EAAtB;;AACA,WAAO,IAAI7C,qBAAJ,CAA0BS,MAA1B,EAAkCI,OAAlC,EAA2CP,MAAM,CAACpB,MAAlD,CAAP;AACD;;AAEDR,EAAAA,WAAW,CACTqE,IADS,EAET3C,KAFS,EAGT4C,QAHS,EAIT;AAAA;;AAAA;;AAAA;;AACA,SAAKD,IAAL,GAAYA,IAAZ;AACA,SAAK3C,KAAL,GAAaA,KAAb;AACA,SAAK4C,QAAL,GAAgBA,QAAhB;AACD;;AAEe,QAAV/C,UAAU,GAAkB;AAChC,UAAMgD,GAAG,GAAG,MAAM,KAAKF,IAAL,CAAU,CAAV,EAAa5E,aAAa,CAACe,MAA3B,CAAlB;;AAEA,QAAI+D,GAAG,CAACC,QAAJ,OAAmB/E,aAAvB,EAAsC;AACpC,YAAM,IAAI8C,KAAJ,CAAU,wBAAV,CAAN;AACD;AACF;;AAEiB,QAAZ5B,YAAY,CAChBR,MADgB,EAEhBE,QAFgB,EAGhBD,UAHgB,EAIQ;AACxB,UAAMwB,MAAqB,GAAG;AAC5B6C,MAAAA,QAAQ,EAAExB,MAAM,CAAC5C,QAAQ,CAAC6C,QAAV,CADY;AAE5BwB,MAAAA,UAAU,EAAE;AAFgB,KAA9B;;AAIA,SAAK,MAAMC,QAAX,IAAuBtE,QAAQ,CAACuE,OAAhC,EAAyC;AACvC,YAAMC,WAAW,GAAGF,QAAQ,CAACG,SAA7B;AACA,YAAMC,MAAM,GAAGF,WAAH,aAAGA,WAAH,uBAAGA,WAAW,CAAEG,cAA5B;;AACA,UAAI5E,UAAU,CAACI,MAAX,GAAoB,CAApB,IAAyBhB,IAAI,CAACyF,YAAL,CAAkB7E,UAAlB,EAA8B2E,MAA9B,IAAyC,CAAtE,EAAyE;AACvE;AACD;;AACDnD,MAAAA,MAAM,CAAC8C,UAAP,CAAkBK,MAAM,CAAEG,IAAR,EAAlB,IAAoC,MAAM,KAAKC,eAAL,CAAqBhF,MAArB,EAA6BwE,QAA7B,CAA1C;AACD;;AACD,WAAO/C,MAAP;AACD;;AAEoB,QAAfuD,eAAe,CAAChF,MAAD,EAAwBwE,QAAxB,EAAqE;AAAA;;AACxF,QAAIA,QAAQ,CAACS,SAAT,KAAuBrB,SAAvB,IAAoCY,QAAQ,CAACS,SAAT,KAAuB,IAA/D,EAAqE;AACnE,YAAM,IAAI7C,KAAJ,CAAU,uCAAV,CAAN;AACD;;AAED,UAAM8C,KAAK,GAAGlF,MAAM,CAACmF,SAAP,wBAAiBX,QAAQ,CAACG,SAA1B,wDAAiB,oBAAoBE,cAArC,CAAd;AACA,UAAMO,IAAmB,GAAG/F,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,0BAAyBoF,QAAQ,CAACG,SAAlC,yDAAyB,qBAAoBS,IAA7C,CAA5B;AACA,QAAIA,IAAI,KAAKF,KAAK,CAACI,aAAnB,EAAkC,MAAM,IAAIlD,KAAJ,2CAA6CgD,IAA7C,EAAN;AAElC,UAAMG,WAA+B,GAAGlG,IAAI,CAACgG,aAAL,CACtCtG,gBADsC,0BAEtCyF,QAAQ,CAACG,SAF6B,yDAEtC,qBAAoBa,KAFkB,CAAxC;AAKA,UAAMC,WAAW,GAAG3C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBe,gBAArB,CAA1B;AACA,UAAMC,SAAS,GAAG7C,MAAM,yBAAC0B,QAAQ,CAACG,SAAV,yDAAC,qBAAoBiB,qBAArB,CAAxB;AACA,UAAMC,QAAQ,GAAG,MAAM,KAAK3B,IAAL,CAAUuB,WAAV,EAAuBE,SAAvB,CAAvB;AAEA,WAAOG,eAAe,CAACD,QAAD,EAAWX,KAAX,EAAkBK,WAAlB,CAAtB;AACD;;AAEe,QAAVlE,UAAU,GAA0B;AACxC,UAAM0E,UAAU,GAAGzG,aAAa,CAACe,MAAd,GAAuB,CAA1C;AACA,UAAM2F,UAAU,GAAG,MAAM,KAAK9B,IAAL,CAAU,KAAKC,QAAL,GAAgB4B,UAA1B,EAAsCA,UAAtC,CAAzB;;AAEA,QAAIC,UAAU,CAAC/B,KAAX,CAAiB,CAAjB,EAAoBI,QAApB,OAAmC/E,aAAvC,EAAsD;AACpD,YAAM,IAAI8C,KAAJ,CAAU,0BAAV,CAAN;AACD;;AAED,UAAM6D,YAAY,GAAGD,UAAU,CAACE,YAAX,CAAwB,CAAxB,CAArB;AACA,UAAMC,cAAc,GAAG,KAAKhC,QAAL,GAAgB8B,YAAhB,GAA+BF,UAAtD;;AACA,QAAII,cAAc,GAAG7G,aAAa,CAACe,MAAnC,EAA2C;AACzC,YAAM,IAAI+B,KAAJ,CAAU,uBAAV,CAAN;AACD;;AAED,UAAMgE,WAAW,GAAG,MAAM,KAAKlC,IAAL,CAAUiC,cAAV,EAA0BF,YAA1B,CAA1B;AAGA,UAAM;AAACnG,MAAAA;AAAD,QAAaT,IAAI,CAACgH,kBAAL,CAAwBD,WAAxB,CAAnB;AACA,WAAOtG,QAAP;AACD;;AAxGgC;;AA8GnC,SAASwG,YAAT,CACElB,IADF,EAEEmB,QAFF,EAGEC,MAHF,EAIEC,KAJF,EAKEC,IALF,EAMS;AACP,MAAI,EAAEH,QAAQ,IAAI5H,cAAd,CAAJ,EAAmC;AACjC,UAAM,IAAIyD,KAAJ,6BAA+BmE,QAA/B,EAAN;AACD;;AACD,SAAO5H,cAAc,CAAC4H,QAAD,CAAd,CAAyBD,YAAzB,CAAsClB,IAAtC,EAA4CoB,MAA5C,EAAoDC,KAApD,EAA2DC,IAA3D,CAAP;AACD;;AAED,SAASZ,eAAT,CACErE,MADF,EAEEkF,MAFF,EAGEpB,WAHF,EAIe;AACb,QAAMiB,MAAoB,GAAG;AAC3B/E,IAAAA,MAD2B;AAE3BmF,IAAAA,MAAM,EAAE,CAFmB;AAG3B3E,IAAAA,IAAI,EAAER,MAAM,CAACpB;AAHc,GAA7B;AAMA,QAAMwG,IAAiB,GAAG;AACxBC,IAAAA,OAAO,EAAE,EADe;AAExBC,IAAAA,OAAO,EAAE,EAFe;AAGxBC,IAAAA,MAAM,EAAE,EAHgB;AAIxBP,IAAAA,KAAK,EAAE;AAJiB,GAA1B;;AAQA,SAAOD,MAAM,CAACI,MAAP,GAAgBJ,MAAM,CAACvE,IAA9B,EAAoC;AAIlC,UAAM;AAACgF,MAAAA,UAAD;AAAa5G,MAAAA;AAAb,QAAuBhB,IAAI,CAAC6H,gBAAL,CAAsBV,MAAM,CAAC/E,MAA7B,CAA7B;AACA+E,IAAAA,MAAM,CAACI,MAAP,IAAiBvG,MAAjB;AAEA,UAAM8G,QAAQ,GAAG9H,IAAI,CAACgG,aAAL,CAAmBlG,QAAnB,EAA6B8H,UAAU,CAAC7B,IAAxC,CAAjB;AAEA,QAAIgC,QAA4B,GAAG,IAAnC;;AACA,YAAQD,QAAR;AACE,WAAK,WAAL;AACEC,QAAAA,QAAQ,GAAGC,cAAc,CAACb,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAAzB;AACA;;AACF,WAAK,cAAL;AACE6B,QAAAA,QAAQ,GAAGE,gBAAgB,CAACd,MAAD,EAASS,UAAT,EAAqBN,MAArB,EAA6BpB,WAA7B,CAA3B;AACA;;AACF;AACE,cAAM,IAAInD,KAAJ,8BAAgC+E,QAAhC,EAAN;AARJ;;AAWAxE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACC,OAAhC,EAAyCM,QAAQ,CAACN,OAAlD;AACAnE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACE,OAAhC,EAAyCK,QAAQ,CAACL,OAAlD;AACApE,IAAAA,KAAK,CAAC4E,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BZ,IAAI,CAACG,MAAhC,EAAwCI,QAAQ,CAACJ,MAAjD;AACAH,IAAAA,IAAI,CAACJ,KAAL,IAAcW,QAAQ,CAACX,KAAvB;AACD;;AAED,SAAOI,IAAP;AACD;;AAED,SAASQ,cAAT,CACEb,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AACA,QAAMC,UAAU,4BAAGH,MAAM,CAACI,gBAAV,0DAAG,sBAAyBC,UAA5C;AAmBA,MAAIC,UAAU,GAAGxB,MAAjB;;AACA,MAAIjB,WAAW,KAAK,cAApB,EAAoC;AAClC,UAAM0C,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAKAH,IAAAA,UAAU,GAAG;AACXvG,MAAAA,MAAM,EAAEwG,SADG;AAEXrB,MAAAA,MAAM,EAAE,CAFG;AAGX3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHL,KAAb;AAKAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAGD,QAAMS,cAAc,GAAG/I,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBO,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqB4I,cAArB,EAAqCJ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CADwE;AAElFG,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAGD,QAAMC,cAAc,GAAGvJ,IAAI,CAACgG,aAAL,CACrBpG,QADqB,4BAErByI,MAAM,CAACI,gBAFc,2DAErB,uBAAyBe,yBAFJ,CAAvB;AAKA,MAAIC,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBoJ,cAArB,EAAqCZ,UAArC,EAAiDH,UAAjD,EAA8D;AAClFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CADwE;AAElFL,MAAAA,eAAe,EAAE;AAFiE,KAA9D,CAAtB;AAKD,GAND,MAMO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AACD,MAAIK,iBAAiB,GAAG,CAAxB;;AACA,OAAK,MAAMC,IAAX,IAAmBH,OAAnB,EAA4B;AAC1B,QAAIG,IAAI,KAAKtC,MAAM,CAACoC,SAApB,EAA+B;AAC7BC,MAAAA,iBAAiB;AAClB;AACF;;AAGD,QAAME,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAACI,gBAFa,2DAEpB,uBAAyBvB,QAFL,CAAtB;AAIA,QAAMS,MAAM,GAAGV,YAAY,CAACK,MAAM,CAACrB,aAAR,EAAwB4D,aAAxB,EAAuClB,UAAvC,EAAmDgB,iBAAnD,EAAsE;AAC/FG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UAD4E;AAE/FX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAF8E,GAAtE,CAA3B;AASA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASP,gBAAT,CACEd,MADF,EAEEkB,MAFF,EAGEf,MAHF,EAIEpB,WAJF,EAKe;AAAA;;AACb,QAAMoC,SAAS,GAAGnB,MAAM,CAACI,MAAP,GAAgBc,MAAM,CAACE,oBAAzC;AAEA,QAAMC,UAAU,6BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BrB,UAA/C;AAEA,QAAMiB,iBAAiB,GAAGnB,UAAU,8BAAGH,MAAM,CAAC0B,mBAAV,2DAAG,uBAA4BC,SAA/B,CAApC;AACA,QAAMH,aAAa,GAAG7J,IAAI,CAACgG,aAAL,CACpBpG,QADoB,4BAEpByI,MAAM,CAAC0B,mBAFa,2DAEpB,uBAA4B7C,QAFR,CAAtB;AAOA,MAAI+B,OAAO,GAAG,IAAI3F,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAAC4B,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGhC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAAC4B,SAAxB,CAD4E;AAEtFG,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLJ,IAAAA,OAAO,CAACK,IAAR,CAAa,CAAb;AACD;;AAID,MAAIG,OAAO,GAAG,IAAInG,KAAJ,CAAUkF,UAAV,CAAd;;AACA,MAAIlB,MAAM,CAACoC,SAAP,GAAmB,CAAvB,EAA0B;AACxBD,IAAAA,OAAO,GAAGxC,YAAY,CAAC9G,kBAAD,EAAqBC,sBAArB,EAA6C+G,MAA7C,EAAqDqB,UAArD,EAAkE;AACtFW,MAAAA,QAAQ,EAAEnJ,IAAI,CAACoJ,WAAL,CAAiB9B,MAAM,CAACoC,SAAxB,CAD4E;AAEtFL,MAAAA,eAAe,EAAE;AAFqE,KAAlE,CAAtB;AAID,GALD,MAKO;AACLI,IAAAA,OAAO,CAACH,IAAR,CAAa,CAAb;AACD;;AAGD,MAAIW,eAAe,GAAG9C,MAAtB;;AAEA,gCAAIkB,MAAM,CAAC0B,mBAAX,mDAAI,uBAA4BG,aAAhC,EAA+C;AAC7C,UAAMtB,SAAS,GAAGrJ,WAAW,CAACsJ,OAAZ,CAChB3C,WADgB,EAEhBiB,MAAM,CAAC/E,MAAP,CAAcwC,KAAd,CAAoBuC,MAAM,CAACI,MAA3B,EAAmCe,SAAnC,CAFgB,EAGhBD,MAAM,CAACS,sBAHS,CAAlB;AAMAmB,IAAAA,eAAe,GAAG;AAChB7H,MAAAA,MAAM,EAAEwG,SADQ;AAEhBrB,MAAAA,MAAM,EAAE,CAFQ;AAGhB3E,MAAAA,IAAI,EAAEgG,SAAS,CAAC5H;AAHA,KAAlB;AAMAmG,IAAAA,MAAM,CAACI,MAAP,GAAgBe,SAAhB;AACD;;AAED,QAAMX,MAAM,GAAGV,YAAY,CACzBK,MAAM,CAACrB,aADkB,EAEzB4D,aAFyB,EAGzBI,eAHyB,EAIzBN,iBAJyB,EAKzB;AACEG,IAAAA,UAAU,EAAExC,MAAM,CAACwC,UADrB;AAEEX,IAAAA,QAAQ,EAAE7B,MAAM,CAACwC;AAFnB,GALyB,CAA3B;AAWA,SAAO;AACLpC,IAAAA,OAAO,EAAE+B,OADJ;AAELhC,IAAAA,OAAO,EAAEwB,OAFJ;AAGLtB,IAAAA,MAHK;AAILP,IAAAA,KAAK,EAAEoB;AAJF,GAAP;AAMD;;AAED,SAASvF,YAAT,CACEkH,cADF,EAEE5C,MAFF,EAGE6C,GAHF,EAQE;AACA,QAAMzJ,MAAwB,GAAG,EAAjC;AACA,MAAII,IAAI,GAAGwG,MAAX;;AACA,OAAK,IAAI8C,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGD,GAApB,EAAyBC,CAAC,EAA1B,EAA8B;AAC5B,UAAMC,aAAa,GAAGH,cAAc,CAACpJ,IAAD,CAApC;AAEA,UAAMwJ,cAAc,GAClBxJ,IAAI,GAAG,CAAP,GAAWf,IAAI,CAACgG,aAAL,CAAmBnG,mBAAnB,EAAwCyK,aAAa,CAACE,eAAtD,CAAX,GAAqF,MADvF;AAGA,QAAIC,QAAQ,GAAG,KAAf;AACA,QAAIC,QAAQ,GAAG,KAAf;;AACA,YAAQH,cAAR;AACE,WAAK,UAAL;AACE;;AACF,WAAK,UAAL;AACEE,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF,WAAK,UAAL;AACEC,QAAAA,QAAQ,GAAG,IAAX;AACA;;AACF;AACE,cAAM,IAAI3H,KAAJ,CAAU,kCAAV,CAAN;AAVJ;;AAaA,QAAIuH,aAAa,CAACpH,YAAd,GAA8B,CAAlC,EAAqC;AACnC,YAAMyH,GAAG,GAAG1H,YAAY,CAACkH,cAAD,EAAiBpJ,IAAI,GAAG,CAAxB,EAA2BuJ,aAAa,CAACpH,YAAzC,CAAxB;AACAnC,MAAAA,IAAI,GAAG4J,GAAG,CAAC5J,IAAX;AACAJ,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAE3BH,QAAAA,QAF2B;AAG3BC,QAAAA,QAH2B;AAI3BG,QAAAA,MAAM,EAAEF,GAAG,CAAChK;AAJe,OAA7B;AAMD,KATD,MASO;AACL,UAAImK,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBjG,IAAnB,EAAyBuK,aAAa,CAACvE,IAAvC,CAAlB;;AAEA,UAAIuE,aAAa,CAACS,cAAlB,EAAkC;AAChCD,QAAAA,WAAW,GAAG9K,IAAI,CAACgG,aAAL,CAAmBrG,aAAnB,EAAkC2K,aAAa,CAACS,cAAhD,CAAd;AACD;;AAEDpK,MAAAA,MAAM,CAAC2J,aAAa,CAACM,IAAf,CAAN,GAA6B;AAC3B7E,QAAAA,IAAI,EAAE+E,WADqB;AAE3BhB,QAAAA,UAAU,EAAEQ,aAAa,CAACU,WAFC;AAG3BP,QAAAA,QAH2B;AAI3BC,QAAAA;AAJ2B,OAA7B;AAMA3J,MAAAA,IAAI;AACL;AACF;;AACD,SAAO;AAACJ,IAAAA,MAAD;AAAS4G,IAAAA,MAAT;AAAiBxG,IAAAA;AAAjB,GAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport {CursorBuffer, ParquetCodecOptions, PARQUET_CODECS} from './codecs';\nimport * as Compression from './compression';\nimport {\n ParquetBuffer,\n ParquetCodec,\n ParquetCompression,\n ParquetData,\n ParquetField,\n ParquetRecord,\n ParquetType,\n PrimitiveType,\n SchemaDefinition\n} from './schema/declare';\nimport {ParquetSchema} from './schema/schema';\nimport * as Shred from './schema/shred';\n// tslint:disable-next-line:max-line-length\nimport {\n ColumnChunk,\n CompressionCodec,\n ConvertedType,\n Encoding,\n FieldRepetitionType,\n FileMetaData,\n PageHeader,\n PageType,\n RowGroup,\n SchemaElement,\n Type\n} from './parquet-thrift';\nimport * as Util from './util';\n// import Fs = require('fs');\n\n/**\n * Parquet File Magic String\n */\nconst PARQUET_MAGIC = 'PAR1';\n\n/**\n * Parquet File Format Version\n */\nconst PARQUET_VERSION = 1;\n\n/**\n * Internal type used for repetition/definition levels\n */\nconst PARQUET_RDLVL_TYPE = 'INT32';\nconst PARQUET_RDLVL_ENCODING = 'RLE';\n\n/**\n * A parquet cursor is used to retrieve rows from a parquet file in order\n */\nexport class ParquetCursor<T> implements AsyncIterable<T> {\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n public columnList: string[][];\n public rowGroup: ParquetRecord[];\n public rowGroupIndex: number;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is usually not recommended to call this constructor directly except for\n * advanced and internal use cases. Consider using getCursor() on the\n * ParquetReader instead\n */\n constructor(\n metadata: FileMetaData,\n envelopeReader: ParquetEnvelopeReader,\n schema: ParquetSchema,\n columnList: string[][]\n ) {\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n this.schema = schema;\n this.columnList = columnList;\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Retrieve the next row from the cursor. Returns a row or NULL if the end\n * of the file was reached\n */\n async next<T = any>(): Promise<T> {\n if (this.rowGroup.length === 0) {\n if (this.rowGroupIndex >= this.metadata.row_groups.length) {\n // @ts-ignore\n return null;\n }\n const rowBuffer = await this.envelopeReader.readRowGroup(\n this.schema,\n this.metadata.row_groups[this.rowGroupIndex],\n this.columnList\n );\n this.rowGroup = Shred.materializeRecords(this.schema, rowBuffer);\n this.rowGroupIndex++;\n }\n return this.rowGroup.shift() as any;\n }\n\n /**\n * Rewind the cursor the the beginning of the file\n */\n rewind(): void {\n this.rowGroup = [];\n this.rowGroupIndex = 0;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n let done = false;\n return {\n next: async () => {\n if (done) {\n return {done, value: null};\n }\n const value = await this.next();\n if (value === null) {\n return {done: true, value};\n }\n return {done: false, value};\n },\n return: async () => {\n done = true;\n return {done, value: null};\n },\n throw: async () => {\n done = true;\n return {done: true, value: null};\n }\n };\n }\n}\n\n/**\n * A parquet reader allows retrieving the rows from a parquet file in order.\n * The basic usage is to create a reader and then retrieve a cursor/iterator\n * which allows you to consume row after row until all rows have been read. It is\n * important that you call close() after you are finished reading the file to\n * avoid leaking file descriptors.\n */\nexport class ParquetReader<T> implements AsyncIterable<T> {\n /**\n * Open the parquet file pointed to by the specified path and return a new\n * parquet reader\n */\n static async openFile<T>(filePath: string): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openFile(filePath);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n static async openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>> {\n const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader<T>(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n /**\n * return a new parquet reader initialized with a read function\n */\n static async openArrayBuffer<T>(arrayBuffer: ArrayBuffer): Promise<ParquetReader<T>> {\n const readFn = async (start: number, length: number) => Buffer.from(arrayBuffer, start, length);\n const closeFn = async () => {};\n const size = arrayBuffer.byteLength;\n const envelopeReader = new ParquetEnvelopeReader(readFn, closeFn, size);\n try {\n await envelopeReader.readHeader();\n const metadata = await envelopeReader.readFooter();\n return new ParquetReader(metadata, envelopeReader);\n } catch (err) {\n await envelopeReader.close();\n throw err;\n }\n }\n\n public metadata: FileMetaData;\n public envelopeReader: ParquetEnvelopeReader;\n public schema: ParquetSchema;\n\n /**\n * Create a new parquet reader from the file metadata and an envelope reader.\n * It is not recommended to call this constructor directly except for advanced\n * and internal use cases. Consider using one of the open{File,Buffer} methods\n * instead\n */\n constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader) {\n if (metadata.version !== PARQUET_VERSION) {\n throw new Error('invalid parquet version');\n }\n\n this.metadata = metadata;\n this.envelopeReader = envelopeReader;\n const root = this.metadata.schema[0];\n const {schema} = decodeSchema(this.metadata.schema, 1, root.num_children!);\n this.schema = new ParquetSchema(schema);\n }\n\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n async close(): Promise<void> {\n await this.envelopeReader.close();\n // this.envelopeReader = null;\n // this.metadata = null;\n }\n\n /**\n * Return a cursor to the file. You may open more than one cursor and use\n * them concurrently. All cursors become invalid once close() is called on\n * the reader object.\n *\n * The required_columns parameter controls which columns are actually read\n * from disk. An empty array or no value implies all columns. A list of column\n * names means that only those columns should be loaded from disk.\n */\n getCursor(): ParquetCursor<T>;\n // @ts-ignore\n getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;\n getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;\n getCursor(columnList?: (string | string[])[]): ParquetCursor<Partial<T>> {\n if (!columnList) {\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = [];\n }\n\n // tslint:disable-next-line:no-parameter-reassignment\n columnList = columnList.map((x) => (Array.isArray(x) ? x : [x]));\n\n return new ParquetCursor<T>(\n this.metadata,\n this.envelopeReader,\n this.schema,\n columnList as string[][]\n );\n }\n\n /**\n * Return the number of rows in this file. Note that the number of rows is\n * not neccessarily equal to the number of rows in each column.\n */\n getRowCount(): number {\n return Number(this.metadata.num_rows);\n }\n\n /**\n * Returns the ParquetSchema for this file\n */\n getSchema(): ParquetSchema {\n return this.schema;\n }\n\n /**\n * Returns the user (key/value) metadata for this file\n */\n getMetadata(): Record<string, string> {\n const md: Record<string, string> = {};\n for (const kv of this.metadata.key_value_metadata!) {\n md[kv.key] = kv.value!;\n }\n return md;\n }\n\n /**\n * Implement AsyncIterable\n */\n // tslint:disable-next-line:function-name\n [Symbol.asyncIterator](): AsyncIterator<T> {\n return this.getCursor()[Symbol.asyncIterator]();\n }\n}\n\n/**\n * The parquet envelope reader allows direct, unbuffered access to the individual\n * sections of the parquet file, namely the header, footer and the row groups.\n * This class is intended for advanced/internal users; if you just want to retrieve\n * rows from a parquet file use the ParquetReader instead\n */\nexport class ParquetEnvelopeReader {\n public read: (position: number, length: number) => Promise<Buffer>;\n /**\n * Close this parquet reader. You MUST call this method once you're finished\n * reading rows\n */\n public close: () => Promise<void>;\n public fileSize: number;\n\n static async openFile(filePath: string): Promise<ParquetEnvelopeReader> {\n const fileStat = await Util.fstat(filePath);\n const fileDescriptor = await Util.fopen(filePath);\n\n const readFn = Util.fread.bind(undefined, fileDescriptor);\n const closeFn = Util.fclose.bind(undefined, fileDescriptor);\n\n return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size);\n }\n\n static async openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader> {\n const readFn = (position: number, length: number) =>\n Promise.resolve(buffer.slice(position, position + length));\n const closeFn = () => Promise.resolve();\n return new ParquetEnvelopeReader(readFn, closeFn, buffer.length);\n }\n\n constructor(\n read: (position: number, length: number) => Promise<Buffer>,\n close: () => Promise<void>,\n fileSize: number\n ) {\n this.read = read;\n this.close = close;\n this.fileSize = fileSize;\n }\n\n async readHeader(): Promise<void> {\n const buf = await this.read(0, PARQUET_MAGIC.length);\n\n if (buf.toString() !== PARQUET_MAGIC) {\n throw new Error('not valid parquet file');\n }\n }\n\n async readRowGroup(\n schema: ParquetSchema,\n rowGroup: RowGroup,\n columnList: string[][]\n ): Promise<ParquetBuffer> {\n const buffer: ParquetBuffer = {\n rowCount: Number(rowGroup.num_rows),\n columnData: {}\n };\n for (const colChunk of rowGroup.columns) {\n const colMetadata = colChunk.meta_data;\n const colKey = colMetadata?.path_in_schema;\n if (columnList.length > 0 && Util.fieldIndexOf(columnList, colKey!) < 0) {\n continue; // eslint-disable-line no-continue\n }\n buffer.columnData[colKey!.join()] = await this.readColumnChunk(schema, colChunk);\n }\n return buffer;\n }\n\n async readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData> {\n if (colChunk.file_path !== undefined && colChunk.file_path !== null) {\n throw new Error('external references are not supported');\n }\n\n const field = schema.findField(colChunk.meta_data?.path_in_schema!);\n const type: PrimitiveType = Util.getThriftEnum(Type, colChunk.meta_data?.type!) as any;\n if (type !== field.primitiveType) throw new Error(`chunk type not matching schema: ${type}`);\n\n const compression: ParquetCompression = Util.getThriftEnum(\n CompressionCodec,\n colChunk.meta_data?.codec!\n ) as any;\n\n const pagesOffset = Number(colChunk.meta_data?.data_page_offset!);\n const pagesSize = Number(colChunk.meta_data?.total_compressed_size!);\n const pagesBuf = await this.read(pagesOffset, pagesSize);\n\n return decodeDataPages(pagesBuf, field, compression);\n }\n\n async readFooter(): Promise<FileMetaData> {\n const trailerLen = PARQUET_MAGIC.length + 4;\n const trailerBuf = await this.read(this.fileSize - trailerLen, trailerLen);\n\n if (trailerBuf.slice(4).toString() !== PARQUET_MAGIC) {\n throw new Error('not a valid parquet file');\n }\n\n const metadataSize = trailerBuf.readUInt32LE(0);\n const metadataOffset = this.fileSize - metadataSize - trailerLen;\n if (metadataOffset < PARQUET_MAGIC.length) {\n throw new Error('invalid metadata size');\n }\n\n const metadataBuf = await this.read(metadataOffset, metadataSize);\n // let metadata = new parquet_thrift.FileMetaData();\n // parquet_util.decodeThrift(metadata, metadataBuf);\n const {metadata} = Util.decodeFileMetadata(metadataBuf);\n return metadata;\n }\n}\n\n/**\n * Decode a consecutive array of data using one of the parquet encodings\n */\nfunction decodeValues(\n type: PrimitiveType,\n encoding: ParquetCodec,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): any[] {\n if (!(encoding in PARQUET_CODECS)) {\n throw new Error(`invalid encoding: ${encoding}`);\n }\n return PARQUET_CODECS[encoding].decodeValues(type, cursor, count, opts);\n}\n\nfunction decodeDataPages(\n buffer: Buffer,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursor: CursorBuffer = {\n buffer,\n offset: 0,\n size: buffer.length\n };\n\n const data: ParquetData = {\n rlevels: [],\n dlevels: [],\n values: [],\n count: 0\n };\n\n // @ts-ignore size can be undefined\n while (cursor.offset < cursor.size) {\n // const pageHeader = new parquet_thrift.PageHeader();\n // cursor.offset += parquet_util.decodeThrift(pageHeader, cursor.buffer);\n\n const {pageHeader, length} = Util.decodePageHeader(cursor.buffer);\n cursor.offset += length;\n\n const pageType = Util.getThriftEnum(PageType, pageHeader.type);\n\n let pageData: ParquetData | null = null;\n switch (pageType) {\n case 'DATA_PAGE':\n pageData = decodeDataPage(cursor, pageHeader, column, compression);\n break;\n case 'DATA_PAGE_V2':\n pageData = decodeDataPageV2(cursor, pageHeader, column, compression);\n break;\n default:\n throw new Error(`invalid page type: ${pageType}`);\n }\n\n Array.prototype.push.apply(data.rlevels, pageData.rlevels);\n Array.prototype.push.apply(data.dlevels, pageData.dlevels);\n Array.prototype.push.apply(data.values, pageData.values);\n data.count += pageData.count;\n }\n\n return data;\n}\n\nfunction decodeDataPage(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n const valueCount = header.data_page_header?.num_values;\n\n // const info = {\n // path: opts.column.path.join('.'),\n // valueEncoding,\n // dLevelEncoding,\n // rLevelEncoding,\n // cursorOffset: cursor.offset,\n // cursorEnd,\n // cusrorSize: cursor.size,\n // header,\n // opts,\n // buffer: cursor.buffer.toJSON(),\n // values: null as any[],\n // valBuf: null as any\n // };\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n /* uncompress page */\n let dataCursor = cursor;\n if (compression !== 'UNCOMPRESSED') {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n dataCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n cursor.offset = cursorEnd;\n }\n\n /* read repetition levels */\n const rLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.repetition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n const dLevelEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.definition_level_encoding!\n ) as ParquetCodec;\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding, dataCursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: false\n // column: opts.column\n });\n } else {\n dLevels.fill(0);\n }\n let valueCountNonNull = 0;\n for (const dlvl of dLevels) {\n if (dlvl === column.dLevelMax) {\n valueCountNonNull++;\n }\n }\n\n /* read values */\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header?.encoding!\n ) as ParquetCodec;\n const values = decodeValues(column.primitiveType!, valueEncoding, dataCursor, valueCountNonNull, {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n });\n\n // info.valBuf = uncursor.buffer.toJSON();\n // info.values = values;\n // Fs.writeFileSync(`dump/${info.path}.ts.json`, JSON.stringify(info, null, 2));\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeDataPageV2(\n cursor: CursorBuffer,\n header: PageHeader,\n column: ParquetField,\n compression: ParquetCompression\n): ParquetData {\n const cursorEnd = cursor.offset + header.compressed_page_size;\n\n const valueCount = header.data_page_header_v2?.num_values;\n // @ts-ignore\n const valueCountNonNull = valueCount - header.data_page_header_v2?.num_nulls;\n const valueEncoding = Util.getThriftEnum(\n Encoding,\n header.data_page_header_v2?.encoding!\n ) as ParquetCodec;\n\n /* read repetition levels */\n // tslint:disable-next-line:prefer-array-literal\n let rLevels = new Array(valueCount);\n if (column.rLevelMax > 0) {\n rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.rLevelMax),\n disableEnvelope: true\n });\n } else {\n rLevels.fill(0);\n }\n\n /* read definition levels */\n // tslint:disable-next-line:prefer-array-literal\n let dLevels = new Array(valueCount);\n if (column.dLevelMax > 0) {\n dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount!, {\n bitWidth: Util.getBitWidth(column.dLevelMax),\n disableEnvelope: true\n });\n } else {\n dLevels.fill(0);\n }\n\n /* read values */\n let valuesBufCursor = cursor;\n\n if (header.data_page_header_v2?.is_compressed) {\n const valuesBuf = Compression.inflate(\n compression,\n cursor.buffer.slice(cursor.offset, cursorEnd),\n header.uncompressed_page_size\n );\n\n valuesBufCursor = {\n buffer: valuesBuf,\n offset: 0,\n size: valuesBuf.length\n };\n\n cursor.offset = cursorEnd;\n }\n\n const values = decodeValues(\n column.primitiveType!,\n valueEncoding,\n valuesBufCursor,\n valueCountNonNull,\n {\n typeLength: column.typeLength,\n bitWidth: column.typeLength\n }\n );\n\n return {\n dlevels: dLevels,\n rlevels: rLevels,\n values,\n count: valueCount!\n };\n}\n\nfunction decodeSchema(\n schemaElements: SchemaElement[],\n offset: number,\n len: number\n): {\n offset: number;\n next: number;\n schema: SchemaDefinition;\n} {\n const schema: SchemaDefinition = {};\n let next = offset;\n for (let i = 0; i < len; i++) {\n const schemaElement = schemaElements[next];\n\n const repetitionType =\n next > 0 ? Util.getThriftEnum(FieldRepetitionType, schemaElement.repetition_type!) : 'ROOT';\n\n let optional = false;\n let repeated = false;\n switch (repetitionType) {\n case 'REQUIRED':\n break;\n case 'OPTIONAL':\n optional = true;\n break;\n case 'REPEATED':\n repeated = true;\n break;\n default:\n throw new Error('parquet: unknown repetition type');\n }\n\n if (schemaElement.num_children! > 0) {\n const res = decodeSchema(schemaElements, next + 1, schemaElement.num_children!);\n next = res.next;\n schema[schemaElement.name] = {\n // type: undefined,\n optional,\n repeated,\n fields: res.schema\n };\n } else {\n let logicalType = Util.getThriftEnum(Type, schemaElement.type!);\n\n if (schemaElement.converted_type) {\n logicalType = Util.getThriftEnum(ConvertedType, schemaElement.converted_type);\n }\n\n schema[schemaElement.name] = {\n type: logicalType as ParquetType,\n typeLength: schemaElement.type_length,\n optional,\n repeated\n };\n next++;\n }\n }\n return {schema, offset, next};\n}\n"],"file":"reader.js"}
|
|
@@ -124,19 +124,19 @@ function buildFields(schema, rLevelParentMax, dLevelParentMax, path) {
|
|
|
124
124
|
const typeDef = PARQUET_LOGICAL_TYPES[opts.type];
|
|
125
125
|
|
|
126
126
|
if (!typeDef) {
|
|
127
|
-
throw new Error(
|
|
127
|
+
throw new Error("invalid parquet type: ".concat(opts.type));
|
|
128
128
|
}
|
|
129
129
|
|
|
130
130
|
opts.encoding = opts.encoding || 'PLAIN';
|
|
131
131
|
|
|
132
132
|
if (!(opts.encoding in PARQUET_CODECS)) {
|
|
133
|
-
throw new Error(
|
|
133
|
+
throw new Error("unsupported parquet encoding: ".concat(opts.encoding));
|
|
134
134
|
}
|
|
135
135
|
|
|
136
136
|
opts.compression = opts.compression || 'UNCOMPRESSED';
|
|
137
137
|
|
|
138
138
|
if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {
|
|
139
|
-
throw new Error(
|
|
139
|
+
throw new Error("unsupported compression method: ".concat(opts.compression));
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
const cpath = path.concat([name]);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"],"names":["PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","materializeRecords","shredBuffer","shredRecord","PARQUET_LOGICAL_TYPES","ParquetSchema","constructor","schema","fields","buildFields","fieldList","listFields","findField","path","split","slice","n","length","shift","findFieldBranch","branch","push","record","buffer","compress","type","setCompress","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","key","join","isNested","fieldCount","Object","keys","typeDef","Error","encoding","primitiveType","originalType","typeLength","list","k"],"mappings":";AAEA,SAAQA,cAAR,QAA6B,WAA7B;AACA,SAAQC,2BAAR,QAA0C,gBAA1C;AAUA,SAAQC,kBAAR,EAA4BC,WAA5B,EAAyCC,WAAzC,QAA2D,SAA3D;AACA,SAAQC,qBAAR,QAAoC,SAApC;AAKA,OAAO,MAAMC,aAAN,CAAoB;AAQzBC,EAAAA,WAAW,CAACC,MAAD,EAA2B;AAAA;;AAAA;;AAAA;;AACpC,SAAKA,MAAL,GAAcA,MAAd;AACA,SAAKC,MAAL,GAAcC,WAAW,CAACF,MAAD,EAAS,CAAT,EAAY,CAAZ,EAAe,EAAf,CAAzB;AACA,SAAKG,SAAL,GAAiBC,UAAU,CAAC,KAAKH,MAAN,CAA3B;AACD;;AAKDI,EAAAA,SAAS,CAACC,IAAD,EAAwC;AAC/C,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD,KAHD,MAGO;AAELD,MAAAA,IAAI,GAAGA,IAAI,CAACE,KAAL,CAAW,CAAX,CAAP;AACD;;AAED,QAAIC,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCF,MAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;;AAED,WAAOQ,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAR;AACD;;AAKDM,EAAAA,eAAe,CAACN,IAAD,EAA0C;AACvD,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD;;AACD,UAAMM,MAAsB,GAAG,EAA/B;AACA,QAAIJ,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCE,MAAAA,MAAM,CAACC,IAAP,CAAYL,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAb;;AACA,UAAIA,IAAI,CAACI,MAAL,GAAc,CAAlB,EAAqB;AACnBD,QAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;AACF;;AACD,WAAOY,MAAP;AACD;;AAEDjB,EAAAA,WAAW,CAACmB,MAAD,EAAwBC,MAAxB,EAAqD;AAC9DpB,IAAAA,WAAW,CAAC,IAAD,EAAOmB,MAAP,EAAeC,MAAf,CAAX;AACD;;AAEDtB,EAAAA,kBAAkB,CAACsB,MAAD,EAAyC;AACzD,WAAOtB,kBAAkB,CAAC,IAAD,EAAOsB,MAAP,CAAzB;AACD;;AAEDC,EAAAA,QAAQ,CAACC,IAAD,EAAiC;AACvCC,IAAAA,WAAW,CAAC,KAAKnB,MAAN,EAAckB,IAAd,CAAX;AACAC,IAAAA,WAAW,CAAC,KAAKlB,MAAN,EAAciB,IAAd,CAAX;AACA,WAAO,IAAP;AACD;;AAEDF,EAAAA,MAAM,GAAkB;AACtB,WAAOrB,WAAW,CAAC,IAAD,CAAlB;AACD;;AArEwB;;AAwE3B,SAASwB,WAAT,CAAqBnB,MAArB,EAAkCkB,IAAlC,EAA4D;AAC1D,OAAK,MAAME,IAAX,IAAmBpB,MAAnB,EAA2B;AACzB,UAAMqB,IAAI,GAAGrB,MAAM,CAACoB,IAAD,CAAnB;;AACA,QAAIC,IAAI,CAACpB,MAAT,EAAiB;AACfkB,MAAAA,WAAW,CAACE,IAAI,CAACpB,MAAN,EAAciB,IAAd,CAAX;AACD,KAFD,MAEO;AACLG,MAAAA,IAAI,CAACC,WAAL,GAAmBJ,IAAnB;AACD;AACF;AACF;;AAGD,SAAShB,WAAT,CACEF,MADF,EAEEuB,eAFF,EAGEC,eAHF,EAIElB,IAJF,EAKgC;AAC9B,QAAMH,SAAuC,GAAG,EAAhD;;AAEA,OAAK,MAAMiB,IAAX,IAAmBpB,MAAnB,EAA2B;AACzB,UAAMyB,IAAI,GAAGzB,MAAM,CAACoB,IAAD,CAAnB;AAGA,UAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAvB;AACA,UAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAN,CAAxB;AACA,QAAIE,SAAS,GAAGP,eAAhB;AACA,QAAIQ,SAAS,GAAGP,eAAhB;AAEA,QAAIQ,cAA8B,GAAG,UAArC;;AACA,QAAI,CAACN,QAAL,EAAe;AACbM,MAAAA,cAAc,GAAG,UAAjB;AACAD,MAAAA,SAAS;AACV;;AACD,QAAIH,QAAJ,EAAc;AACZI,MAAAA,cAAc,GAAG,UAAjB;AACAF,MAAAA,SAAS;AACT,UAAIJ,QAAJ,EAAcK,SAAS;AACxB;;AAGD,QAAIN,IAAI,CAACxB,MAAT,EAAiB;AACf,YAAMgC,KAAK,GAAG3B,IAAI,CAAC4B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAjB,MAAAA,SAAS,CAACiB,IAAD,CAAT,GAAkB;AAChBA,QAAAA,IADgB;AAEhBd,QAAAA,IAAI,EAAE2B,KAFU;AAGhBE,QAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EAHW;AAIhBJ,QAAAA,cAJgB;AAKhBF,QAAAA,SALgB;AAMhBC,QAAAA,SANgB;AAOhBM,QAAAA,QAAQ,EAAE,IAPM;AAQhBC,QAAAA,UAAU,EAAEC,MAAM,CAACC,IAAP,CAAYf,IAAI,CAACxB,MAAjB,EAAyBS,MARrB;AAShBT,QAAAA,MAAM,EAAEC,WAAW,CAACuB,IAAI,CAACxB,MAAN,EAAc6B,SAAd,EAAyBC,SAAzB,EAAoCE,KAApC;AATH,OAAlB;AAWA;AACD;;AAED,UAAMQ,OAAY,GAAG5C,qBAAqB,CAAC4B,IAAI,CAACP,IAAN,CAA1C;;AACA,QAAI,CAACuB,OAAL,EAAc;AACZ,YAAM,IAAIC,KAAJ,CAAW,yBAAwBjB,IAAI,CAACP,IAAK,EAA7C,CAAN;AACD;;AAEDO,IAAAA,IAAI,CAACkB,QAAL,GAAgBlB,IAAI,CAACkB,QAAL,IAAiB,OAAjC;;AACA,QAAI,EAAElB,IAAI,CAACkB,QAAL,IAAiBnD,cAAnB,CAAJ,EAAwC;AACtC,YAAM,IAAIkD,KAAJ,CAAW,iCAAgCjB,IAAI,CAACkB,QAAS,EAAzD,CAAN;AACD;;AAEDlB,IAAAA,IAAI,CAACH,WAAL,GAAmBG,IAAI,CAACH,WAAL,IAAoB,cAAvC;;AACA,QAAI,EAAEG,IAAI,CAACH,WAAL,IAAoB7B,2BAAtB,CAAJ,EAAwD;AACtD,YAAM,IAAIiD,KAAJ,CAAW,mCAAkCjB,IAAI,CAACH,WAAY,EAA9D,CAAN;AACD;;AAGD,UAAMW,KAAK,GAAG3B,IAAI,CAAC4B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAjB,IAAAA,SAAS,CAACiB,IAAD,CAAT,GAAkB;AAChBA,MAAAA,IADgB;AAEhBwB,MAAAA,aAAa,EAAEH,OAAO,CAACG,aAFP;AAGhBC,MAAAA,YAAY,EAAEJ,OAAO,CAACI,YAHN;AAIhBvC,MAAAA,IAAI,EAAE2B,KAJU;AAKhBE,MAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EALW;AAMhBJ,MAAAA,cANgB;AAOhBW,MAAAA,QAAQ,EAAElB,IAAI,CAACkB,QAPC;AAQhBrB,MAAAA,WAAW,EAAEG,IAAI,CAACH,WARF;AAShBwB,MAAAA,UAAU,EAAErB,IAAI,CAACqB,UAAL,IAAmBL,OAAO,CAACK,UATvB;AAUhBhB,MAAAA,SAVgB;AAWhBC,MAAAA;AAXgB,KAAlB;AAaD;;AACD,SAAO5B,SAAP;AACD;;AAED,SAASC,UAAT,CAAoBH,MAApB,EAA0E;AACxE,MAAI8C,IAAoB,GAAG,EAA3B;;AACA,OAAK,MAAMC,CAAX,IAAgB/C,MAAhB,EAAwB;AACtB8C,IAAAA,IAAI,CAACjC,IAAL,CAAUb,MAAM,CAAC+C,CAAD,CAAhB;;AACA,QAAI/C,MAAM,CAAC+C,CAAD,CAAN,CAAUX,QAAd,EAAwB;AACtBU,MAAAA,IAAI,GAAGA,IAAI,CAACb,MAAL,CAAY9B,UAAU,CAACH,MAAM,CAAC+C,CAAD,CAAN,CAAU/C,MAAX,CAAtB,CAAP;AACD;AACF;;AACD,SAAO8C,IAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetBuffer,\n ParquetCompression,\n ParquetField,\n ParquetRecord,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRecords, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {\n shredRecord(this, record, buffer);\n }\n\n materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {\n return materializeRecords(this, buffer);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n buffer(): ParquetBuffer {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return list;\n}\n"],"file":"schema.js"}
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/schema/schema.ts"],"names":["PARQUET_CODECS","PARQUET_COMPRESSION_METHODS","materializeRecords","shredBuffer","shredRecord","PARQUET_LOGICAL_TYPES","ParquetSchema","constructor","schema","fields","buildFields","fieldList","listFields","findField","path","split","slice","n","length","shift","findFieldBranch","branch","push","record","buffer","compress","type","setCompress","name","node","compression","rLevelParentMax","dLevelParentMax","opts","required","optional","repeated","Boolean","rLevelMax","dLevelMax","repetitionType","cpath","concat","key","join","isNested","fieldCount","Object","keys","typeDef","Error","encoding","primitiveType","originalType","typeLength","list","k"],"mappings":";AAEA,SAAQA,cAAR,QAA6B,WAA7B;AACA,SAAQC,2BAAR,QAA0C,gBAA1C;AAUA,SAAQC,kBAAR,EAA4BC,WAA5B,EAAyCC,WAAzC,QAA2D,SAA3D;AACA,SAAQC,qBAAR,QAAoC,SAApC;AAKA,OAAO,MAAMC,aAAN,CAAoB;AAQzBC,EAAAA,WAAW,CAACC,MAAD,EAA2B;AAAA;;AAAA;;AAAA;;AACpC,SAAKA,MAAL,GAAcA,MAAd;AACA,SAAKC,MAAL,GAAcC,WAAW,CAACF,MAAD,EAAS,CAAT,EAAY,CAAZ,EAAe,EAAf,CAAzB;AACA,SAAKG,SAAL,GAAiBC,UAAU,CAAC,KAAKH,MAAN,CAA3B;AACD;;AAKDI,EAAAA,SAAS,CAACC,IAAD,EAAwC;AAC/C,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD,KAHD,MAGO;AAELD,MAAAA,IAAI,GAAGA,IAAI,CAACE,KAAL,CAAW,CAAX,CAAP;AACD;;AAED,QAAIC,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCF,MAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;;AAED,WAAOQ,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAR;AACD;;AAKDM,EAAAA,eAAe,CAACN,IAAD,EAA0C;AACvD,QAAI,OAAOA,IAAP,KAAgB,QAApB,EAA8B;AAE5BA,MAAAA,IAAI,GAAGA,IAAI,CAACC,KAAL,CAAW,GAAX,CAAP;AACD;;AACD,UAAMM,MAAsB,GAAG,EAA/B;AACA,QAAIJ,CAAC,GAAG,KAAKR,MAAb;;AACA,WAAOK,IAAI,CAACI,MAAL,GAAc,CAArB,EAAwBJ,IAAI,CAACK,KAAL,EAAxB,EAAsC;AACpCE,MAAAA,MAAM,CAACC,IAAP,CAAYL,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAb;;AACA,UAAIA,IAAI,CAACI,MAAL,GAAc,CAAlB,EAAqB;AACnBD,QAAAA,CAAC,GAAGA,CAAC,CAACH,IAAI,CAAC,CAAD,CAAL,CAAD,CAAWL,MAAf;AACD;AACF;;AACD,WAAOY,MAAP;AACD;;AAEDjB,EAAAA,WAAW,CAACmB,MAAD,EAAwBC,MAAxB,EAAqD;AAC9DpB,IAAAA,WAAW,CAAC,IAAD,EAAOmB,MAAP,EAAeC,MAAf,CAAX;AACD;;AAEDtB,EAAAA,kBAAkB,CAACsB,MAAD,EAAyC;AACzD,WAAOtB,kBAAkB,CAAC,IAAD,EAAOsB,MAAP,CAAzB;AACD;;AAEDC,EAAAA,QAAQ,CAACC,IAAD,EAAiC;AACvCC,IAAAA,WAAW,CAAC,KAAKnB,MAAN,EAAckB,IAAd,CAAX;AACAC,IAAAA,WAAW,CAAC,KAAKlB,MAAN,EAAciB,IAAd,CAAX;AACA,WAAO,IAAP;AACD;;AAEDF,EAAAA,MAAM,GAAkB;AACtB,WAAOrB,WAAW,CAAC,IAAD,CAAlB;AACD;;AArEwB;;AAwE3B,SAASwB,WAAT,CAAqBnB,MAArB,EAAkCkB,IAAlC,EAA4D;AAC1D,OAAK,MAAME,IAAX,IAAmBpB,MAAnB,EAA2B;AACzB,UAAMqB,IAAI,GAAGrB,MAAM,CAACoB,IAAD,CAAnB;;AACA,QAAIC,IAAI,CAACpB,MAAT,EAAiB;AACfkB,MAAAA,WAAW,CAACE,IAAI,CAACpB,MAAN,EAAciB,IAAd,CAAX;AACD,KAFD,MAEO;AACLG,MAAAA,IAAI,CAACC,WAAL,GAAmBJ,IAAnB;AACD;AACF;AACF;;AAGD,SAAShB,WAAT,CACEF,MADF,EAEEuB,eAFF,EAGEC,eAHF,EAIElB,IAJF,EAKgC;AAC9B,QAAMH,SAAuC,GAAG,EAAhD;;AAEA,OAAK,MAAMiB,IAAX,IAAmBpB,MAAnB,EAA2B;AACzB,UAAMyB,IAAI,GAAGzB,MAAM,CAACoB,IAAD,CAAnB;AAGA,UAAMM,QAAQ,GAAG,CAACD,IAAI,CAACE,QAAvB;AACA,UAAMC,QAAQ,GAAGC,OAAO,CAACJ,IAAI,CAACG,QAAN,CAAxB;AACA,QAAIE,SAAS,GAAGP,eAAhB;AACA,QAAIQ,SAAS,GAAGP,eAAhB;AAEA,QAAIQ,cAA8B,GAAG,UAArC;;AACA,QAAI,CAACN,QAAL,EAAe;AACbM,MAAAA,cAAc,GAAG,UAAjB;AACAD,MAAAA,SAAS;AACV;;AACD,QAAIH,QAAJ,EAAc;AACZI,MAAAA,cAAc,GAAG,UAAjB;AACAF,MAAAA,SAAS;AACT,UAAIJ,QAAJ,EAAcK,SAAS;AACxB;;AAGD,QAAIN,IAAI,CAACxB,MAAT,EAAiB;AACf,YAAMgC,KAAK,GAAG3B,IAAI,CAAC4B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAjB,MAAAA,SAAS,CAACiB,IAAD,CAAT,GAAkB;AAChBA,QAAAA,IADgB;AAEhBd,QAAAA,IAAI,EAAE2B,KAFU;AAGhBE,QAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EAHW;AAIhBJ,QAAAA,cAJgB;AAKhBF,QAAAA,SALgB;AAMhBC,QAAAA,SANgB;AAOhBM,QAAAA,QAAQ,EAAE,IAPM;AAQhBC,QAAAA,UAAU,EAAEC,MAAM,CAACC,IAAP,CAAYf,IAAI,CAACxB,MAAjB,EAAyBS,MARrB;AAShBT,QAAAA,MAAM,EAAEC,WAAW,CAACuB,IAAI,CAACxB,MAAN,EAAc6B,SAAd,EAAyBC,SAAzB,EAAoCE,KAApC;AATH,OAAlB;AAWA;AACD;;AAED,UAAMQ,OAAY,GAAG5C,qBAAqB,CAAC4B,IAAI,CAACP,IAAN,CAA1C;;AACA,QAAI,CAACuB,OAAL,EAAc;AACZ,YAAM,IAAIC,KAAJ,iCAAmCjB,IAAI,CAACP,IAAxC,EAAN;AACD;;AAEDO,IAAAA,IAAI,CAACkB,QAAL,GAAgBlB,IAAI,CAACkB,QAAL,IAAiB,OAAjC;;AACA,QAAI,EAAElB,IAAI,CAACkB,QAAL,IAAiBnD,cAAnB,CAAJ,EAAwC;AACtC,YAAM,IAAIkD,KAAJ,yCAA2CjB,IAAI,CAACkB,QAAhD,EAAN;AACD;;AAEDlB,IAAAA,IAAI,CAACH,WAAL,GAAmBG,IAAI,CAACH,WAAL,IAAoB,cAAvC;;AACA,QAAI,EAAEG,IAAI,CAACH,WAAL,IAAoB7B,2BAAtB,CAAJ,EAAwD;AACtD,YAAM,IAAIiD,KAAJ,2CAA6CjB,IAAI,CAACH,WAAlD,EAAN;AACD;;AAGD,UAAMW,KAAK,GAAG3B,IAAI,CAAC4B,MAAL,CAAY,CAACd,IAAD,CAAZ,CAAd;AACAjB,IAAAA,SAAS,CAACiB,IAAD,CAAT,GAAkB;AAChBA,MAAAA,IADgB;AAEhBwB,MAAAA,aAAa,EAAEH,OAAO,CAACG,aAFP;AAGhBC,MAAAA,YAAY,EAAEJ,OAAO,CAACI,YAHN;AAIhBvC,MAAAA,IAAI,EAAE2B,KAJU;AAKhBE,MAAAA,GAAG,EAAEF,KAAK,CAACG,IAAN,EALW;AAMhBJ,MAAAA,cANgB;AAOhBW,MAAAA,QAAQ,EAAElB,IAAI,CAACkB,QAPC;AAQhBrB,MAAAA,WAAW,EAAEG,IAAI,CAACH,WARF;AAShBwB,MAAAA,UAAU,EAAErB,IAAI,CAACqB,UAAL,IAAmBL,OAAO,CAACK,UATvB;AAUhBhB,MAAAA,SAVgB;AAWhBC,MAAAA;AAXgB,KAAlB;AAaD;;AACD,SAAO5B,SAAP;AACD;;AAED,SAASC,UAAT,CAAoBH,MAApB,EAA0E;AACxE,MAAI8C,IAAoB,GAAG,EAA3B;;AACA,OAAK,MAAMC,CAAX,IAAgB/C,MAAhB,EAAwB;AACtB8C,IAAAA,IAAI,CAACjC,IAAL,CAAUb,MAAM,CAAC+C,CAAD,CAAhB;;AACA,QAAI/C,MAAM,CAAC+C,CAAD,CAAN,CAAUX,QAAd,EAAwB;AACtBU,MAAAA,IAAI,GAAGA,IAAI,CAACb,MAAL,CAAY9B,UAAU,CAACH,MAAM,CAAC+C,CAAD,CAAN,CAAU/C,MAAX,CAAtB,CAAP;AACD;AACF;;AACD,SAAO8C,IAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {PARQUET_CODECS} from '../codecs';\nimport {PARQUET_COMPRESSION_METHODS} from '../compression';\nimport {\n FieldDefinition,\n ParquetBuffer,\n ParquetCompression,\n ParquetField,\n ParquetRecord,\n RepetitionType,\n SchemaDefinition\n} from './declare';\nimport {materializeRecords, shredBuffer, shredRecord} from './shred';\nimport {PARQUET_LOGICAL_TYPES} from './types';\n\n/**\n * A parquet file schema\n */\nexport class ParquetSchema {\n public schema: Record<string, FieldDefinition>;\n public fields: Record<string, ParquetField>;\n public fieldList: ParquetField[];\n\n /**\n * Create a new schema from a JSON schema definition\n */\n constructor(schema: SchemaDefinition) {\n this.schema = schema;\n this.fields = buildFields(schema, 0, 0, []);\n this.fieldList = listFields(this.fields);\n }\n\n /**\n * Retrieve a field definition\n */\n findField(path: string | string[]): ParquetField {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n } else {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.slice(0); // clone array\n }\n\n let n = this.fields;\n for (; path.length > 1; path.shift()) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n\n return n[path[0]];\n }\n\n /**\n * Retrieve a field definition and all the field's ancestors\n */\n findFieldBranch(path: string | string[]): ParquetField[] {\n if (typeof path === 'string') {\n // tslint:disable-next-line:no-parameter-reassignment\n path = path.split(',');\n }\n const branch: ParquetField[] = [];\n let n = this.fields;\n for (; path.length > 0; path.shift()) {\n branch.push(n[path[0]]);\n if (path.length > 1) {\n n = n[path[0]].fields as Record<string, ParquetField>;\n }\n }\n return branch;\n }\n\n shredRecord(record: ParquetRecord, buffer: ParquetBuffer): void {\n shredRecord(this, record, buffer);\n }\n\n materializeRecords(buffer: ParquetBuffer): ParquetRecord[] {\n return materializeRecords(this, buffer);\n }\n\n compress(type: ParquetCompression): this {\n setCompress(this.schema, type);\n setCompress(this.fields, type);\n return this;\n }\n\n buffer(): ParquetBuffer {\n return shredBuffer(this);\n }\n}\n\nfunction setCompress(schema: any, type: ParquetCompression) {\n for (const name in schema) {\n const node = schema[name];\n if (node.fields) {\n setCompress(node.fields, type);\n } else {\n node.compression = type;\n }\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction buildFields(\n schema: SchemaDefinition,\n rLevelParentMax: number,\n dLevelParentMax: number,\n path: string[]\n): Record<string, ParquetField> {\n const fieldList: Record<string, ParquetField> = {};\n\n for (const name in schema) {\n const opts = schema[name];\n\n /* field repetition type */\n const required = !opts.optional;\n const repeated = Boolean(opts.repeated);\n let rLevelMax = rLevelParentMax;\n let dLevelMax = dLevelParentMax;\n\n let repetitionType: RepetitionType = 'REQUIRED';\n if (!required) {\n repetitionType = 'OPTIONAL';\n dLevelMax++;\n }\n if (repeated) {\n repetitionType = 'REPEATED';\n rLevelMax++;\n if (required) dLevelMax++;\n }\n\n /* nested field */\n if (opts.fields) {\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n rLevelMax,\n dLevelMax,\n isNested: true,\n fieldCount: Object.keys(opts.fields).length,\n fields: buildFields(opts.fields, rLevelMax, dLevelMax, cpath)\n };\n continue; // eslint-disable-line no-continue\n }\n\n const typeDef: any = PARQUET_LOGICAL_TYPES[opts.type!];\n if (!typeDef) {\n throw new Error(`invalid parquet type: ${opts.type}`);\n }\n\n opts.encoding = opts.encoding || 'PLAIN';\n if (!(opts.encoding in PARQUET_CODECS)) {\n throw new Error(`unsupported parquet encoding: ${opts.encoding}`);\n }\n\n opts.compression = opts.compression || 'UNCOMPRESSED';\n if (!(opts.compression in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`unsupported compression method: ${opts.compression}`);\n }\n\n /* add to schema */\n const cpath = path.concat([name]);\n fieldList[name] = {\n name,\n primitiveType: typeDef.primitiveType,\n originalType: typeDef.originalType,\n path: cpath,\n key: cpath.join(),\n repetitionType,\n encoding: opts.encoding,\n compression: opts.compression,\n typeLength: opts.typeLength || typeDef.typeLength,\n rLevelMax,\n dLevelMax\n };\n }\n return fieldList;\n}\n\nfunction listFields(fields: Record<string, ParquetField>): ParquetField[] {\n let list: ParquetField[] = [];\n for (const k in fields) {\n list.push(fields[k]);\n if (fields[k].isNested) {\n list = list.concat(listFields(fields[k].fields!));\n }\n }\n return list;\n}\n"],"file":"schema.js"}
|
|
@@ -52,11 +52,11 @@ function shredRecordFields(fields, record, data, rLevel, dLevel) {
|
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {
|
|
55
|
-
throw new Error(
|
|
55
|
+
throw new Error("missing required field: ".concat(field.name));
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
if (values.length > 1 && field.repetitionType !== 'REPEATED') {
|
|
59
|
-
throw new Error(
|
|
59
|
+
throw new Error("too many values for field: ".concat(field.name));
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
if (values.length === 0) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["ParquetBuffer","Types","shredBuffer","schema","columnData","field","fieldList","key","dlevels","rlevels","values","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":"AAEA,SAAQA,aAAR,QAAsE,WAAtE;AAEA,OAAO,KAAKC,KAAZ,MAAuB,SAAvB;AAEA,SAAQD,aAAR;AAEA,OAAO,SAASE,WAAT,CAAqBC,MAArB,EAA2D;AAChE,QAAMC,UAAuC,GAAG,EAAhD;;AACA,OAAK,MAAMC,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCF,IAAAA,UAAU,CAACC,KAAK,CAACE,GAAP,CAAV,GAAwB;AACtBC,MAAAA,OAAO,EAAE,EADa;AAEtBC,MAAAA,OAAO,EAAE,EAFa;AAGtBC,MAAAA,MAAM,EAAE,EAHc;AAItBC,MAAAA,KAAK,EAAE;AAJe,KAAxB;AAMD;;AACD,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcR,IAAAA;AAAd,GAAP;AACD;AAwBD,OAAO,SAASS,WAAT,CAAqBV,MAArB,EAA4CW,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,QAAMC,IAAI,GAAGd,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAa,EAAAA,iBAAiB,CAACd,MAAM,CAACe,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACX,UAAP,GAAoBY,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AACA,OAAK,MAAMP,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCa,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BE,OAAxD,EAAiEO,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAjF;AACAU,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BC,OAAxD,EAAiEQ,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAjF;AACAW,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BG,MAAxD,EAAgEM,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhF;AACAK,IAAAA,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BI,KAA7B,IAAsCK,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAtD;AACD;AACF;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,MAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,UAAMb,KAAK,GAAGa,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIf,MAAa,GAAG,EAApB;;AACA,QACEI,MAAM,IACNT,KAAK,CAACoB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CT,QAAAA,MAAM,GAAGI,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAf;AACD,OAFD,MAEO;AACLf,QAAAA,MAAM,CAACW,IAAP,CAAYP,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIf,MAAM,CAACkB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CT,KAAK,CAACyB,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,CAAW,2BAA0B1B,KAAK,CAACoB,IAAK,EAAhD,CAAN;AACD;;AACD,QAAIf,MAAM,CAACkB,MAAP,GAAgB,CAAhB,IAAqBvB,KAAK,CAACyB,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,CAAW,8BAA6B1B,KAAK,CAACoB,IAAK,EAAnD,CAAN;AACD;;AAGD,QAAIf,MAAM,CAACkB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIvB,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBY,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBa,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGvB,MAAM,CAACkB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,YAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBlB,KAAK,CAAC8B,SAAtC;;AACA,UAAI9B,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgBR,MAAM,CAACuB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC7B,KAAK,CAAC+B,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBY,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBa,IAAxB,CAA6BhB,KAAK,CAAC+B,SAAnC;AACApB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBW,IAAvB,CACEpB,KAAK,CAACoC,WAAN,CAAmBhC,KAAK,CAACiC,YAAN,IAAsBjC,KAAK,CAACkC,aAA/C,EAAgE7B,MAAM,CAACuB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBD,OAAO,SAASO,kBAAT,CAA4BrC,MAA5B,EAAmDY,MAAnD,EAA2F;AAChG,QAAM0B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIR,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC,EAA0CQ,OAAO,CAACpB,IAAR,CAAa,EAAb;;AAC1C,OAAK,MAAMd,GAAX,IAAkBQ,MAAM,CAACX,UAAzB,EAAqC;AACnCsC,IAAAA,iBAAiB,CAACvC,MAAD,EAASY,MAAT,EAAiBR,GAAjB,EAAsBkC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEvC,MADF,EAEEY,MAFF,EAGER,GAHF,EAIEkC,OAJF,EAKE;AACA,QAAMzB,IAAI,GAAGD,MAAM,CAACX,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACS,IAAI,CAACL,KAAV,EAAiB;AAEjB,QAAMN,KAAK,GAAGF,MAAM,CAACwC,SAAP,CAAiBpC,GAAjB,CAAd;AACA,QAAMqC,MAAM,GAAGzC,MAAM,CAAC0C,eAAP,CAAuBtC,GAAvB,CAAf;AAGA,QAAMuC,OAAiB,GAAG,IAAI3B,KAAJ,CAAUd,KAAK,CAAC8B,SAAN,GAAkB,CAA5B,EAA+BY,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIf,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,UAAMT,MAAM,GAAGR,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACA,UAAMV,MAAM,GAAGP,IAAI,CAACP,OAAL,CAAawB,CAAb,CAAf;AACAa,IAAAA,OAAO,CAACvB,MAAD,CAAP;AACAuB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBxB,MAAM,GAAG,CAAzB;AAEA,QAAI0B,MAAM,GAAG,CAAb;AACA,QAAInC,MAAM,GAAG2B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAGA,SAAK,MAAMC,IAAX,IAAmBN,MAAnB,EAA2B;AACzB,UAAIM,IAAI,KAAK7C,KAAb,EAAoB;AACpB,UAAImB,MAAM,GAAG0B,IAAI,CAACd,SAAlB,EAA6B;;AAC7B,UAAIc,IAAI,CAACpB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,YAAI,EAAEoB,IAAI,CAACzB,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,UAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM0B,EAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,eAAOnC,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkBG,MAAlB,IAA4BuB,EAAnC,EAAuC;AAErCrC,UAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,QAAAA,MAAM,GAAGA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkB0B,EAAlB,CAAT;AACD,OAXD,MAWO;AACLrC,QAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,GAAoBX,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,IAAqB,EAAzC;AACAX,QAAAA,MAAM,GAAGA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAf;AACD;AACF;;AAGD,QAAID,MAAM,KAAKnB,KAAK,CAAC+B,SAArB,EAAgC;AAC9B,YAAMgB,KAAK,GAAGnD,KAAK,CAACoD,aAAN,CAEZhD,KAAK,CAACiC,YAAN,IAAsBjC,KAAK,CAACkC,aAFhB,EAGZvB,IAAI,CAACN,MAAL,CAAYsC,MAAZ,CAHY,CAAd;AAKAA,MAAAA,MAAM;;AACN,UAAI3C,KAAK,CAACyB,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAEzB,KAAK,CAACoB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,cAAM0B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOnC,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BuB,EAApC,EAAwC;AAEtCrC,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmB0B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLtC,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB2B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex]\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}
|
|
1
|
+
{"version":3,"sources":["../../../../src/parquetjs/schema/shred.ts"],"names":["ParquetBuffer","Types","shredBuffer","schema","columnData","field","fieldList","key","dlevels","rlevels","values","count","rowCount","shredRecord","record","buffer","data","shredRecordFields","fields","Array","prototype","push","apply","rLevel","dLevel","name","undefined","constructor","length","Boolean","repetitionType","Error","isNested","i","rlvl","rLevelMax","dLevelMax","toPrimitive","originalType","primitiveType","materializeRecords","records","materializeColumn","findField","branch","findFieldBranch","rLevels","fill","vIndex","rIndex","step","ix","value","fromPrimitive"],"mappings":"AAEA,SAAQA,aAAR,QAAsE,WAAtE;AAEA,OAAO,KAAKC,KAAZ,MAAuB,SAAvB;AAEA,SAAQD,aAAR;AAEA,OAAO,SAASE,WAAT,CAAqBC,MAArB,EAA2D;AAChE,QAAMC,UAAuC,GAAG,EAAhD;;AACA,OAAK,MAAMC,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCF,IAAAA,UAAU,CAACC,KAAK,CAACE,GAAP,CAAV,GAAwB;AACtBC,MAAAA,OAAO,EAAE,EADa;AAEtBC,MAAAA,OAAO,EAAE,EAFa;AAGtBC,MAAAA,MAAM,EAAE,EAHc;AAItBC,MAAAA,KAAK,EAAE;AAJe,KAAxB;AAMD;;AACD,SAAO;AAACC,IAAAA,QAAQ,EAAE,CAAX;AAAcR,IAAAA;AAAd,GAAP;AACD;AAwBD,OAAO,SAASS,WAAT,CAAqBV,MAArB,EAA4CW,MAA5C,EAAyDC,MAAzD,EAAsF;AAE3F,QAAMC,IAAI,GAAGd,WAAW,CAACC,MAAD,CAAX,CAAoBC,UAAjC;AAEAa,EAAAA,iBAAiB,CAACd,MAAM,CAACe,MAAR,EAAgBJ,MAAhB,EAAwBE,IAAxB,EAA8B,CAA9B,EAAiC,CAAjC,CAAjB;;AAGA,MAAID,MAAM,CAACH,QAAP,KAAoB,CAAxB,EAA2B;AACzBG,IAAAA,MAAM,CAACH,QAAP,GAAkB,CAAlB;AACAG,IAAAA,MAAM,CAACX,UAAP,GAAoBY,IAApB;AACA;AACD;;AACDD,EAAAA,MAAM,CAACH,QAAP,IAAmB,CAAnB;;AACA,OAAK,MAAMP,KAAX,IAAoBF,MAAM,CAACG,SAA3B,EAAsC;AACpCa,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BE,OAAxD,EAAiEO,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAjF;AACAU,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BC,OAAxD,EAAiEQ,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAjF;AACAW,IAAAA,KAAK,CAACC,SAAN,CAAgBC,IAAhB,CAAqBC,KAArB,CAA2BP,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BG,MAAxD,EAAgEM,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhF;AACAK,IAAAA,MAAM,CAACX,UAAP,CAAkBC,KAAK,CAACE,GAAxB,EAA6BI,KAA7B,IAAsCK,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAtD;AACD;AACF;;AAGD,SAASM,iBAAT,CACEC,MADF,EAEEJ,MAFF,EAGEE,IAHF,EAIEO,MAJF,EAKEC,MALF,EAME;AACA,OAAK,MAAMC,IAAX,IAAmBP,MAAnB,EAA2B;AACzB,UAAMb,KAAK,GAAGa,MAAM,CAACO,IAAD,CAApB;AAGA,QAAIf,MAAa,GAAG,EAApB;;AACA,QACEI,MAAM,IACNT,KAAK,CAACoB,IAAN,IAAcX,MADd,IAEAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuBC,SAFvB,IAGAZ,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,KAAuB,IAJzB,EAKE;AACA,UAAIX,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBE,WAAnB,KAAmCR,KAAvC,EAA8C;AAC5CT,QAAAA,MAAM,GAAGI,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAf;AACD,OAFD,MAEO;AACLf,QAAAA,MAAM,CAACW,IAAP,CAAYP,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAlB;AACD;AACF;;AAED,QAAIf,MAAM,CAACkB,MAAP,KAAkB,CAAlB,IAAuBC,OAAO,CAACf,MAAD,CAA9B,IAA0CT,KAAK,CAACyB,cAAN,KAAyB,UAAvE,EAAmF;AACjF,YAAM,IAAIC,KAAJ,mCAAqC1B,KAAK,CAACoB,IAA3C,EAAN;AACD;;AACD,QAAIf,MAAM,CAACkB,MAAP,GAAgB,CAAhB,IAAqBvB,KAAK,CAACyB,cAAN,KAAyB,UAAlD,EAA8D;AAC5D,YAAM,IAAIC,KAAJ,sCAAwC1B,KAAK,CAACoB,IAA9C,EAAN;AACD;;AAGD,QAAIf,MAAM,CAACkB,MAAP,KAAkB,CAAtB,EAAyB;AACvB,UAAIvB,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgB,IAAhB,EAAsBF,IAAtB,EAA4BO,MAA5B,EAAoCC,MAApC,CAAjB;AACD,OAFD,MAEO;AACLR,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBY,IAAxB,CAA6BE,MAA7B;AACAP,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBa,IAAxB,CAA6BG,MAA7B;AACD;;AACD;AACD;;AAGD,SAAK,IAAIS,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGvB,MAAM,CAACkB,MAA3B,EAAmCK,CAAC,EAApC,EAAwC;AACtC,YAAMC,IAAI,GAAGD,CAAC,KAAK,CAAN,GAAUV,MAAV,GAAmBlB,KAAK,CAAC8B,SAAtC;;AACA,UAAI9B,KAAK,CAAC2B,QAAV,EAAoB;AAClBf,QAAAA,iBAAiB,CAACZ,KAAK,CAACa,MAAP,EAAgBR,MAAM,CAACuB,CAAD,CAAtB,EAA2BjB,IAA3B,EAAiCkB,IAAjC,EAAuC7B,KAAK,CAAC+B,SAA7C,CAAjB;AACD,OAFD,MAEO;AACLpB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBI,KAAhB,IAAyB,CAAzB;AACAK,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBE,OAAhB,CAAwBY,IAAxB,CAA6Ba,IAA7B;AACAlB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBC,OAAhB,CAAwBa,IAAxB,CAA6BhB,KAAK,CAAC+B,SAAnC;AACApB,QAAAA,IAAI,CAACX,KAAK,CAACE,GAAP,CAAJ,CAAgBG,MAAhB,CAAuBW,IAAvB,CACEpB,KAAK,CAACoC,WAAN,CAAmBhC,KAAK,CAACiC,YAAN,IAAsBjC,KAAK,CAACkC,aAA/C,EAAgE7B,MAAM,CAACuB,CAAD,CAAtE,CADF;AAGD;AACF;AACF;AACF;;AAqBD,OAAO,SAASO,kBAAT,CAA4BrC,MAA5B,EAAmDY,MAAnD,EAA2F;AAChG,QAAM0B,OAAwB,GAAG,EAAjC;;AACA,OAAK,IAAIR,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGlB,MAAM,CAACH,QAA3B,EAAqCqB,CAAC,EAAtC,EAA0CQ,OAAO,CAACpB,IAAR,CAAa,EAAb;;AAC1C,OAAK,MAAMd,GAAX,IAAkBQ,MAAM,CAACX,UAAzB,EAAqC;AACnCsC,IAAAA,iBAAiB,CAACvC,MAAD,EAASY,MAAT,EAAiBR,GAAjB,EAAsBkC,OAAtB,CAAjB;AACD;;AACD,SAAOA,OAAP;AACD;;AAGD,SAASC,iBAAT,CACEvC,MADF,EAEEY,MAFF,EAGER,GAHF,EAIEkC,OAJF,EAKE;AACA,QAAMzB,IAAI,GAAGD,MAAM,CAACX,UAAP,CAAkBG,GAAlB,CAAb;AACA,MAAI,CAACS,IAAI,CAACL,KAAV,EAAiB;AAEjB,QAAMN,KAAK,GAAGF,MAAM,CAACwC,SAAP,CAAiBpC,GAAjB,CAAd;AACA,QAAMqC,MAAM,GAAGzC,MAAM,CAAC0C,eAAP,CAAuBtC,GAAvB,CAAf;AAGA,QAAMuC,OAAiB,GAAG,IAAI3B,KAAJ,CAAUd,KAAK,CAAC8B,SAAN,GAAkB,CAA5B,EAA+BY,IAA/B,CAAoC,CAApC,CAA1B;AACA,MAAIC,MAAM,GAAG,CAAb;;AACA,OAAK,IAAIf,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGjB,IAAI,CAACL,KAAzB,EAAgCsB,CAAC,EAAjC,EAAqC;AACnC,UAAMT,MAAM,GAAGR,IAAI,CAACR,OAAL,CAAayB,CAAb,CAAf;AACA,UAAMV,MAAM,GAAGP,IAAI,CAACP,OAAL,CAAawB,CAAb,CAAf;AACAa,IAAAA,OAAO,CAACvB,MAAD,CAAP;AACAuB,IAAAA,OAAO,CAACC,IAAR,CAAa,CAAb,EAAgBxB,MAAM,GAAG,CAAzB;AAEA,QAAI0B,MAAM,GAAG,CAAb;AACA,QAAInC,MAAM,GAAG2B,OAAO,CAACK,OAAO,CAACG,MAAM,EAAP,CAAP,GAAoB,CAArB,CAApB;;AAGA,SAAK,MAAMC,IAAX,IAAmBN,MAAnB,EAA2B;AACzB,UAAIM,IAAI,KAAK7C,KAAb,EAAoB;AACpB,UAAImB,MAAM,GAAG0B,IAAI,CAACd,SAAlB,EAA6B;;AAC7B,UAAIc,IAAI,CAACpB,cAAL,KAAwB,UAA5B,EAAwC;AACtC,YAAI,EAAEoB,IAAI,CAACzB,IAAL,IAAaX,MAAf,CAAJ,EAA4B;AAE1BA,UAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,GAAoB,EAApB;AACD;;AACD,cAAM0B,EAAE,GAAGL,OAAO,CAACG,MAAM,EAAP,CAAlB;;AACA,eAAOnC,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkBG,MAAlB,IAA4BuB,EAAnC,EAAuC;AAErCrC,UAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkBJ,IAAlB,CAAuB,EAAvB;AACD;;AACDP,QAAAA,MAAM,GAAGA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,CAAkB0B,EAAlB,CAAT;AACD,OAXD,MAWO;AACLrC,QAAAA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,GAAoBX,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAN,IAAqB,EAAzC;AACAX,QAAAA,MAAM,GAAGA,MAAM,CAACoC,IAAI,CAACzB,IAAN,CAAf;AACD;AACF;;AAGD,QAAID,MAAM,KAAKnB,KAAK,CAAC+B,SAArB,EAAgC;AAC9B,YAAMgB,KAAK,GAAGnD,KAAK,CAACoD,aAAN,CAEZhD,KAAK,CAACiC,YAAN,IAAsBjC,KAAK,CAACkC,aAFhB,EAGZvB,IAAI,CAACN,MAAL,CAAYsC,MAAZ,CAHY,CAAd;AAKAA,MAAAA,MAAM;;AACN,UAAI3C,KAAK,CAACyB,cAAN,KAAyB,UAA7B,EAAyC;AACvC,YAAI,EAAEzB,KAAK,CAACoB,IAAN,IAAcX,MAAhB,CAAJ,EAA6B;AAE3BA,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB,EAArB;AACD;;AACD,cAAM0B,EAAE,GAAGL,OAAO,CAACG,MAAD,CAAlB;;AACA,eAAOnC,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBG,MAAnB,IAA6BuB,EAApC,EAAwC;AAEtCrC,UAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmBJ,IAAnB,CAAwB,IAAxB;AACD;;AACDP,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,CAAmB0B,EAAnB,IAAyBC,KAAzB;AACD,OAXD,MAWO;AACLtC,QAAAA,MAAM,CAACT,KAAK,CAACoB,IAAP,CAAN,GAAqB2B,KAArB;AACD;AACF;AACF;AACF","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport {ParquetBuffer, ParquetData, ParquetField, ParquetRecord} from './declare';\nimport {ParquetSchema} from './schema';\nimport * as Types from './types';\n\nexport {ParquetBuffer};\n\nexport function shredBuffer(schema: ParquetSchema): ParquetBuffer {\n const columnData: Record<string, ParquetData> = {};\n for (const field of schema.fieldList) {\n columnData[field.key] = {\n dlevels: [],\n rlevels: [],\n values: [],\n count: 0\n };\n }\n return {rowCount: 0, columnData};\n}\n\n/**\n * 'Shred' a record into a list of <value, repetition_level, definition_level>\n * tuples per column using the Google Dremel Algorithm..\n *\n * The buffer argument must point to an object into which the shredded record\n * will be returned. You may re-use the buffer for repeated calls to this function\n * to append to an existing buffer, as long as the schema is unchanged.\n *\n * The format in which the shredded records will be stored in the buffer is as\n * follows:\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function shredRecord(schema: ParquetSchema, record: any, buffer: ParquetBuffer): void {\n /* shred the record, this may raise an exception */\n const data = shredBuffer(schema).columnData;\n\n shredRecordFields(schema.fields, record, data, 0, 0);\n\n /* if no error during shredding, add the shredded record to the buffer */\n if (buffer.rowCount === 0) {\n buffer.rowCount = 1;\n buffer.columnData = data;\n return;\n }\n buffer.rowCount += 1;\n for (const field of schema.fieldList) {\n Array.prototype.push.apply(buffer.columnData[field.key].rlevels, data[field.key].rlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].dlevels, data[field.key].dlevels);\n Array.prototype.push.apply(buffer.columnData[field.key].values, data[field.key].values);\n buffer.columnData[field.key].count += data[field.key].count;\n }\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction shredRecordFields(\n fields: Record<string, ParquetField>,\n record: any,\n data: Record<string, ParquetData>,\n rLevel: number,\n dLevel: number\n) {\n for (const name in fields) {\n const field = fields[name];\n\n // fetch values\n let values: any[] = [];\n if (\n record &&\n field.name in record &&\n record[field.name] !== undefined &&\n record[field.name] !== null\n ) {\n if (record[field.name].constructor === Array) {\n values = record[field.name];\n } else {\n values.push(record[field.name]);\n }\n }\n // check values\n if (values.length === 0 && Boolean(record) && field.repetitionType === 'REQUIRED') {\n throw new Error(`missing required field: ${field.name}`);\n }\n if (values.length > 1 && field.repetitionType !== 'REPEATED') {\n throw new Error(`too many values for field: ${field.name}`);\n }\n\n // push null\n if (values.length === 0) {\n if (field.isNested) {\n shredRecordFields(field.fields!, null, data, rLevel, dLevel);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rLevel);\n data[field.key].dlevels.push(dLevel);\n }\n continue; // eslint-disable-line no-continue\n }\n\n // push values\n for (let i = 0; i < values.length; i++) {\n const rlvl = i === 0 ? rLevel : field.rLevelMax;\n if (field.isNested) {\n shredRecordFields(field.fields!, values[i], data, rlvl, field.dLevelMax);\n } else {\n data[field.key].count += 1;\n data[field.key].rlevels.push(rlvl);\n data[field.key].dlevels.push(field.dLevelMax);\n data[field.key].values.push(\n Types.toPrimitive((field.originalType || field.primitiveType)!, values[i])\n );\n }\n }\n }\n}\n\n/**\n * 'Materialize' a list of <value, repetition_level, definition_level>\n * tuples back to nested records (objects/arrays) using the Google Dremel\n * Algorithm..\n *\n * The buffer argument must point to an object with the following structure (i.e.\n * the same structure that is returned by shredRecords):\n *\n * buffer = {\n * columnData: [\n * 'my_col': {\n * dlevels: [d1, d2, .. dN],\n * rlevels: [r1, r2, .. rN],\n * values: [v1, v2, .. vN],\n * }, ...\n * ],\n * rowCount: X,\n * }\n */\nexport function materializeRecords(schema: ParquetSchema, buffer: ParquetBuffer): ParquetRecord[] {\n const records: ParquetRecord[] = [];\n for (let i = 0; i < buffer.rowCount; i++) records.push({});\n for (const key in buffer.columnData) {\n materializeColumn(schema, buffer, key, records);\n }\n return records;\n}\n\n// eslint-disable-next-line max-statements, complexity\nfunction materializeColumn(\n schema: ParquetSchema,\n buffer: ParquetBuffer,\n key: string,\n records: ParquetRecord[]\n) {\n const data = buffer.columnData[key];\n if (!data.count) return;\n\n const field = schema.findField(key);\n const branch = schema.findFieldBranch(key);\n\n // tslint:disable-next-line:prefer-array-literal\n const rLevels: number[] = new Array(field.rLevelMax + 1).fill(0);\n let vIndex = 0;\n for (let i = 0; i < data.count; i++) {\n const dLevel = data.dlevels[i];\n const rLevel = data.rlevels[i];\n rLevels[rLevel]++;\n rLevels.fill(0, rLevel + 1);\n\n let rIndex = 0;\n let record = records[rLevels[rIndex++] - 1];\n\n // Internal nodes\n for (const step of branch) {\n if (step === field) break;\n if (dLevel < step.dLevelMax) break;\n if (step.repetitionType === 'REPEATED') {\n if (!(step.name in record)) {\n // eslint-disable max-depth\n record[step.name] = [];\n }\n const ix = rLevels[rIndex++];\n while (record[step.name].length <= ix) {\n // eslint-disable max-depth\n record[step.name].push({});\n }\n record = record[step.name][ix];\n } else {\n record[step.name] = record[step.name] || {};\n record = record[step.name];\n }\n }\n\n // Leaf node\n if (dLevel === field.dLevelMax) {\n const value = Types.fromPrimitive(\n // @ts-ignore\n field.originalType || field.primitiveType,\n data.values[vIndex]\n );\n vIndex++;\n if (field.repetitionType === 'REPEATED') {\n if (!(field.name in record)) {\n // eslint-disable max-depth\n record[field.name] = [];\n }\n const ix = rLevels[rIndex];\n while (record[field.name].length <= ix) {\n // eslint-disable max-depth\n record[field.name].push(null);\n }\n record[field.name][ix] = value;\n } else {\n record[field.name] = value;\n }\n }\n }\n}\n"],"file":"shred.js"}
|
|
@@ -129,14 +129,14 @@ export const PARQUET_LOGICAL_TYPES = {
|
|
|
129
129
|
};
|
|
130
130
|
export function toPrimitive(type, value) {
|
|
131
131
|
if (!(type in PARQUET_LOGICAL_TYPES)) {
|
|
132
|
-
throw new Error(
|
|
132
|
+
throw new Error("invalid type: ".concat(type));
|
|
133
133
|
}
|
|
134
134
|
|
|
135
135
|
return PARQUET_LOGICAL_TYPES[type].toPrimitive(value);
|
|
136
136
|
}
|
|
137
137
|
export function fromPrimitive(type, value) {
|
|
138
138
|
if (!(type in PARQUET_LOGICAL_TYPES)) {
|
|
139
|
-
throw new Error(
|
|
139
|
+
throw new Error("invalid type: ".concat(type));
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
if ('fromPrimitive' in PARQUET_LOGICAL_TYPES[type]) {
|
|
@@ -160,7 +160,7 @@ function toPrimitive_FLOAT(value) {
|
|
|
160
160
|
const v = parseFloat(value);
|
|
161
161
|
|
|
162
162
|
if (isNaN(v)) {
|
|
163
|
-
throw new Error(
|
|
163
|
+
throw new Error("invalid value for FLOAT: ".concat(value));
|
|
164
164
|
}
|
|
165
165
|
|
|
166
166
|
return v;
|
|
@@ -170,7 +170,7 @@ function toPrimitive_DOUBLE(value) {
|
|
|
170
170
|
const v = parseFloat(value);
|
|
171
171
|
|
|
172
172
|
if (isNaN(v)) {
|
|
173
|
-
throw new Error(
|
|
173
|
+
throw new Error("invalid value for DOUBLE: ".concat(value));
|
|
174
174
|
}
|
|
175
175
|
|
|
176
176
|
return v;
|
|
@@ -180,7 +180,7 @@ function toPrimitive_INT8(value) {
|
|
|
180
180
|
const v = parseInt(value, 10);
|
|
181
181
|
|
|
182
182
|
if (v < -0x80 || v > 0x7f || isNaN(v)) {
|
|
183
|
-
throw new Error(
|
|
183
|
+
throw new Error("invalid value for INT8: ".concat(value));
|
|
184
184
|
}
|
|
185
185
|
|
|
186
186
|
return v;
|
|
@@ -190,7 +190,7 @@ function toPrimitive_UINT8(value) {
|
|
|
190
190
|
const v = parseInt(value, 10);
|
|
191
191
|
|
|
192
192
|
if (v < 0 || v > 0xff || isNaN(v)) {
|
|
193
|
-
throw new Error(
|
|
193
|
+
throw new Error("invalid value for UINT8: ".concat(value));
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
return v;
|
|
@@ -200,7 +200,7 @@ function toPrimitive_INT16(value) {
|
|
|
200
200
|
const v = parseInt(value, 10);
|
|
201
201
|
|
|
202
202
|
if (v < -0x8000 || v > 0x7fff || isNaN(v)) {
|
|
203
|
-
throw new Error(
|
|
203
|
+
throw new Error("invalid value for INT16: ".concat(value));
|
|
204
204
|
}
|
|
205
205
|
|
|
206
206
|
return v;
|
|
@@ -210,7 +210,7 @@ function toPrimitive_UINT16(value) {
|
|
|
210
210
|
const v = parseInt(value, 10);
|
|
211
211
|
|
|
212
212
|
if (v < 0 || v > 0xffff || isNaN(v)) {
|
|
213
|
-
throw new Error(
|
|
213
|
+
throw new Error("invalid value for UINT16: ".concat(value));
|
|
214
214
|
}
|
|
215
215
|
|
|
216
216
|
return v;
|
|
@@ -220,7 +220,7 @@ function toPrimitive_INT32(value) {
|
|
|
220
220
|
const v = parseInt(value, 10);
|
|
221
221
|
|
|
222
222
|
if (v < -0x80000000 || v > 0x7fffffff || isNaN(v)) {
|
|
223
|
-
throw new Error(
|
|
223
|
+
throw new Error("invalid value for INT32: ".concat(value));
|
|
224
224
|
}
|
|
225
225
|
|
|
226
226
|
return v;
|
|
@@ -230,7 +230,7 @@ function toPrimitive_UINT32(value) {
|
|
|
230
230
|
const v = parseInt(value, 10);
|
|
231
231
|
|
|
232
232
|
if (v < 0 || v > 0xffffffffffff || isNaN(v)) {
|
|
233
|
-
throw new Error(
|
|
233
|
+
throw new Error("invalid value for UINT32: ".concat(value));
|
|
234
234
|
}
|
|
235
235
|
|
|
236
236
|
return v;
|
|
@@ -240,7 +240,7 @@ function toPrimitive_INT64(value) {
|
|
|
240
240
|
const v = parseInt(value, 10);
|
|
241
241
|
|
|
242
242
|
if (isNaN(v)) {
|
|
243
|
-
throw new Error(
|
|
243
|
+
throw new Error("invalid value for INT64: ".concat(value));
|
|
244
244
|
}
|
|
245
245
|
|
|
246
246
|
return v;
|
|
@@ -250,7 +250,7 @@ function toPrimitive_UINT64(value) {
|
|
|
250
250
|
const v = parseInt(value, 10);
|
|
251
251
|
|
|
252
252
|
if (v < 0 || isNaN(v)) {
|
|
253
|
-
throw new Error(
|
|
253
|
+
throw new Error("invalid value for UINT64: ".concat(value));
|
|
254
254
|
}
|
|
255
255
|
|
|
256
256
|
return v;
|
|
@@ -260,7 +260,7 @@ function toPrimitive_INT96(value) {
|
|
|
260
260
|
const v = parseInt(value, 10);
|
|
261
261
|
|
|
262
262
|
if (isNaN(v)) {
|
|
263
|
-
throw new Error(
|
|
263
|
+
throw new Error("invalid value for INT96: ".concat(value));
|
|
264
264
|
}
|
|
265
265
|
|
|
266
266
|
return v;
|
|
@@ -298,7 +298,7 @@ function toPrimitive_TIME_MILLIS(value) {
|
|
|
298
298
|
const v = parseInt(value, 10);
|
|
299
299
|
|
|
300
300
|
if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
|
|
301
|
-
throw new Error(
|
|
301
|
+
throw new Error("invalid value for TIME_MILLIS: ".concat(value));
|
|
302
302
|
}
|
|
303
303
|
|
|
304
304
|
return v;
|
|
@@ -308,7 +308,7 @@ function toPrimitive_TIME_MICROS(value) {
|
|
|
308
308
|
const v = parseInt(value, 10);
|
|
309
309
|
|
|
310
310
|
if (v < 0 || isNaN(v)) {
|
|
311
|
-
throw new Error(
|
|
311
|
+
throw new Error("invalid value for TIME_MICROS: ".concat(value));
|
|
312
312
|
}
|
|
313
313
|
|
|
314
314
|
return v;
|
|
@@ -325,7 +325,7 @@ function toPrimitive_DATE(value) {
|
|
|
325
325
|
const v = parseInt(value, 10);
|
|
326
326
|
|
|
327
327
|
if (v < 0 || isNaN(v)) {
|
|
328
|
-
throw new Error(
|
|
328
|
+
throw new Error("invalid value for DATE: ".concat(value));
|
|
329
329
|
}
|
|
330
330
|
|
|
331
331
|
return v;
|
|
@@ -345,7 +345,7 @@ function toPrimitive_TIMESTAMP_MILLIS(value) {
|
|
|
345
345
|
const v = parseInt(value, 10);
|
|
346
346
|
|
|
347
347
|
if (v < 0 || isNaN(v)) {
|
|
348
|
-
throw new Error(
|
|
348
|
+
throw new Error("invalid value for TIMESTAMP_MILLIS: ".concat(value));
|
|
349
349
|
}
|
|
350
350
|
|
|
351
351
|
return v;
|
|
@@ -365,7 +365,7 @@ function toPrimitive_TIMESTAMP_MICROS(value) {
|
|
|
365
365
|
const v = parseInt(value, 10);
|
|
366
366
|
|
|
367
367
|
if (v < 0 || isNaN(v)) {
|
|
368
|
-
throw new Error(
|
|
368
|
+
throw new Error("invalid value for TIMESTAMP_MICROS: ".concat(value));
|
|
369
369
|
}
|
|
370
370
|
|
|
371
371
|
return v;
|