@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +19 -19
- package/dist/dist.min.js.map +3 -3
- package/dist/es5/index.js +49 -8
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
- package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
- package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
- package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
- package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
- package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
- package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
- package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/es5/parquet-loader.js +4 -2
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-wasm-loader.js +1 -1
- package/dist/es5/parquet-wasm-writer.js +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquetjs/compression.js +15 -5
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
- package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/es5/parquetjs/schema/declare.js +3 -1
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +39 -33
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/file-utils.js +2 -3
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/esm/index.js +13 -3
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
- package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
- package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
- package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
- package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
- package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
- package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
- package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
- package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
- package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
- package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
- package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/dist/esm/parquet-loader.js +4 -2
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-wasm-loader.js +1 -1
- package/dist/esm/parquet-wasm-writer.js +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquetjs/compression.js +10 -1
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
- package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
- package/dist/esm/parquetjs/schema/declare.js +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +42 -34
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js +1 -1
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
- package/dist/index.d.ts +24 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +26 -9
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
- package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
- package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
- package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
- package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
- package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
- package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
- package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
- package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
- package/dist/lib/geo/decode-geo-metadata.js +73 -0
- package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
- package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
- package/dist/lib/geo/geoparquet-schema.js +69 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
- package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
- package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
- package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
- package/dist/parquet-loader.d.ts +2 -0
- package/dist/parquet-loader.d.ts.map +1 -1
- package/dist/parquet-loader.js +3 -1
- package/dist/parquet-worker.js +20 -20
- package/dist/parquet-worker.js.map +3 -3
- package/dist/parquetjs/compression.d.ts.map +1 -1
- package/dist/parquetjs/compression.js +16 -5
- package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
- package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
- package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
- package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
- package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
- package/dist/parquetjs/parser/parquet-reader.js +168 -102
- package/dist/parquetjs/schema/declare.d.ts +14 -7
- package/dist/parquetjs/schema/declare.d.ts.map +1 -1
- package/dist/parquetjs/schema/declare.js +2 -0
- package/dist/parquetjs/schema/shred.d.ts +115 -0
- package/dist/parquetjs/schema/shred.d.ts.map +1 -1
- package/dist/parquetjs/schema/shred.js +161 -43
- package/dist/parquetjs/schema/types.d.ts +2 -2
- package/dist/parquetjs/schema/types.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.d.ts +3 -4
- package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
- package/dist/parquetjs/utils/file-utils.js +2 -5
- package/package.json +7 -5
- package/src/index.ts +24 -4
- package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
- package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
- package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
- package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
- package/src/lib/geo/decode-geo-metadata.ts +99 -0
- package/src/lib/geo/geoparquet-schema.ts +69 -0
- package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
- package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
- package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
- package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
- package/src/parquet-loader.ts +5 -1
- package/src/parquetjs/compression.ts +14 -1
- package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
- package/src/parquetjs/parser/parquet-reader.ts +239 -122
- package/src/parquetjs/schema/declare.ts +17 -9
- package/src/parquetjs/schema/shred.ts +157 -28
- package/src/parquetjs/schema/types.ts +21 -27
- package/src/parquetjs/utils/file-utils.ts +3 -4
- package/dist/es5/lib/convert-schema.js.map +0 -1
- package/dist/es5/lib/parse-parquet.js.map +0 -1
- package/dist/es5/lib/read-array-buffer.js +0 -43
- package/dist/es5/lib/read-array-buffer.js.map +0 -1
- package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
- package/dist/es5/parquetjs/file.js +0 -94
- package/dist/es5/parquetjs/file.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/esm/lib/convert-schema.js.map +0 -1
- package/dist/esm/lib/parse-parquet.js +0 -25
- package/dist/esm/lib/parse-parquet.js.map +0 -1
- package/dist/esm/lib/read-array-buffer.js +0 -10
- package/dist/esm/lib/read-array-buffer.js.map +0 -1
- package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
- package/dist/esm/parquetjs/file.js +0 -81
- package/dist/esm/parquetjs/file.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
- package/dist/lib/convert-schema.d.ts +0 -8
- package/dist/lib/convert-schema.d.ts.map +0 -1
- package/dist/lib/parse-parquet.d.ts +0 -4
- package/dist/lib/parse-parquet.d.ts.map +0 -1
- package/dist/lib/parse-parquet.js +0 -28
- package/dist/lib/read-array-buffer.d.ts +0 -19
- package/dist/lib/read-array-buffer.d.ts.map +0 -1
- package/dist/lib/read-array-buffer.js +0 -29
- package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
- package/dist/parquetjs/file.d.ts +0 -10
- package/dist/parquetjs/file.d.ts.map +0 -1
- package/dist/parquetjs/file.js +0 -99
- package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
- package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-cursor.js +0 -74
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
- package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
- package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
- package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
- package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
- package/dist/parquetjs/utils/buffer-utils.js +0 -22
- package/src/lib/parse-parquet.ts +0 -27
- package/src/lib/read-array-buffer.ts +0 -31
- package/src/parquetjs/file.ts +0 -90
- package/src/parquetjs/parser/parquet-cursor.ts +0 -94
- package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
- package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -4,11 +4,13 @@ Object.defineProperty(exports, "__esModule", {
 value: true
 });
 exports._typecheckParquetLoader = exports.ParquetLoader = void 0;
-var VERSION = typeof "3.4.0-alpha.
+var VERSION = typeof "3.4.0-alpha.2" !== 'undefined' ? "3.4.0-alpha.2" : 'latest';
 var DEFAULT_PARQUET_LOADER_OPTIONS = {
 parquet: {
 type: 'object-row-table',
-url: undefined
+url: undefined,
+columnList: [],
+geoparquet: true
 }
 };
 
@@ -1 +1 @@
-
{"version":3,"file":"parquet-loader.js","names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","tests","options","_typecheckParquetLoader"],"sources":["../../src/parquet-loader.ts"],"sourcesContent":["import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n tests: ['PAR1', 'PARE'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nexport const _typecheckParquetLoader: Loader = ParquetLoader;\n"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,sBAAkB,KAAK,WAAW,qBAAiB,QAAQ;
+
{"version":3,"file":"parquet-loader.js","names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","columnList","geoparquet","ParquetLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","tests","options","_typecheckParquetLoader"],"sources":["../../src/parquet-loader.ts"],"sourcesContent":["import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n columnList?: string[] | string[][];\n geoparquet?: boolean;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined,\n columnList: [],\n geoparquet: true\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n tests: ['PAR1', 'PARE'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nexport const _typecheckParquetLoader: Loader = ParquetLoader;\n"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,sBAAkB,KAAK,WAAW,qBAAiB,QAAQ;AAW3E,IAAMC,8BAAoD,GAAG;EAC3DC,OAAO,EAAE;IACPC,IAAI,EAAE,kBAAkB;IACxBC,GAAG,EAAEC,SAAS;IACdC,UAAU,EAAE,EAAE;IACdC,UAAU,EAAE;EACd;AACF,CAAC;;AAGM,IAAMC,aAAa,GAAG;EAC3BC,IAAI,EAAE,gBAAgB;EACtBC,EAAE,EAAE,SAAS;EACbC,MAAM,EAAE,SAAS;EACjBC,OAAO,EAAEZ,OAAO;EAChBa,MAAM,EAAE,IAAI;EACZC,QAAQ,EAAE,OAAO;EACjBC,UAAU,EAAE,CAAC,SAAS,CAAC;EACvBC,SAAS,EAAE,CAAC,0BAA0B,CAAC;EACvCC,MAAM,EAAE,IAAI;EACZC,KAAK,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;EACvBC,OAAO,EAAElB;AACX,CAAC;AAAC;AAEK,IAAMmB,uBAA+B,GAAGZ,aAAa;AAAC"}
@@ -4,7 +4,7 @@ Object.defineProperty(exports, "__esModule", {
 value: true
 });
 exports._typecheckParquetLoader = exports.ParquetWasmLoader = void 0;
-var VERSION = typeof "3.4.0-alpha.
+var VERSION = typeof "3.4.0-alpha.2" !== 'undefined' ? "3.4.0-alpha.2" : 'latest';
 var DEFAULT_PARQUET_LOADER_OPTIONS = {
 parquet: {
 type: 'arrow-table',
@@ -5,7 +5,7 @@ Object.defineProperty(exports, "__esModule", {
 });
 exports.ParquetWasmWriter = void 0;
 var _encodeParquetWasm = require("./lib/wasm/encode-parquet-wasm");
-var VERSION = typeof "3.4.0-alpha.
+var VERSION = typeof "3.4.0-alpha.2" !== 'undefined' ? "3.4.0-alpha.2" : 'latest';
 var DEFAULT_PARQUET_WRITER_OPTIONS = {
 parquet: {
 wasmUrl: 'https://unpkg.com/parquet-wasm@0.3.1/esm2/arrow1_bg.wasm'
@@ -4,7 +4,7 @@ Object.defineProperty(exports, "__esModule", {
 value: true
 });
 exports.ParquetWriter = void 0;
-var VERSION = typeof "3.4.0-alpha.
+var VERSION = typeof "3.4.0-alpha.2" !== 'undefined' ? "3.4.0-alpha.2" : 'latest';
 var DEFAULT_PARQUET_LOADER_OPTIONS = {};
 var ParquetWriter = {
 name: 'Apache Parquet',
@@ -12,10 +12,20 @@ exports.preloadCompressions = preloadCompressions;
 var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
 var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
 var _compression = require("@loaders.gl/compression");
-var _bufferUtils = require("./utils/buffer-utils");
 var _lz4js = _interopRequireDefault(require("lz4js"));
 var _lzo = _interopRequireDefault(require("lzo"));
 
+function toBuffer(arrayBuffer) {
+return Buffer.from(arrayBuffer);
+}
+function toArrayBuffer(buffer) {
+if (Buffer.isBuffer(buffer)) {
+var typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
+return typedArray.slice().buffer;
+}
+return buffer;
+}
+
 var modules = {
 lz4js: _lz4js.default,
 lzo: _lzo.default
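The new toBuffer/toArrayBuffer helpers replace the deleted utils/buffer-utils module. The copy through Uint8Array(...).slice() matters because a Node Buffer is often a view into a larger pooled ArrayBuffer, so buffer.buffer alone can expose unrelated bytes; the TypeScript source embedded in the source map further below notes the same issue. A sketch of the equivalent source-level helpers:

// Sketch mirroring the compiled helpers above (TypeScript source is embedded in the map).
function toArrayBuffer(buffer: Buffer | ArrayBuffer): ArrayBuffer {
  if (Buffer.isBuffer(buffer)) {
    // Respect the Buffer's offset and length inside the (possibly pooled)
    // backing ArrayBuffer, then copy so the result is independent of the pool.
    const view = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
    return view.slice().buffer;
  }
  return buffer;
}

function toBuffer(arrayBuffer: ArrayBuffer): Buffer {
  return Buffer.from(arrayBuffer);
}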
@@ -86,12 +96,12 @@ function _deflate() {
 }
 throw new Error("parquet: invalid compression method: ".concat(method));
 case 3:
-inputArrayBuffer =
+inputArrayBuffer = toArrayBuffer(value);
 _context2.next = 6;
 return compression.compress(inputArrayBuffer);
 case 6:
 compressedArrayBuffer = _context2.sent;
-return _context2.abrupt("return",
+return _context2.abrupt("return", toBuffer(compressedArrayBuffer));
 case 8:
 case "end":
 return _context2.stop();
@@ -118,12 +128,12 @@ function _decompress() {
 }
 throw new Error("parquet: invalid compression method: ".concat(method));
 case 3:
-inputArrayBuffer =
+inputArrayBuffer = toArrayBuffer(value);
 _context3.next = 6;
 return compression.decompress(inputArrayBuffer, size);
 case 6:
 compressedArrayBuffer = _context3.sent;
-return _context3.abrupt("return",
+return _context3.abrupt("return", toBuffer(compressedArrayBuffer));
 case 8:
 case "end":
 return _context3.stop();
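With the helpers in place, deflate and decompress now accept and return Node Buffers while delegating the actual work to the @loaders.gl/compression classes on ArrayBuffers. A round-trip sketch; the signatures are taken from the TypeScript source embedded in the source map below, and the relative import path (the package's src/parquetjs/compression.ts) is an assumption about how the module would be consumed:

import {deflate, decompress} from './parquetjs/compression';

async function roundTrip(): Promise<void> {
  const original = Buffer.from('hello parquet');
  // 'GZIP' is one of the registered ParquetCompression methods;
  // decompress additionally takes the expected uncompressed size.
  const compressed = await deflate('GZIP', original);
  const restored = await decompress('GZIP', compressed, original.length);
  console.log(restored.equals(original)); // true
}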
@@ -1 +1 @@
-
{"version":3,"file":"compression.js","names":["modules","lz4js","lzo","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","NoCompression","GZIP","GZipCompression","SNAPPY","SnappyCompression","BROTLI","BrotliCompression","LZ4","LZ4Compression","LZ4_RAW","LZO","LZOCompression","ZSTD","ZstdCompression","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","Error","inputArrayBuffer","
+
{"version":3,"file":"compression.js","names":["toBuffer","arrayBuffer","Buffer","from","toArrayBuffer","buffer","isBuffer","typedArray","Uint8Array","byteOffset","length","slice","modules","lz4js","lzo","PARQUET_COMPRESSION_METHODS","UNCOMPRESSED","NoCompression","GZIP","GZipCompression","SNAPPY","SnappyCompression","BROTLI","BrotliCompression","LZ4","LZ4Compression","LZ4_RAW","LZO","LZOCompression","ZSTD","ZstdCompression","preloadCompressions","options","compressions","Object","values","Promise","all","map","compression","preload","deflate","method","value","Error","inputArrayBuffer","compress","compressedArrayBuffer","decompress","size","inflate"],"sources":["../../../src/parquetjs/compression.ts"],"sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/* eslint-disable camelcase */\n// Forked from https://github.com/ironSource/parquetjs under MIT license\n\nimport {\n Compression,\n NoCompression,\n GZipCompression,\n SnappyCompression,\n BrotliCompression,\n LZOCompression,\n LZ4Compression,\n ZstdCompression\n} from '@loaders.gl/compression';\n\nimport {ParquetCompression} from './schema/declare';\n\n/** We can't use loaders-util buffer handling since we are dependent on buffers even in the browser */\nfunction toBuffer(arrayBuffer: ArrayBuffer): Buffer {\n return Buffer.from(arrayBuffer);\n}\n\nfunction toArrayBuffer(buffer: Buffer): ArrayBuffer {\n // TODO - per docs we should just be able to call buffer.buffer, but there are issues\n if (Buffer.isBuffer(buffer)) {\n const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);\n return typedArray.slice().buffer;\n }\n return buffer;\n}\n\n// TODO switch to worker compression to avoid bundling...\n\n// import brotli from 'brotli'; - brotli has problems with decompress in browsers\n// import brotliDecompress from 'brotli/decompress';\nimport lz4js from 'lz4js';\nimport lzo from 'lzo';\n// import {ZstdCodec} from 'zstd-codec';\n\n// Inject large dependencies through Compression constructor options\nconst modules = {\n // brotli has problems with decompress in browsers\n // brotli: {\n // decompress: brotliDecompress,\n // compress: () => {\n // throw new Error('brotli compress');\n // }\n // },\n lz4js,\n lzo\n // 'zstd-codec': ZstdCodec\n};\n\n// See https://github.com/apache/parquet-format/blob/master/Compression.md\nexport const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression> = {\n UNCOMPRESSED: new NoCompression(),\n GZIP: new GZipCompression(),\n SNAPPY: new SnappyCompression(),\n BROTLI: new BrotliCompression({modules}),\n // TODO: Understand difference between LZ4 and LZ4_RAW\n LZ4: new LZ4Compression({modules}),\n LZ4_RAW: new LZ4Compression({modules}),\n LZO: new LZOCompression({modules}),\n ZSTD: new ZstdCompression({modules})\n};\n\n/**\n * Register compressions that have big external libraries\n * @param options.modules External library dependencies\n */\nexport async function preloadCompressions(options?: {modules: {[key: string]: any}}) {\n const compressions = Object.values(PARQUET_COMPRESSION_METHODS);\n return await Promise.all(compressions.map((compression) => compression.preload()));\n}\n\n/**\n * Deflate a value using compression method `method`\n */\nexport async function deflate(method: ParquetCompression, value: Buffer): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const 
inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.compress(inputArrayBuffer);\n return toBuffer(compressedArrayBuffer);\n}\n\n/**\n * Inflate a value using compression method `method`\n */\nexport async function decompress(\n method: ParquetCompression,\n value: Buffer,\n size: number\n): Promise<Buffer> {\n const compression = PARQUET_COMPRESSION_METHODS[method];\n if (!compression) {\n throw new Error(`parquet: invalid compression method: ${method}`);\n }\n const inputArrayBuffer = toArrayBuffer(value);\n const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);\n return toBuffer(compressedArrayBuffer);\n}\n\n/*\n * Inflate a value using compression method `method`\n */\nexport function inflate(method: ParquetCompression, value: Buffer, size: number): Buffer {\n if (!(method in PARQUET_COMPRESSION_METHODS)) {\n throw new Error(`invalid compression method: ${method}`);\n }\n // @ts-ignore\n return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);\n}\n\n/*\nfunction deflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction deflate_gzip(value: Buffer): Buffer {\n return zlib.gzipSync(value);\n}\n\nfunction deflate_snappy(value: Buffer): Buffer {\n return snappyjs.compress(value);\n}\n\nfunction deflate_lzo(value: Buffer): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.compress(value);\n}\n\nfunction deflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n const result = brotli.compress(value, {\n mode: 0,\n quality: 8,\n lgwin: 22\n });\n return result ? Buffer.from(result) : Buffer.alloc(0);\n}\n\nfunction deflate_lz4(value: Buffer): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(lz4js.encodeBound(value.length));\n // const compressedSize = lz4.encodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, compressedSize);\n // return result;\n return Buffer.from(lz4js.compress(value));\n } catch (err) {\n throw err;\n }\n}\nfunction inflate_identity(value: Buffer): Buffer {\n return value;\n}\n\nfunction inflate_gzip(value: Buffer): Buffer {\n return zlib.gunzipSync(value);\n}\n\nfunction inflate_snappy(value: Buffer): Buffer {\n return snappyjs.uncompress(value);\n}\n\nfunction inflate_lzo(value: Buffer, size: number): Buffer {\n lzo = lzo || Util.load('lzo');\n return lzo.decompress(value, size);\n}\n\nfunction inflate_lz4(value: Buffer, size: number): Buffer {\n lz4js = lz4js || Util.load('lz4js');\n try {\n // let result = Buffer.alloc(size);\n // const uncompressedSize = lz4js.decodeBlock(value, result);\n // // remove unnecessary bytes\n // result = result.slice(0, uncompressedSize);\n // return result;\n return Buffer.from(lz4js.decompress(value, size));\n } catch (err) {\n throw err;\n }\n}\n\nfunction inflate_brotli(value: Buffer): Buffer {\n brotli = brotli || Util.load('brotli');\n if (!value.length) {\n return Buffer.alloc(0);\n }\n return 
Buffer.from(brotli.decompress(value));\n}\n*/\n"],"mappings":";;;;;;;;;;;;;AAIA;AA+BA;AACA;;AAlBA,SAASA,QAAQ,CAACC,WAAwB,EAAU;EAClD,OAAOC,MAAM,CAACC,IAAI,CAACF,WAAW,CAAC;AACjC;AAEA,SAASG,aAAa,CAACC,MAAc,EAAe;EAElD,IAAIH,MAAM,CAACI,QAAQ,CAACD,MAAM,CAAC,EAAE;IAC3B,IAAME,UAAU,GAAG,IAAIC,UAAU,CAACH,MAAM,CAACA,MAAM,EAAEA,MAAM,CAACI,UAAU,EAAEJ,MAAM,CAACK,MAAM,CAAC;IAClF,OAAOH,UAAU,CAACI,KAAK,EAAE,CAACN,MAAM;EAClC;EACA,OAAOA,MAAM;AACf;;AAWA,IAAMO,OAAO,GAAG;EAQdC,KAAK,EAALA,cAAK;EACLC,GAAG,EAAHA;AAEF,CAAC;;AAGM,IAAMC,2BAAoE,GAAG;EAClFC,YAAY,EAAE,IAAIC,0BAAa,EAAE;EACjCC,IAAI,EAAE,IAAIC,4BAAe,EAAE;EAC3BC,MAAM,EAAE,IAAIC,8BAAiB,EAAE;EAC/BC,MAAM,EAAE,IAAIC,8BAAiB,CAAC;IAACX,OAAO,EAAPA;EAAO,CAAC,CAAC;EAExCY,GAAG,EAAE,IAAIC,2BAAc,CAAC;IAACb,OAAO,EAAPA;EAAO,CAAC,CAAC;EAClCc,OAAO,EAAE,IAAID,2BAAc,CAAC;IAACb,OAAO,EAAPA;EAAO,CAAC,CAAC;EACtCe,GAAG,EAAE,IAAIC,2BAAc,CAAC;IAAChB,OAAO,EAAPA;EAAO,CAAC,CAAC;EAClCiB,IAAI,EAAE,IAAIC,4BAAe,CAAC;IAAClB,OAAO,EAAPA;EAAO,CAAC;AACrC,CAAC;;AAAC;AAAA,SAMoBmB,mBAAmB;EAAA;AAAA;AAAA;EAAA,iFAAlC,iBAAmCC,OAAyC;IAAA;IAAA;MAAA;QAAA;UAAA;YAC3EC,YAAY,GAAGC,MAAM,CAACC,MAAM,CAACpB,2BAA2B,CAAC;YAAA;YAAA,OAClDqB,OAAO,CAACC,GAAG,CAACJ,YAAY,CAACK,GAAG,CAAC,UAACC,WAAW;cAAA,OAAKA,WAAW,CAACC,OAAO,EAAE;YAAA,EAAC,CAAC;UAAA;YAAA;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CACnF;EAAA;AAAA;AAAA,SAKqBC,OAAO;EAAA;AAAA;AAAA;EAAA,qEAAtB,kBAAuBC,MAA0B,EAAEC,KAAa;IAAA;IAAA;MAAA;QAAA;UAAA;YAC/DJ,WAAW,GAAGxB,2BAA2B,CAAC2B,MAAM,CAAC;YAAA,IAClDH,WAAW;cAAA;cAAA;YAAA;YAAA,MACR,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;UAAA;YAE7DG,gBAAgB,GAAGzC,aAAa,CAACuC,KAAK,CAAC;YAAA;YAAA,OACTJ,WAAW,CAACO,QAAQ,CAACD,gBAAgB,CAAC;UAAA;YAApEE,qBAAqB;YAAA,kCACpB/C,QAAQ,CAAC+C,qBAAqB,CAAC;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CACvC;EAAA;AAAA;AAAA,SAKqBC,UAAU;EAAA;AAAA;AAAA;EAAA,wEAAzB,kBACLN,MAA0B,EAC1BC,KAAa,EACbM,IAAY;IAAA;IAAA;MAAA;QAAA;UAAA;YAENV,WAAW,GAAGxB,2BAA2B,CAAC2B,MAAM,CAAC;YAAA,IAClDH,WAAW;cAAA;cAAA;YAAA;YAAA,MACR,IAAIK,KAAK,gDAAyCF,MAAM,EAAG;UAAA;YAE7DG,gBAAgB,GAAGzC,aAAa,CAACuC,KAAK,CAAC;YAAA;YAAA,OACTJ,WAAW,CAACS,UAAU,CAACH,gBAAgB,EAAEI,IAAI,CAAC;UAAA;YAA5EF,qBAAqB;YAAA,kCACpB/C,QAAQ,CAAC+C,qBAAqB,CAAC;UAAA;UAAA;YAAA;QAAA;MAAA;IAAA;EAAA,CACvC;EAAA;AAAA;AAKM,SAASG,OAAO,CAACR,MAA0B,EAAEC,KAAa,EAAEM,IAAY,EAAU;EACvF,IAAI,EAAEP,MAAM,IAAI3B,2BAA2B,CAAC,EAAE;IAC5C,MAAM,IAAI6B,KAAK,uCAAgCF,MAAM,EAAG;EAC1D;EAEA,OAAO3B,2BAA2B,CAAC2B,MAAM,CAAC,CAACQ,OAAO,CAACP,KAAK,EAAEM,IAAI,CAAC;AACjE"}
@@ -5,17 +5,12 @@ var _typeof = require("@babel/runtime/helpers/typeof");
 Object.defineProperty(exports, "__esModule", {
 value: true
 });
-exports.
+exports.ParquetEnvelopeWriter = exports.ParquetEncoder = void 0;
 var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
-var _assertThisInitialized2 = _interopRequireDefault(require("@babel/runtime/helpers/assertThisInitialized"));
-var _inherits2 = _interopRequireDefault(require("@babel/runtime/helpers/inherits"));
-var _possibleConstructorReturn2 = _interopRequireDefault(require("@babel/runtime/helpers/possibleConstructorReturn"));
-var _getPrototypeOf2 = _interopRequireDefault(require("@babel/runtime/helpers/getPrototypeOf"));
 var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
 var _classCallCheck2 = _interopRequireDefault(require("@babel/runtime/helpers/classCallCheck"));
 var _createClass2 = _interopRequireDefault(require("@babel/runtime/helpers/createClass"));
 var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
-var _stream = require("stream");
 var _codecs = require("../codecs");
 var Compression = _interopRequireWildcard(require("../compression"));
 var Shred = _interopRequireWildcard(require("../schema/shred"));
@@ -28,8 +23,6 @@ function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj &&
 function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
 function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
 function _arrayLikeToArray(arr, len) { if (len == null || len > arr.length) len = arr.length; for (var i = 0, arr2 = new Array(len); i < len; i++) { arr2[i] = arr[i]; } return arr2; }
-function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = (0, _getPrototypeOf2.default)(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = (0, _getPrototypeOf2.default)(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return (0, _possibleConstructorReturn2.default)(this, result); }; }
-function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
 var PARQUET_MAGIC = 'PAR1';
 
 var PARQUET_VERSION = 1;
@@ -39,9 +32,9 @@ var PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
 
 var PARQUET_RDLVL_TYPE = 'INT32';
 var PARQUET_RDLVL_ENCODING = 'RLE';
-var
-function
-(0, _classCallCheck2.default)(this,
+var ParquetEncoder = function () {
+function ParquetEncoder(schema, envelopeWriter, opts) {
+(0, _classCallCheck2.default)(this, ParquetEncoder);
 (0, _defineProperty2.default)(this, "schema", void 0);
 (0, _defineProperty2.default)(this, "envelopeWriter", void 0);
 (0, _defineProperty2.default)(this, "rowBuffer", void 0);
@@ -57,7 +50,7 @@ var ParquetWriter = function () {
 
 this.writeHeader();
 }
-(0, _createClass2.default)(
+(0, _createClass2.default)(ParquetEncoder, [{
 key: "writeHeader",
 value: function () {
 var _writeHeader = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee() {
@@ -193,7 +186,7 @@ var ParquetWriter = function () {
 return (0, _fileUtils.osopen)(path, opts);
 case 2:
 outputStream = _context4.sent;
-return _context4.abrupt("return",
+return _context4.abrupt("return", ParquetEncoder.openStream(schema, outputStream, opts));
 case 4:
 case "end":
 return _context4.stop();
@@ -209,20 +202,20 @@ var ParquetWriter = function () {
 }, {
 key: "openStream",
 value: function () {
-var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream
-var
+var _openStream = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee5(schema, outputStream) {
+var opts,
+envelopeWriter,
+_args5 = arguments;
 return _regenerator.default.wrap(function _callee5$(_context5) {
 while (1) {
 switch (_context5.prev = _context5.next) {
 case 0:
-
-opts = {};
-}
+opts = _args5.length > 2 && _args5[2] !== undefined ? _args5[2] : {};
 _context5.next = 3;
 return ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
 case 3:
 envelopeWriter = _context5.sent;
-return _context5.abrupt("return", new
+return _context5.abrupt("return", new ParquetEncoder(schema, envelopeWriter, opts));
 case 5:
 case "end":
 return _context5.stop();
@@ -230,15 +223,15 @@ var ParquetWriter = function () {
 }
 }, _callee5);
 }));
-function openStream(_x6, _x7
+function openStream(_x6, _x7) {
 return _openStream.apply(this, arguments);
 }
 return openStream;
 }()
 }]);
-return
+return ParquetEncoder;
 }();
-exports.
+exports.ParquetEncoder = ParquetEncoder;
 var ParquetEnvelopeWriter = function () {
 function ParquetEnvelopeWriter(schema, writeFn, closeFn, fileOffset, opts) {
 (0, _classCallCheck2.default)(this, ParquetEnvelopeWriter);
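The hunks above complete the rename of the encoder class: the file moves from parquetjs/encoder/writer.js to parquetjs/encoder/parquet-encoder.js, the exported class is now ParquetEncoder (ParquetEnvelopeWriter is still exported alongside it), and the static openFile/openStream factories are preserved. A hedged usage sketch; whether the class is re-exported from the package index is an assumption, and schema is assumed to be an already-built parquetjs schema instance:

import {ParquetEncoder} from '@loaders.gl/parquet'; // assumption: re-exported by the package index

async function writeRows(schema: any): Promise<void> {
  // openFile resolves an output stream via osopen() and delegates to openStream,
  // as shown in the hunks above; useDataPageV2 is read later by encodeColumnChunk.
  const encoder = await ParquetEncoder.openFile(schema, '/tmp/example.parquet', {useDataPageV2: false});
  await encoder.appendRow({id: 1, name: 'alpha'});
  await encoder.appendRow({id: 2, name: 'beta'});
  await encoder.close();
}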
@@ -303,7 +296,7 @@ var ParquetEnvelopeWriter = function () {
 }
 }, _callee6, this);
 }));
-function writeRowGroup(
+function writeRowGroup(_x8) {
 return _writeRowGroup.apply(this, arguments);
 }
 return writeRowGroup;
@@ -343,7 +336,7 @@ var ParquetEnvelopeWriter = function () {
 }
 }, _callee7);
 }));
-function openStream(_x10, _x11
+function openStream(_x9, _x10, _x11) {
 return _openStream2.apply(this, arguments);
 }
 return openStream;
@@ -352,88 +345,7 @@ var ParquetEnvelopeWriter = function () {
 return ParquetEnvelopeWriter;
 }();
 exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
-var ParquetTransformer = function (_Transform) {
-(0, _inherits2.default)(ParquetTransformer, _Transform);
-var _super = _createSuper(ParquetTransformer);
-function ParquetTransformer(schema) {
-var _this;
-var opts = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
-(0, _classCallCheck2.default)(this, ParquetTransformer);
-_this = _super.call(this, {
-objectMode: true
-});
-(0, _defineProperty2.default)((0, _assertThisInitialized2.default)(_this), "writer", void 0);
-var writeProxy = function (t) {
-return function () {
-var _ref = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(b) {
-return _regenerator.default.wrap(function _callee8$(_context8) {
-while (1) {
-switch (_context8.prev = _context8.next) {
-case 0:
-t.push(b);
-case 1:
-case "end":
-return _context8.stop();
-}
-}
-}, _callee8);
-}));
-return function (_x13) {
-return _ref.apply(this, arguments);
-};
-}();
-}((0, _assertThisInitialized2.default)(_this));
-_this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9() {
-return _regenerator.default.wrap(function _callee9$(_context9) {
-while (1) {
-switch (_context9.prev = _context9.next) {
-case 0:
-case "end":
-return _context9.stop();
-}
-}
-}, _callee9);
-})), 0, opts), opts);
-return _this;
-}
 
-(0, _createClass2.default)(ParquetTransformer, [{
-key: "_transform",
-value:
-function _transform(row, encoding, callback) {
-if (row) {
-return this.writer.appendRow(row).then(callback);
-}
-callback();
-return Promise.resolve();
-}
-
-}, {
-key: "_flush",
-value: function () {
-var _flush2 = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(callback) {
-return _regenerator.default.wrap(function _callee10$(_context10) {
-while (1) {
-switch (_context10.prev = _context10.next) {
-case 0:
-_context10.next = 2;
-return this.writer.close(callback);
-case 2:
-case "end":
-return _context10.stop();
-}
-}
-}, _callee10, this);
-}));
-function _flush(_x14) {
-return _flush2.apply(this, arguments);
-}
-return _flush;
-}()
-}]);
-return ParquetTransformer;
-}(_stream.Transform);
-exports.ParquetTransformer = ParquetTransformer;
 function encodeValues(type, encoding, values, opts) {
 if (!(encoding in _codecs.PARQUET_CODECS)) {
 throw new Error("invalid encoding: ".concat(encoding));
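The large removal above deletes ParquetTransformer, the object-mode Node stream.Transform subclass, together with the stream import and the _inherits/_createSuper machinery it required. If streaming row-by-row encoding is still needed, the deleted behavior can be approximated in user code; the sketch below mirrors the removed logic and assumes ParquetEncoder and ParquetEnvelopeWriter are importable from the package:

import {Transform, TransformCallback} from 'stream';
// Assumption: both classes are reachable from the package entry point
// (they are exported by parquetjs/encoder/parquet-encoder.js in this release).
import {ParquetEncoder, ParquetEnvelopeWriter} from '@loaders.gl/parquet';

// Object-mode transform: rows in, encoded Parquet bytes out. Mirrors the removed
// ParquetTransformer: the envelope writer's write function pushes each encoded
// chunk downstream, _transform appends rows, and _flush closes the writer.
export class ParquetTransform extends Transform {
  private writer: any;

  constructor(schema: any, opts: any = {}) {
    super({objectMode: true});
    const envelopeWriter = new ParquetEnvelopeWriter(
      schema,
      async (chunk: Buffer) => { this.push(chunk); }, // writeFn
      async () => {},                                 // closeFn
      0,                                              // fileOffset
      opts
    );
    this.writer = new ParquetEncoder(schema, envelopeWriter, opts);
  }

  _transform(row: unknown, _encoding: string, callback: TransformCallback): void {
    if (row) {
      this.writer.appendRow(row).then(() => callback(), callback);
      return;
    }
    callback();
  }

  _flush(callback: TransformCallback): void {
    this.writer.close().then(() => callback(), callback);
  }
}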
@@ -441,15 +353,15 @@ function encodeValues(type, encoding, values, opts) {
 return _codecs.PARQUET_CODECS[encoding].encodeValues(type, values, opts);
 }
 
-function encodeDataPage(
+function encodeDataPage(_x12, _x13) {
 return _encodeDataPage.apply(this, arguments);
 }
 function _encodeDataPage() {
-_encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function
+_encodeDataPage = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee8(column, data) {
 var rLevelsBuf, dLevelsBuf, valuesBuf, dataBuf, compressedBuf, header, headerBuf, page;
-return _regenerator.default.wrap(function
+return _regenerator.default.wrap(function _callee8$(_context8) {
 while (1) {
-switch (
+switch (_context8.prev = _context8.next) {
 case 0:
 rLevelsBuf = Buffer.alloc(0);
 if (column.rLevelMax > 0) {
@@ -469,10 +381,10 @@
 bitWidth: column.typeLength
 });
 dataBuf = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]);
-
+_context8.next = 8;
 return Compression.deflate(column.compression, dataBuf);
 case 8:
-compressedBuf =
+compressedBuf = _context8.sent;
 header = new _parquetThrift.PageHeader({
 type: _parquetThrift.PageType.DATA_PAGE,
 data_page_header: new _parquetThrift.DataPageHeader({
@@ -487,38 +399,38 @@
 });
 headerBuf = (0, _readUtils.serializeThrift)(header);
 page = Buffer.concat([headerBuf, compressedBuf]);
-return
+return _context8.abrupt("return", {
 header: header,
 headerSize: headerBuf.length,
 page: page
 });
 case 13:
 case "end":
-return
+return _context8.stop();
 }
 }
-},
+}, _callee8);
 }));
 return _encodeDataPage.apply(this, arguments);
 }
-function encodeDataPageV2(
+function encodeDataPageV2(_x14, _x15, _x16) {
 return _encodeDataPageV.apply(this, arguments);
 }
 function _encodeDataPageV() {
-_encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function
+_encodeDataPageV = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee9(column, data, rowCount) {
 var valuesBuf, compressedBuf, rLevelsBuf, dLevelsBuf, header, headerBuf, page;
-return _regenerator.default.wrap(function
+return _regenerator.default.wrap(function _callee9$(_context9) {
 while (1) {
-switch (
+switch (_context9.prev = _context9.next) {
 case 0:
 valuesBuf = encodeValues(column.primitiveType, column.encoding, data.values, {
 typeLength: column.typeLength,
 bitWidth: column.typeLength
 });
-
+_context9.next = 3;
 return Compression.deflate(column.compression, valuesBuf);
 case 3:
-compressedBuf =
+compressedBuf = _context9.sent;
 rLevelsBuf = Buffer.alloc(0);
 if (column.rLevelMax > 0) {
 rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, data.rlevels, {
@@ -550,51 +462,51 @@
 });
 headerBuf = (0, _readUtils.serializeThrift)(header);
 page = Buffer.concat([headerBuf, rLevelsBuf, dLevelsBuf, compressedBuf]);
-return
+return _context9.abrupt("return", {
 header: header,
 headerSize: headerBuf.length,
 page: page
 });
 case 12:
 case "end":
-return
+return _context9.stop();
 }
 }
-},
+}, _callee9);
 }));
 return _encodeDataPageV.apply(this, arguments);
 }
-function encodeColumnChunk(
+function encodeColumnChunk(_x17, _x18, _x19, _x20) {
 return _encodeColumnChunk.apply(this, arguments);
 }
 function _encodeColumnChunk() {
-_encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function
+_encodeColumnChunk = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee10(column, buffer, offset, opts) {
 var data, baseOffset, pageBuf, total_uncompressed_size, total_compressed_size, result, metadata, metadataOffset, body;
-return _regenerator.default.wrap(function
+return _regenerator.default.wrap(function _callee10$(_context10) {
 while (1) {
-switch (
+switch (_context10.prev = _context10.next) {
 case 0:
 data = buffer.columnData[column.path.join()];
 baseOffset = (opts.baseOffset || 0) + offset;
 total_uncompressed_size = 0;
 total_compressed_size = 0;
 if (!opts.useDataPageV2) {
-
+_context10.next = 10;
 break;
 }
-
+_context10.next = 7;
 return encodeDataPageV2(column, data, buffer.rowCount);
 case 7:
-
-
+_context10.t0 = _context10.sent;
+_context10.next = 13;
 break;
 case 10:
-
+_context10.next = 12;
 return encodeDataPage(column, data);
 case 12:
-
+_context10.t0 = _context10.sent;
 case 13:
-result =
+result = _context10.t0;
 pageBuf = result.page;
 total_uncompressed_size += result.header.uncompressed_page_size + result.headerSize;
 total_compressed_size += result.header.compressed_page_size + result.headerSize;
@@ -613,29 +525,29 @@
 
 metadataOffset = baseOffset + pageBuf.length;
 body = Buffer.concat([pageBuf, (0, _readUtils.serializeThrift)(metadata)]);
-return
+return _context10.abrupt("return", {
 body: body,
 metadata: metadata,
 metadataOffset: metadataOffset
 });
 case 23:
 case "end":
-return
+return _context10.stop();
 }
 }
-},
+}, _callee10);
 }));
 return _encodeColumnChunk.apply(this, arguments);
 }
-function encodeRowGroup(
+function encodeRowGroup(_x21, _x22, _x23) {
 return _encodeRowGroup.apply(this, arguments);
 }
 function _encodeRowGroup() {
-_encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function
+_encodeRowGroup = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee11(schema, data, opts) {
 var metadata, body, _iterator2, _step2, field, cchunkData, cchunk;
-return _regenerator.default.wrap(function
+return _regenerator.default.wrap(function _callee11$(_context11) {
 while (1) {
-switch (
+switch (_context11.prev = _context11.next) {
 case 0:
 metadata = new _parquetThrift.RowGroup({
 num_rows: data.rowCount,
@@ -644,24 +556,24 @@
 });
 body = Buffer.alloc(0);
 _iterator2 = _createForOfIteratorHelper(schema.fieldList);
-
+_context11.prev = 3;
 _iterator2.s();
 case 5:
 if ((_step2 = _iterator2.n()).done) {
-
+_context11.next = 18;
 break;
 }
 field = _step2.value;
 if (!field.isNested) {
-
+_context11.next = 9;
 break;
 }
-return
+return _context11.abrupt("continue", 16);
 case 9:
-
+_context11.next = 11;
 return encodeColumnChunk(field, data, body.length, opts);
 case 11:
-cchunkData =
+cchunkData = _context11.sent;
 cchunk = new _parquetThrift.ColumnChunk({
 file_offset: cchunkData.metadataOffset,
 meta_data: cchunkData.metadata
@@ -670,30 +582,30 @@
 metadata.total_byte_size = new _nodeInt.default(Number(metadata.total_byte_size) + cchunkData.body.length);
 body = Buffer.concat([body, cchunkData.body]);
 case 16:
-
+_context11.next = 5;
 break;
 case 18:
-
+_context11.next = 23;
 break;
 case 20:
-
-
-_iterator2.e(
+_context11.prev = 20;
+_context11.t0 = _context11["catch"](3);
+_iterator2.e(_context11.t0);
 case 23:
-
+_context11.prev = 23;
 _iterator2.f();
-return
+return _context11.finish(23);
 case 26:
-return
+return _context11.abrupt("return", {
 body: body,
 metadata: metadata
 });
 case 27:
 case "end":
-return
+return _context11.stop();
 }
 }
-},
+}, _callee11, null, [[3, 20, 23, 26]]);
 }));
 return _encodeRowGroup.apply(this, arguments);
 }
@@ -754,4 +666,4 @@ function encodeFooter(schema, rowCount, rowGroups, userMetadata) {
 footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
 return footerEncoded;
 }
-//# sourceMappingURL=
+//# sourceMappingURL=parquet-encoder.js.map