@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dist.min.js +7 -18
- package/dist/dist.min.js.map +1 -1
- package/dist/es5/bundle.js +2 -4
- package/dist/es5/bundle.js.map +1 -1
- package/dist/es5/constants.js +17 -0
- package/dist/es5/constants.js.map +1 -0
- package/dist/es5/index.js +53 -21
- package/dist/es5/index.js.map +1 -1
- package/dist/es5/lib/convert-schema.js +82 -0
- package/dist/es5/lib/convert-schema.js.map +1 -0
- package/dist/es5/lib/parse-parquet.js +173 -0
- package/dist/es5/lib/parse-parquet.js.map +1 -0
- package/dist/es5/lib/read-array-buffer.js +53 -0
- package/dist/es5/lib/read-array-buffer.js.map +1 -0
- package/dist/es5/parquet-loader.js +6 -79
- package/dist/es5/parquet-loader.js.map +1 -1
- package/dist/es5/parquet-writer.js +1 -1
- package/dist/es5/parquet-writer.js.map +1 -1
- package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
- package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/es5/parquetjs/codecs/index.js +10 -0
- package/dist/es5/parquetjs/codecs/index.js.map +1 -1
- package/dist/es5/parquetjs/codecs/rle.js +2 -2
- package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
- package/dist/es5/parquetjs/compression.js +138 -104
- package/dist/es5/parquetjs/compression.js.map +1 -1
- package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
- package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/es5/parquetjs/parser/decoders.js +495 -0
- package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
- package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
- package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
- package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/es5/parquetjs/schema/declare.js.map +1 -1
- package/dist/es5/parquetjs/schema/schema.js +2 -0
- package/dist/es5/parquetjs/schema/schema.js.map +1 -1
- package/dist/es5/parquetjs/schema/shred.js +2 -1
- package/dist/es5/parquetjs/schema/shred.js.map +1 -1
- package/dist/es5/parquetjs/schema/types.js +79 -4
- package/dist/es5/parquetjs/schema/types.js.map +1 -1
- package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
- package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/es5/parquetjs/utils/file-utils.js +108 -0
- package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
- package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/esm/bundle.js +2 -4
- package/dist/esm/bundle.js.map +1 -1
- package/dist/esm/constants.js +6 -0
- package/dist/esm/constants.js.map +1 -0
- package/dist/esm/index.js +14 -4
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/lib/convert-schema.js +71 -0
- package/dist/esm/lib/convert-schema.js.map +1 -0
- package/dist/esm/lib/parse-parquet.js +28 -0
- package/dist/esm/lib/parse-parquet.js.map +1 -0
- package/dist/esm/lib/read-array-buffer.js +9 -0
- package/dist/esm/lib/read-array-buffer.js.map +1 -0
- package/dist/esm/parquet-loader.js +4 -24
- package/dist/esm/parquet-loader.js.map +1 -1
- package/dist/esm/parquet-writer.js +1 -1
- package/dist/esm/parquet-writer.js.map +1 -1
- package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
- package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
- package/dist/esm/parquetjs/codecs/index.js +9 -0
- package/dist/esm/parquetjs/codecs/index.js.map +1 -1
- package/dist/esm/parquetjs/codecs/rle.js +2 -2
- package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
- package/dist/esm/parquetjs/compression.js +54 -105
- package/dist/esm/parquetjs/compression.js.map +1 -1
- package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
- package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
- package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
- package/dist/esm/parquetjs/parser/decoders.js +300 -0
- package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
- package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
- package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
- package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
- package/dist/esm/parquetjs/schema/declare.js.map +1 -1
- package/dist/esm/parquetjs/schema/schema.js +2 -0
- package/dist/esm/parquetjs/schema/schema.js.map +1 -1
- package/dist/esm/parquetjs/schema/shred.js +2 -1
- package/dist/esm/parquetjs/schema/shred.js.map +1 -1
- package/dist/esm/parquetjs/schema/types.js +78 -4
- package/dist/esm/parquetjs/schema/types.js.map +1 -1
- package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
- package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
- package/dist/esm/parquetjs/utils/file-utils.js +79 -0
- package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
- package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
- package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
- package/dist/parquet-worker.js +7 -18
- package/dist/parquet-worker.js.map +1 -1
- package/package.json +10 -10
- package/src/bundle.ts +2 -3
- package/src/constants.ts +17 -0
- package/src/index.ts +30 -4
- package/src/lib/convert-schema.ts +95 -0
- package/src/lib/parse-parquet.ts +27 -0
- package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
- package/src/parquet-loader.ts +4 -24
- package/src/parquetjs/codecs/dictionary.ts +11 -0
- package/src/parquetjs/codecs/index.ts +13 -0
- package/src/parquetjs/codecs/rle.ts +4 -2
- package/src/parquetjs/compression.ts +89 -50
- package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
- package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
- package/src/parquetjs/parser/decoders.ts +448 -0
- package/src/parquetjs/parser/parquet-cursor.ts +94 -0
- package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
- package/src/parquetjs/parser/parquet-reader.ts +179 -0
- package/src/parquetjs/schema/declare.ts +48 -2
- package/src/parquetjs/schema/schema.ts +2 -0
- package/src/parquetjs/schema/shred.ts +3 -1
- package/src/parquetjs/schema/types.ts +82 -5
- package/src/parquetjs/utils/buffer-utils.ts +18 -0
- package/src/parquetjs/utils/file-utils.ts +96 -0
- package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
- package/dist/dist.es5.min.js +0 -51
- package/dist/dist.es5.min.js.map +0 -1
- package/dist/es5/parquetjs/compression.ts.disabled +0 -105
- package/dist/es5/parquetjs/reader.js +0 -1078
- package/dist/es5/parquetjs/reader.js.map +0 -1
- package/dist/es5/parquetjs/util.js.map +0 -1
- package/dist/es5/parquetjs/writer.js.map +0 -1
- package/dist/esm/libs/read-array-buffer.ts +0 -31
- package/dist/esm/parquetjs/compression.ts.disabled +0 -105
- package/dist/esm/parquetjs/reader.js +0 -524
- package/dist/esm/parquetjs/reader.js.map +0 -1
- package/dist/esm/parquetjs/util.js.map +0 -1
- package/dist/esm/parquetjs/writer.js.map +0 -1
- package/src/libs/read-array-buffer.ts +0 -31
- package/src/parquetjs/compression.ts.disabled +0 -105
- package/src/parquetjs/reader.ts +0 -707
package/dist/es5/lib/parse-parquet.js.map
@@ -0,0 +1 @@
+
{"version":3,"sources":["../../../src/lib/parse-parquet.ts"],"names":["parseParquet","arrayBuffer","options","blob","Blob","parseParquetFileInBatches","batch","ParquetReader","openBlob","reader","rows","cursor","getCursor","next","record","push","close"],"mappings":";;;;;;;;;;;;;;;;;;;;AAGA;;SAEsBA,Y;;;;;4EAAf,kBAA4BC,WAA5B,EAAsDC,OAAtD;AAAA;;AAAA;AAAA;AAAA;AAAA;AACCC,YAAAA,IADD,GACQ,IAAIC,IAAJ,CAAS,CAACH,WAAD,CAAT,CADR;AAAA;AAAA;AAAA;AAAA,qDAEqBI,yBAAyB,CAACF,IAAD,EAAOD,OAAP,CAF9C;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAEYI,YAAAA,KAFZ;AAAA,8CAGIA,KAHJ;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA,8CAKE,IALF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SAQgBD,yB;;;;;2FAAhB,iBAA0CF,IAA1C,EAAsDD,OAAtD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sDACgBK,6BAAcC,QAAd,CAAuBL,IAAvB,CADhB;;AAAA;AACCM,YAAAA,MADD;AAECC,YAAAA,IAFD,GAEiB,EAFjB;AAAA;AAIGC,YAAAA,MAJH,GAIYF,MAAM,CAACG,SAAP,EAJZ;;AAAA;AAAA;AAAA,sDAMoBD,MAAM,CAACE,IAAP,EANpB;;AAAA;AAAA,kBAMKC,MANL;AAAA;AAAA;AAAA;;AAODJ,YAAAA,IAAI,CAACK,IAAL,CAAUD,MAAV;AAPC;AAAA;;AAAA;AAAA;AAAA;AAAA,sDAUGL,MAAM,CAACO,KAAP,EAVH;;AAAA;AAAA;;AAAA;AAAA;AAYL,mBAAMN,IAAN;;AAZK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport type {ParquetLoaderOptions} from '../parquet-loader';\n\nimport {ParquetReader} from '../parquetjs/parser/parquet-reader';\n\nexport async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const blob = new Blob([arrayBuffer]);\n for await (const batch of parseParquetFileInBatches(blob, options)) {\n return batch;\n }\n return null;\n}\n\nexport async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openBlob(blob);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n yield rows;\n}\n"],"file":"parse-parquet.js"}
package/dist/es5/lib/read-array-buffer.js
@@ -0,0 +1,53 @@
+"use strict";
+
+var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
+
+Object.defineProperty(exports, "__esModule", {
+value: true
+});
+exports.readArrayBuffer = readArrayBuffer;
+
+var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
+
+var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
+
+function readArrayBuffer(_x, _x2, _x3) {
+return _readArrayBuffer.apply(this, arguments);
+}
+
+function _readArrayBuffer() {
+_readArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(file, start, length) {
+var slice;
+return _regenerator.default.wrap(function _callee$(_context) {
+while (1) {
+switch (_context.prev = _context.next) {
+case 0:
+if (!(file instanceof Blob)) {
+_context.next = 5;
+break;
+}
+
+slice = file.slice(start, start + length);
+_context.next = 4;
+return slice.arrayBuffer();
+
+case 4:
+return _context.abrupt("return", _context.sent);
+
+case 5:
+_context.next = 7;
+return file.read(start, start + length);
+
+case 7:
+return _context.abrupt("return", _context.sent);
+
+case 8:
+case "end":
+return _context.stop();
+}
+}
+}, _callee);
+}));
+return _readArrayBuffer.apply(this, arguments);
+}
+//# sourceMappingURL=read-array-buffer.js.map
package/dist/es5/lib/read-array-buffer.js.map
@@ -0,0 +1 @@
+
{"version":3,"sources":["../../../src/lib/read-array-buffer.ts"],"names":["readArrayBuffer","file","start","length","Blob","slice","arrayBuffer","read"],"mappings":";;;;;;;;;;;;;SAEsBA,e;;;;;+EAAf,iBACLC,IADK,EAELC,KAFK,EAGLC,MAHK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAKDF,IAAI,YAAYG,IALf;AAAA;AAAA;AAAA;;AAMGC,YAAAA,KANH,GAMWJ,IAAI,CAACI,KAAL,CAAWH,KAAX,EAAkBA,KAAK,GAAGC,MAA1B,CANX;AAAA;AAAA,mBAOUE,KAAK,CAACC,WAAN,EAPV;;AAAA;AAAA;;AAAA;AAAA;AAAA,mBASQL,IAAI,CAACM,IAAL,CAAUL,KAAV,EAAiBA,KAAK,GAAGC,MAAzB,CATR;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["// Random-Access read\n\nexport async function readArrayBuffer(\n file: Blob | ArrayBuffer | any,\n start: number,\n length: number\n): Promise<ArrayBuffer> {\n if (file instanceof Blob) {\n const slice = file.slice(start, start + length);\n return await slice.arrayBuffer();\n }\n return await file.read(start, start + length);\n}\n\n/**\n * Read a slice of a Blob or File, without loading the entire file into memory\n * The trick when reading File objects is to read successive \"slices\" of the File\n * Per spec https://w3c.github.io/FileAPI/, slicing a File only updates the start and end fields\n * Actually reading from file happens in `readAsArrayBuffer`\n * @param blob to read\n export async function readBlob(blob: Blob): Promise<ArrayBuffer> {\n return await new Promise((resolve, reject) => {\n const fileReader = new FileReader();\n fileReader.onload = (event: ProgressEvent<FileReader>) =>\n resolve(event?.target?.result as ArrayBuffer);\n // TODO - reject with a proper Error\n fileReader.onerror = (error: ProgressEvent<FileReader>) => reject(error);\n fileReader.readAsArrayBuffer(blob);\n });\n}\n*/\n"],"file":"read-array-buffer.js"}
package/dist/es5/parquet-loader.js
@@ -1,32 +1,17 @@
 "use strict";
 
-var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
-
 Object.defineProperty(exports, "__esModule", {
 value: true
 });
-exports.
-
-var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
-
-var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
-
-var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
-
-var _reader = require("./parquetjs/reader");
-
-function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
-
-function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
-
-var VERSION = typeof "3.0.12" !== 'undefined' ? "3.0.12" : 'latest';
+exports._typecheckParquetLoader = exports.ParquetLoader = void 0;
+var VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
 var DEFAULT_PARQUET_LOADER_OPTIONS = {
 parquet: {
 type: 'object-row-table',
 url: undefined
 }
 };
-var 
+var ParquetLoader = {
 name: 'Apache Parquet',
 id: 'parquet',
 module: 'parquet',
@@ -36,68 +21,10 @@ var ParquetWorkerLoader = {
 extensions: ['parquet'],
 mimeTypes: ['application/octet-stream'],
 binary: true,
+tests: ['PAR1', 'PARE'],
 options: DEFAULT_PARQUET_LOADER_OPTIONS
 };
-exports.ParquetWorkerLoader = ParquetWorkerLoader;
-
-var ParquetLoader = _objectSpread(_objectSpread({}, ParquetWorkerLoader), {}, {
-parse: parse
-});
-
 exports.ParquetLoader = ParquetLoader;
-
-
-return _parse.apply(this, arguments);
-}
-
-function _parse() {
-_parse = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(arrayBuffer, options) {
-var reader, rows, cursor, record;
-return _regenerator.default.wrap(function _callee$(_context) {
-while (1) {
-switch (_context.prev = _context.next) {
-case 0:
-_context.next = 2;
-return _reader.ParquetReader.openArrayBuffer(arrayBuffer);
-
-case 2:
-reader = _context.sent;
-rows = [];
-_context.prev = 4;
-cursor = reader.getCursor();
-
-case 6:
-_context.next = 8;
-return cursor.next();
-
-case 8:
-if (!(record = _context.sent)) {
-_context.next = 12;
-break;
-}
-
-rows.push(record);
-_context.next = 6;
-break;
-
-case 12:
-_context.prev = 12;
-_context.next = 15;
-return reader.close();
-
-case 15:
-return _context.finish(12);
-
-case 16:
-return _context.abrupt("return", rows);
-
-case 17:
-case "end":
-return _context.stop();
-}
-}
-}, _callee, null, [[4,, 12, 16]]);
-}));
-return _parse.apply(this, arguments);
-}
+var _typecheckParquetLoader = ParquetLoader;
+exports._typecheckParquetLoader = _typecheckParquetLoader;
 //# sourceMappingURL=parquet-loader.js.map
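The hunk above removes the embedded `parse` function from the ES5 loader: `ParquetLoader` is now a plain descriptor (id, extensions, mimeTypes, magic-byte `tests`, default options), with parsing moved into `lib/parse-parquet` and the new `parquetjs/parser/*` modules. As a usage sketch only, assuming the standard `@loaders.gl/core` `load()` API and the `ParquetLoader` re-export from `src/index.ts` (neither is shown in this diff):

```typescript
// Sketch, not part of this diff -- assumes @loaders.gl/core's load() API.
import {load} from '@loaders.gl/core';
import {ParquetLoader} from '@loaders.gl/parquet';

async function readTable(url: string) {
  // The loader object is matched by the '.parquet' extension or by the
  // 'PAR1'/'PARE' magic bytes listed in `tests`; parsing happens elsewhere.
  return await load(url, ParquetLoader, {
    parquet: {type: 'object-row-table'} // default from DEFAULT_PARQUET_LOADER_OPTIONS
  });
}
```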
package/dist/es5/parquet-loader.js.map
@@ -1 +1 @@
-
{"version":3,"sources":["../../src/parquet-loader.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","
+
{"version":3,"sources":["../../src/parquet-loader.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","tests","options","_typecheckParquetLoader"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AASA,IAAMC,8BAAoD,GAAG;AAC3DC,EAAAA,OAAO,EAAE;AACPC,IAAAA,IAAI,EAAE,kBADC;AAEPC,IAAAA,GAAG,EAAEC;AAFE;AADkD,CAA7D;AAQO,IAAMC,aAAa,GAAG;AAC3BC,EAAAA,IAAI,EAAE,gBADqB;AAE3BC,EAAAA,EAAE,EAAE,SAFuB;AAG3BC,EAAAA,MAAM,EAAE,SAHmB;AAI3BC,EAAAA,OAAO,EAAEV,OAJkB;AAK3BW,EAAAA,MAAM,EAAE,IALmB;AAM3BC,EAAAA,QAAQ,EAAE,OANiB;AAO3BC,EAAAA,UAAU,EAAE,CAAC,SAAD,CAPe;AAQ3BC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CARgB;AAS3BC,EAAAA,MAAM,EAAE,IATmB;AAU3BC,EAAAA,KAAK,EAAE,CAAC,MAAD,EAAS,MAAT,CAVoB;AAW3BC,EAAAA,OAAO,EAAEhB;AAXkB,CAAtB;;AAcA,IAAMiB,uBAA+B,GAAGZ,aAAxC","sourcesContent":["import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n tests: ['PAR1', 'PARE'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nexport const _typecheckParquetLoader: Loader = ParquetLoader;\n"],"file":"parquet-loader.js"}
package/dist/es5/parquet-writer.js
@@ -4,7 +4,7 @@ Object.defineProperty(exports, "__esModule", {
 value: true
 });
 exports.ParquetWriter = void 0;
-var VERSION = typeof "3.0.
+var VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
 var DEFAULT_PARQUET_LOADER_OPTIONS = {};
 var ParquetWriter = {
 name: 'Apache Parquet',
package/dist/es5/parquet-writer.js.map
@@ -1 +1 @@
-
{"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,
+
{"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AAIA,IAAMC,8BAA8B,GAAG,EAAvC;AAEO,IAAMC,aAAqB,GAAG;AACnCC,EAAAA,IAAI,EAAE,gBAD6B;AAEnCC,EAAAA,EAAE,EAAE,SAF+B;AAGnCC,EAAAA,MAAM,EAAE,SAH2B;AAInCC,EAAAA,OAAO,EAAEN,OAJ0B;AAKnCO,EAAAA,UAAU,EAAE,CAAC,SAAD,CALuB;AAMnCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CANwB;AAOnCC,EAAAA,UAAU,EAAVA,UAPmC;AAQnCC,EAAAA,MAAM,EAAE,IAR2B;AASnCC,EAAAA,OAAO,EAAEV;AAT0B,CAA9B;;;AAYP,SAASQ,UAAT,CAAoBG,IAApB,EAA0BD,OAA1B,EAA0D;AACxD,SAAO,IAAIE,WAAJ,CAAgB,CAAhB,CAAP;AACD","sourcesContent":["import type {Writer} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetWriterOptions = {};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS = {};\n\nexport const ParquetWriter: Writer = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n encodeSync,\n binary: true,\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nfunction encodeSync(data, options?: ParquetWriterOptions) {\n return new ArrayBuffer(0);\n}\n"],"file":"parquet-writer.js"}
package/dist/es5/parquetjs/codecs/dictionary.js
@@ -0,0 +1,30 @@
+"use strict";
+
+var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
+
+Object.defineProperty(exports, "__esModule", {
+value: true
+});
+exports.decodeValues = decodeValues;
+exports.encodeValues = encodeValues;
+
+var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
+
+var _rle = require("./rle");
+
+function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
+
+function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
+
+function decodeValues(type, cursor, count, opts) {
+opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);
+cursor.offset += 1;
+return (0, _rle.decodeValues)(type, cursor, count, _objectSpread(_objectSpread({}, opts), {}, {
+disableEnvelope: true
+}));
+}
+
+function encodeValues(type, cursor, count, opts) {
+throw new Error('Encode dictionary functionality is not supported');
+}
+//# sourceMappingURL=dictionary.js.map
package/dist/es5/parquetjs/codecs/dictionary.js.map
@@ -0,0 +1 @@
+
{"version":3,"sources":["../../../../src/parquetjs/codecs/dictionary.ts"],"names":["decodeValues","type","cursor","count","opts","bitWidth","buffer","slice","offset","readInt8","disableEnvelope","encodeValues","Error"],"mappings":";;;;;;;;;;;;AAAA;;;;;;AAEO,SAASA,YAAT,CAAsBC,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtDA,EAAAA,IAAI,CAACC,QAAL,GAAgBH,MAAM,CAACI,MAAP,CAAcC,KAAd,CAAoBL,MAAM,CAACM,MAA3B,EAAmCN,MAAM,CAACM,MAAP,GAAgB,CAAnD,EAAsDC,QAAtD,CAA+D,CAA/D,CAAhB;AACAP,EAAAA,MAAM,CAACM,MAAP,IAAiB,CAAjB;AACA,SAAO,uBAAgBP,IAAhB,EAAsBC,MAAtB,EAA8BC,KAA9B,kCAAyCC,IAAzC;AAA+CM,IAAAA,eAAe,EAAE;AAAhE,KAAP;AACD;;AAEM,SAASC,YAAT,CAAsBV,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtD,QAAM,IAAIQ,KAAJ,CAAU,kDAAV,CAAN;AACD","sourcesContent":["import {decodeValues as decodeRleValues} from './rle';\n\nexport function decodeValues(type, cursor, count, opts) {\n opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);\n cursor.offset += 1;\n return decodeRleValues(type, cursor, count, {...opts, disableEnvelope: true});\n}\n\nexport function encodeValues(type, cursor, count, opts) {\n throw new Error('Encode dictionary functionality is not supported');\n}\n"],"file":"dictionary.js"}
package/dist/es5/parquetjs/codecs/index.js
@@ -14,6 +14,8 @@ var PLAIN = _interopRequireWildcard(require("./plain"));
 
 var RLE = _interopRequireWildcard(require("./rle"));
 
+var DICTIONARY = _interopRequireWildcard(require("./dictionary"));
+
 var _declare = require("./declare");
 
 Object.keys(_declare).forEach(function (key) {
@@ -40,6 +42,14 @@ var PARQUET_CODECS = {
 RLE: {
 encodeValues: RLE.encodeValues,
 decodeValues: RLE.decodeValues
+},
+PLAIN_DICTIONARY: {
+encodeValues: DICTIONARY.encodeValues,
+decodeValues: DICTIONARY.decodeValues
+},
+RLE_DICTIONARY: {
+encodeValues: DICTIONARY.encodeValues,
+decodeValues: DICTIONARY.decodeValues
 }
 };
 exports.PARQUET_CODECS = PARQUET_CODECS;
package/dist/es5/parquetjs/codecs/index.js.map
@@ -1 +1 @@
-
{"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PARQUET_CODECS","PLAIN","encodeValues","decodeValues","RLE"],"mappings":";;;;;;;;;;;;AAGA;;AACA;;AAEA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;;;;AAEO,IAAMA,cAAqD,GAAG;AACnEC,EAAAA,KAAK,EAAE;AACLC,IAAAA,YAAY,EAAED,KAAK,CAACC,YADf;AAELC,IAAAA,YAAY,EAAEF,KAAK,CAACE;AAFf,GAD4D;AAKnEC,EAAAA,GAAG,EAAE;AACHF,IAAAA,YAAY,EAAEE,GAAG,CAACF,YADf;AAEHC,IAAAA,YAAY,EAAEC,GAAG,CAACD;AAFf;
+
{"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PARQUET_CODECS","PLAIN","encodeValues","decodeValues","RLE","PLAIN_DICTIONARY","DICTIONARY","RLE_DICTIONARY"],"mappings":";;;;;;;;;;;;AAGA;;AACA;;AACA;;AAEA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;;;;AAEO,IAAMA,cAAqD,GAAG;AACnEC,EAAAA,KAAK,EAAE;AACLC,IAAAA,YAAY,EAAED,KAAK,CAACC,YADf;AAELC,IAAAA,YAAY,EAAEF,KAAK,CAACE;AAFf,GAD4D;AAKnEC,EAAAA,GAAG,EAAE;AACHF,IAAAA,YAAY,EAAEE,GAAG,CAACF,YADf;AAEHC,IAAAA,YAAY,EAAEC,GAAG,CAACD;AAFf,GAL8D;AAUnEE,EAAAA,gBAAgB,EAAE;AAEhBH,IAAAA,YAAY,EAAEI,UAAU,CAACJ,YAFT;AAGhBC,IAAAA,YAAY,EAAEG,UAAU,CAACH;AAHT,GAViD;AAgBnEI,EAAAA,cAAc,EAAE;AAEdL,IAAAA,YAAY,EAAEI,UAAU,CAACJ,YAFX;AAGdC,IAAAA,YAAY,EAAEG,UAAU,CAACH;AAHX;AAhBmD,CAA9D","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport type {ParquetCodec} from '../schema/declare';\nimport type {ParquetCodecKit} from './declare';\nimport * as PLAIN from './plain';\nimport * as RLE from './rle';\nimport * as DICTIONARY from './dictionary';\n\nexport * from './declare';\n\nexport const PARQUET_CODECS: Record<ParquetCodec, ParquetCodecKit> = {\n PLAIN: {\n encodeValues: PLAIN.encodeValues,\n decodeValues: PLAIN.decodeValues\n },\n RLE: {\n encodeValues: RLE.encodeValues,\n decodeValues: RLE.decodeValues\n },\n // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification.\n PLAIN_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n },\n // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary page for Parquet 2.0+ files.\n RLE_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n }\n};\n"],"file":"index.js"}
package/dist/es5/parquetjs/codecs/rle.js
@@ -134,7 +134,7 @@ function decodeRunRepeated(cursor, count, opts) {
 var value = 0;
 
 for (var i = 0; i < Math.ceil(bitWidth / 8); i++) {
-value
+value << 8;
 value += cursor.buffer[cursor.offset];
 cursor.offset += 1;
 }
@@ -166,7 +166,7 @@ function encodeRunRepeated(value, count, opts) {
 
 for (var i = 0; i < buf.length; i++) {
 buf.writeUInt8(value & 0xff, i);
-value
+value >> 8;
 }
 
 return Buffer.concat([Buffer.from(_varint.default.encode(count << 1)), buf]);
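The replacement lines above (`value << 8;` in decodeRunRepeated and `value >> 8;` in encodeRunRepeated) are bare expression statements whose results are discarded; the sourcesContent embedded in the new rle.js.map below tags them "TODO - this looks wrong", while the removed source used the compound assignments `value <<= 8;` and `value >>= 8;`. Purely as an illustration (not this package's code), a big-endian byte-accumulation loop with the compound shift reads:

```typescript
// Illustration only: the shape of decodeRunRepeated's accumulation loop when
// each byte is actually folded into `value` (compound shift, then add).
function decodeRepeatedValue(buffer: Uint8Array, offset: number, bitWidth: number): number {
  let value = 0;
  for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {
    value = (value << 8) + buffer[offset + i]; // most significant byte first
  }
  return value;
}
```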
package/dist/es5/parquetjs/codecs/rle.js.map
@@ -1 +1 @@
-
{"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","varint","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":";;;;;;;;;;;;AAIA;;AAGO,SAASA,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAW,UAACC,CAAD;AAAA,eAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAAf;AAAA,OAAX,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;;AAEM,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,QAAME,MAAM,GAAGC,gBAAOC,MAAP,CAAcL,MAAM,CAACM,MAArB,EAA6BN,MAAM,CAACE,MAApC,CAAf;;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiBE,gBAAOG,cAAP,CAAsBJ,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AAAA;;AACd,UAAMF,MAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;;AACA,iBAAA1B,MAAM,EAACgB,IAAP,iDAAee,kBAAkB,CAACR,MAAD,EAASC,MAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AAAA;;AACL,UAAMuB,OAAK,GAAGE,MAAM,IAAI,CAAxB;;AACA,kBAAA1B,MAAM,EAACgB,IAAP,kDAAegB,iBAAiB,CAACT,MAAD,EAASC,OAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACiC,KAAP,CAAa,CAAb,EAAgBT,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS+B,kBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EA
IY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,MAAIV,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,MAAMF,MAAM,GAAG,IAAImC,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGV,KAA/B,EAAsCa,CAAC,EAAvC,EAA2C;AACzC,QAAId,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAAP,GAAgBa,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnErC,MAAAA,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDX,EAAAA,MAAM,CAACE,MAAP,IAAiBS,QAAQ,IAAIV,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAASgC,iBAAT,CACET,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI7B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG2B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CvB,CAAC,EAA9C,EAAkD;AAChD6B,IAAAA,KAAK,KAAK,CAAV;AACAA,IAAAA,KAAK,IAAIjB,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIU,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAAS1B,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIlC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIyB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGlC,MAAM,CAACY,MAAtC,EAA8CyB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACrC,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE5B,MAAAA,GAAG,CAACgC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO9B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAgB3C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2ByB,KAA3B,EAA0ChB,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAM5B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACsC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B7B,CAA7B;AACA6B,IAAAA,KAAK,KAAK,CAAV;AACD;;AAED,SAAOjC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAcnB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 
=== 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n value <<= 8;\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n 
buf.writeUInt8(value & 0xff, i);\n value >>= 8;\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
+
{"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","varint","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":";;;;;;;;;;;;AAIA;;AAGO,SAASA,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAW,UAACC,CAAD;AAAA,eAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAAf;AAAA,OAAX,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;;AAEM,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,QAAME,MAAM,GAAGC,gBAAOC,MAAP,CAAcL,MAAM,CAACM,MAArB,EAA6BN,MAAM,CAACE,MAApC,CAAf;;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiBE,gBAAOG,cAAP,CAAsBJ,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AAAA;;AACd,UAAMF,MAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;;AACA,iBAAA1B,MAAM,EAACgB,IAAP,iDAAee,kBAAkB,CAACR,MAAD,EAASC,MAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AAAA;;AACL,UAAMuB,OAAK,GAAGE,MAAM,IAAI,CAAxB;;AACA,kBAAA1B,MAAM,EAACgB,IAAP,kDAAegB,iBAAiB,CAACT,MAAD,EAASC,OAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACiC,KAAP,CAAa,CAAb,EAAgBT,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS+B,kBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EA
IY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,MAAIV,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,MAAMF,MAAM,GAAG,IAAImC,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGV,KAA/B,EAAsCa,CAAC,EAAvC,EAA2C;AACzC,QAAId,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAAP,GAAgBa,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnErC,MAAAA,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDX,EAAAA,MAAM,CAACE,MAAP,IAAiBS,QAAQ,IAAIV,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAASgC,iBAAT,CACET,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI7B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG2B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CvB,CAAC,EAA9C,EAAkD;AAEhD6B,IAAAA,KAAK,IAAI,CAAT;AACAA,IAAAA,KAAK,IAAIjB,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIU,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAAS1B,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIlC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIyB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGlC,MAAM,CAACY,MAAtC,EAA8CyB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACrC,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE5B,MAAAA,GAAG,CAACgC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO9B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAgB3C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2ByB,KAA3B,EAA0ChB,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAM5B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACsC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B7B,CAA7B;AAEA6B,IAAAA,KAAK,IAAI,CAAT;AACD;;AAED,SAAOjC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAcnB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 
=== 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = 
Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
package/dist/es5/parquetjs/compression.js
@@ -2,99 +2,170 @@
 
 var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
 
-var _typeof = require("@babel/runtime/helpers/typeof");
-
 Object.defineProperty(exports, "__esModule", {
 value: true
 });
+exports.preloadCompressions = preloadCompressions;
 exports.deflate = deflate;
+exports.decompress = decompress;
 exports.inflate = inflate;
 exports.PARQUET_COMPRESSION_METHODS = void 0;
 
-var
+var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
 
-var
+var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
 
-var
+var _compression = require("@loaders.gl/compression");
 
-
+var _bufferUtils = require("./utils/buffer-utils");
 
-
+var _decompress2 = _interopRequireDefault(require("brotli/decompress"));
 
-var
-
-var
-
-
-
-
-
-
-
-
-
-SNAPPY: {
-deflate: deflate_snappy,
-inflate: inflate_snappy
-},
-LZO: {
-deflate: deflate_lzo,
-inflate: inflate_lzo
-},
-BROTLI: {
-deflate: deflate_brotli,
-inflate: inflate_brotli
+var _lz4js = _interopRequireDefault(require("lz4js"));
+
+var _lzo = _interopRequireDefault(require("lzo"));
+
+var _zstdCodec = require("zstd-codec");
+
+var modules = {
+brotli: {
+decompress: _decompress2.default,
+compress: function compress() {
+throw new Error('brotli compress');
+}
 },
-
-
-
-
+lz4js: _lz4js.default,
+lzo: _lzo.default,
+'zstd-codec': _zstdCodec.ZstdCodec
+};
+var PARQUET_COMPRESSION_METHODS = {
+UNCOMPRESSED: new _compression.NoCompression(),
+GZIP: new _compression.GZipCompression(),
+SNAPPY: new _compression.SnappyCompression(),
+BROTLI: new _compression.BrotliCompression({
+modules: modules
+}),
+LZ4: new _compression.LZ4Compression({
+modules: modules
+}),
+LZ4_RAW: new _compression.LZ4Compression({
+modules: modules
+}),
+LZO: new _compression.LZOCompression({
+modules: modules
+}),
+ZSTD: new _compression.ZstdCompression({
+modules: modules
+})
 };
 exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS;
 
-function
-
-throw new Error("invalid compression method: ".concat(method));
-}
-
-return PARQUET_COMPRESSION_METHODS[method].deflate(value);
+function preloadCompressions(_x) {
+return _preloadCompressions.apply(this, arguments);
 }
 
-function
-
+function _preloadCompressions() {
+_preloadCompressions = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(options) {
+var compressions;
+return _regenerator.default.wrap(function _callee$(_context) {
+while (1) {
+switch (_context.prev = _context.next) {
+case 0:
+compressions = Object.values(PARQUET_COMPRESSION_METHODS);
+_context.next = 3;
+return Promise.all(compressions.map(function (compression) {
+return compression.preload();
+}));
+
+case 3:
+return _context.abrupt("return", _context.sent);
+
+case 4:
+case "end":
+return _context.stop();
+}
+}
+}, _callee);
+}));
+return _preloadCompressions.apply(this, arguments);
 }
 
-function
-return
+function deflate(_x2, _x3) {
+return _deflate.apply(this, arguments);
 }
 
-function
-
+function _deflate() {
+_deflate = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(method, value) {
+var compression, inputArrayBuffer, compressedArrayBuffer;
+return _regenerator.default.wrap(function _callee2$(_context2) {
+while (1) {
+switch (_context2.prev = _context2.next) {
+case 0:
+compression = PARQUET_COMPRESSION_METHODS[method];
+
+if (compression) {
+_context2.next = 3;
+break;
+}
+
+throw new Error("parquet: invalid compression method: ".concat(method));
+
+case 3:
+inputArrayBuffer = (0, _bufferUtils.toArrayBuffer)(value);
+_context2.next = 6;
+return compression.compress(inputArrayBuffer);
+
+case 6:
+compressedArrayBuffer = _context2.sent;
+return _context2.abrupt("return", (0, _bufferUtils.toBuffer)(compressedArrayBuffer));
+
+case 8:
+case "end":
+return _context2.stop();
+}
+}
+}, _callee2);
+}));
+return _deflate.apply(this, arguments);
 }
 
-function
-
-return lzo.compress(value);
+function decompress(_x4, _x5, _x6) {
+return _decompress.apply(this, arguments);
 }
 
-function
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+function _decompress() {
+_decompress = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(method, value, size) {
+var compression, inputArrayBuffer, compressedArrayBuffer;
+return _regenerator.default.wrap(function _callee3$(_context3) {
+while (1) {
+switch (_context3.prev = _context3.next) {
+case 0:
+compression = PARQUET_COMPRESSION_METHODS[method];
+
+if (compression) {
+_context3.next = 3;
+break;
+}
+
+throw new Error("parquet: invalid compression method: ".concat(method));
+
+case 3:
+inputArrayBuffer = (0, _bufferUtils.toArrayBuffer)(value);
+_context3.next = 6;
+return compression.decompress(inputArrayBuffer, size);
+
+case 6:
+compressedArrayBuffer = _context3.sent;
+return _context3.abrupt("return", (0, _bufferUtils.toBuffer)(compressedArrayBuffer));
+
+case 8:
+case "end":
+return _context3.stop();
+}
+}
+}, _callee3);
+}));
+return _decompress.apply(this, arguments);
 }
 
 function inflate(method, value, size) {
@@ -104,41 +175,4 @@ function inflate(method, value, size) {
 
 return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
 }
-
-function inflate_identity(value) {
-return value;
-}
-
-function inflate_gzip(value) {
-return _zlib.default.gunzipSync(value);
-}
-
-function inflate_snappy(value) {
-return _snappyjs.default.uncompress(value);
-}
-
-function inflate_lzo(value, size) {
-lzo = lzo || Util.load('lzo');
-return lzo.decompress(value, size);
-}
-
-function inflate_lz4(value, size) {
-lz4js = lz4js || Util.load('lz4js');
-
-try {
-return Buffer.from(lz4js.decompress(value, size));
-} catch (err) {
-throw err;
-}
-}
-
-function inflate_brotli(value) {
-brotli = brotli || Util.load('brotli');
-
-if (!value.length) {
-return Buffer.alloc(0);
-}
-
-return Buffer.from(brotli.decompress(value));
-}
 //# sourceMappingURL=compression.js.map
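The rewritten compression.js drops the hand-rolled inflate_*/deflate_* helpers in favor of a PARQUET_COMPRESSION_METHODS registry of @loaders.gl/compression codec objects, with brotli, lz4js, lzo and zstd-codec injected through a `modules` map. A condensed TypeScript sketch of the new asynchronous decompression path, using only the calls visible in the transpiled code above (the buffer-utils stand-ins below are assumptions, not the package's helpers):

```typescript
import {NoCompression, GZipCompression, SnappyCompression} from '@loaders.gl/compression';

// Minimal stand-ins for the package's ./utils/buffer-utils helpers (assumed behavior).
const toArrayBuffer = (buf: Buffer): ArrayBuffer =>
  buf.buffer.slice(buf.byteOffset, buf.byteOffset + buf.byteLength);
const toBuffer = (arrayBuffer: ArrayBuffer): Buffer => Buffer.from(arrayBuffer);

// BROTLI / LZ4 / LZO / ZSTD additionally take a {modules} map with the injected codec libraries.
const PARQUET_COMPRESSION_METHODS: Record<string, any> = {
  UNCOMPRESSED: new NoCompression(),
  GZIP: new GZipCompression(),
  SNAPPY: new SnappyCompression()
};

async function decompress(method: string, value: Buffer, size: number): Promise<Buffer> {
  const compression = PARQUET_COMPRESSION_METHODS[method];
  if (!compression) {
    throw new Error(`parquet: invalid compression method: ${method}`);
  }
  // Compression objects work on ArrayBuffers; parquetjs works on Buffers.
  const result = await compression.decompress(toArrayBuffer(value), size);
  return toBuffer(result);
}
```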