@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (143)
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
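
The file list above shows the shape of this release: the monolithic parquetjs/reader.ts (and its dist builds) is removed in favor of parquetjs/parser/* modules (decoders, cursor, envelope reader, reader), a new lib/parse-parquet.ts entry point, dictionary codecs, and a compression layer that delegates to @loaders.gl/compression. As orientation for the hunks that follow, here is a minimal consumer-side sketch of the new parsing path, based on the src/lib/parse-parquet.ts source embedded in the source maps below; the deep import path is an assumption, since the index.ts re-exports are not reproduced in this diff.

// Hedged sketch: driving the new parse-parquet entry point (import path assumed).
import {parseParquet} from '@loaders.gl/parquet/lib/parse-parquet';

async function readParquetRows(url: string): Promise<any[][]> {
  const response = await fetch(url);
  const arrayBuffer = await response.arrayBuffer();
  // parseParquet wraps the buffer in a Blob and drives
  // ParquetReader.openBlob() -> getCursor() -> next() until the cursor is exhausted.
  const rows = await parseParquet(arrayBuffer, {parquet: {type: 'object-row-table'}});
  return rows ?? [];
}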
@@ -0,0 +1 @@
+ {"version":3,"sources":["../../../src/lib/parse-parquet.ts"],"names":["parseParquet","arrayBuffer","options","blob","Blob","parseParquetFileInBatches","batch","ParquetReader","openBlob","reader","rows","cursor","getCursor","next","record","push","close"],"mappings":";;;;;;;;;;;;;;;;;;;;AAGA;;SAEsBA,Y;;;;;4EAAf,kBAA4BC,WAA5B,EAAsDC,OAAtD;AAAA;;AAAA;AAAA;AAAA;AAAA;AACCC,YAAAA,IADD,GACQ,IAAIC,IAAJ,CAAS,CAACH,WAAD,CAAT,CADR;AAAA;AAAA;AAAA;AAAA,qDAEqBI,yBAAyB,CAACF,IAAD,EAAOD,OAAP,CAF9C;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAEYI,YAAAA,KAFZ;AAAA,8CAGIA,KAHJ;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;;AAAA;;AAAA;AAAA;;AAAA;AAAA;;AAAA;AAAA,8CAKE,IALF;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G;;;;SAQgBD,yB;;;;;2FAAhB,iBAA0CF,IAA1C,EAAsDD,OAAtD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,sDACgBK,6BAAcC,QAAd,CAAuBL,IAAvB,CADhB;;AAAA;AACCM,YAAAA,MADD;AAECC,YAAAA,IAFD,GAEiB,EAFjB;AAAA;AAIGC,YAAAA,MAJH,GAIYF,MAAM,CAACG,SAAP,EAJZ;;AAAA;AAAA;AAAA,sDAMoBD,MAAM,CAACE,IAAP,EANpB;;AAAA;AAAA,kBAMKC,MANL;AAAA;AAAA;AAAA;;AAODJ,YAAAA,IAAI,CAACK,IAAL,CAAUD,MAAV;AAPC;AAAA;;AAAA;AAAA;AAAA;AAAA,sDAUGL,MAAM,CAACO,KAAP,EAVH;;AAAA;AAAA;;AAAA;AAAA;AAYL,mBAAMN,IAAN;;AAZK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport type {ParquetLoaderOptions} from '../parquet-loader';\n\nimport {ParquetReader} from '../parquetjs/parser/parquet-reader';\n\nexport async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const blob = new Blob([arrayBuffer]);\n for await (const batch of parseParquetFileInBatches(blob, options)) {\n return batch;\n }\n return null;\n}\n\nexport async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openBlob(blob);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n yield rows;\n}\n"],"file":"parse-parquet.js"}
@@ -0,0 +1,53 @@
+ "use strict";
+
+ var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
+
+ Object.defineProperty(exports, "__esModule", {
+ value: true
+ });
+ exports.readArrayBuffer = readArrayBuffer;
+
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
+
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
+
+ function readArrayBuffer(_x, _x2, _x3) {
+ return _readArrayBuffer.apply(this, arguments);
+ }
+
+ function _readArrayBuffer() {
+ _readArrayBuffer = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(file, start, length) {
+ var slice;
+ return _regenerator.default.wrap(function _callee$(_context) {
+ while (1) {
+ switch (_context.prev = _context.next) {
+ case 0:
+ if (!(file instanceof Blob)) {
+ _context.next = 5;
+ break;
+ }
+
+ slice = file.slice(start, start + length);
+ _context.next = 4;
+ return slice.arrayBuffer();
+
+ case 4:
+ return _context.abrupt("return", _context.sent);
+
+ case 5:
+ _context.next = 7;
+ return file.read(start, start + length);
+
+ case 7:
+ return _context.abrupt("return", _context.sent);
+
+ case 8:
+ case "end":
+ return _context.stop();
+ }
+ }
+ }, _callee);
+ }));
+ return _readArrayBuffer.apply(this, arguments);
+ }
+ //# sourceMappingURL=read-array-buffer.js.map
@@ -0,0 +1 @@
+ {"version":3,"sources":["../../../src/lib/read-array-buffer.ts"],"names":["readArrayBuffer","file","start","length","Blob","slice","arrayBuffer","read"],"mappings":";;;;;;;;;;;;;SAEsBA,e;;;;;+EAAf,iBACLC,IADK,EAELC,KAFK,EAGLC,MAHK;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAKDF,IAAI,YAAYG,IALf;AAAA;AAAA;AAAA;;AAMGC,YAAAA,KANH,GAMWJ,IAAI,CAACI,KAAL,CAAWH,KAAX,EAAkBA,KAAK,GAAGC,MAA1B,CANX;AAAA;AAAA,mBAOUE,KAAK,CAACC,WAAN,EAPV;;AAAA;AAAA;;AAAA;AAAA;AAAA,mBASQL,IAAI,CAACM,IAAL,CAAUL,KAAV,EAAiBA,KAAK,GAAGC,MAAzB,CATR;;AAAA;AAAA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["// Random-Access read\n\nexport async function readArrayBuffer(\n file: Blob | ArrayBuffer | any,\n start: number,\n length: number\n): Promise<ArrayBuffer> {\n if (file instanceof Blob) {\n const slice = file.slice(start, start + length);\n return await slice.arrayBuffer();\n }\n return await file.read(start, start + length);\n}\n\n/**\n * Read a slice of a Blob or File, without loading the entire file into memory\n * The trick when reading File objects is to read successive \"slices\" of the File\n * Per spec https://w3c.github.io/FileAPI/, slicing a File only updates the start and end fields\n * Actually reading from file happens in `readAsArrayBuffer`\n * @param blob to read\n export async function readBlob(blob: Blob): Promise<ArrayBuffer> {\n return await new Promise((resolve, reject) => {\n const fileReader = new FileReader();\n fileReader.onload = (event: ProgressEvent<FileReader>) =>\n resolve(event?.target?.result as ArrayBuffer);\n // TODO - reject with a proper Error\n fileReader.onerror = (error: ProgressEvent<FileReader>) => reject(error);\n fileReader.readAsArrayBuffer(blob);\n });\n}\n*/\n"],"file":"read-array-buffer.js"}
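
The two hunks above add lib/read-array-buffer.js, a small random-access read helper: for a Blob it slices and reads only the requested byte range, and for anything else it falls back to the object's own read(start, end) method. A short usage sketch, with the import path assumed:

// Hedged usage sketch for readArrayBuffer(file, start, length): read the last
// 8 bytes of a Parquet file (4-byte metadata length + 'PAR1' magic) without
// loading the whole Blob into memory. Import path is an assumption.
import {readArrayBuffer} from '@loaders.gl/parquet/lib/read-array-buffer';

async function readFooter(blob: Blob): Promise<DataView> {
  const footer = await readArrayBuffer(blob, blob.size - 8, 8);
  return new DataView(footer);
}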
@@ -1,32 +1,17 @@
  "use strict";
 
- var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
-
  Object.defineProperty(exports, "__esModule", {
  value: true
  });
- exports.ParquetLoader = exports.ParquetWorkerLoader = void 0;
-
- var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
-
- var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
-
- var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
-
- var _reader = require("./parquetjs/reader");
-
- function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
-
- function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
-
- var VERSION = typeof "3.0.12" !== 'undefined' ? "3.0.12" : 'latest';
+ exports._typecheckParquetLoader = exports.ParquetLoader = void 0;
+ var VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
  var DEFAULT_PARQUET_LOADER_OPTIONS = {
  parquet: {
  type: 'object-row-table',
  url: undefined
  }
  };
- var ParquetWorkerLoader = {
+ var ParquetLoader = {
  name: 'Apache Parquet',
  id: 'parquet',
  module: 'parquet',
@@ -36,68 +21,10 @@ var ParquetWorkerLoader = {
  extensions: ['parquet'],
  mimeTypes: ['application/octet-stream'],
  binary: true,
+ tests: ['PAR1', 'PARE'],
  options: DEFAULT_PARQUET_LOADER_OPTIONS
  };
- exports.ParquetWorkerLoader = ParquetWorkerLoader;
-
- var ParquetLoader = _objectSpread(_objectSpread({}, ParquetWorkerLoader), {}, {
- parse: parse
- });
-
  exports.ParquetLoader = ParquetLoader;
-
- function parse(_x, _x2) {
- return _parse.apply(this, arguments);
- }
-
- function _parse() {
- _parse = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(arrayBuffer, options) {
- var reader, rows, cursor, record;
- return _regenerator.default.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- _context.next = 2;
- return _reader.ParquetReader.openArrayBuffer(arrayBuffer);
-
- case 2:
- reader = _context.sent;
- rows = [];
- _context.prev = 4;
- cursor = reader.getCursor();
-
- case 6:
- _context.next = 8;
- return cursor.next();
-
- case 8:
- if (!(record = _context.sent)) {
- _context.next = 12;
- break;
- }
-
- rows.push(record);
- _context.next = 6;
- break;
-
- case 12:
- _context.prev = 12;
- _context.next = 15;
- return reader.close();
-
- case 15:
- return _context.finish(12);
-
- case 16:
- return _context.abrupt("return", rows);
-
- case 17:
- case "end":
- return _context.stop();
- }
- }
- }, _callee, null, [[4,, 12, 16]]);
- }));
- return _parse.apply(this, arguments);
- }
+ var _typecheckParquetLoader = ParquetLoader;
+ exports._typecheckParquetLoader = _typecheckParquetLoader;
  //# sourceMappingURL=parquet-loader.js.map
@@ -1 +1 @@
- {"version":3,"sources":["../../src/parquet-loader.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetWorkerLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","options","ParquetLoader","parse","arrayBuffer","ParquetReader","openArrayBuffer","reader","rows","cursor","getCursor","next","record","push","close"],"mappings":";;;;;;;;;;;;;;;AACA;;;;;;AAIA,IAAMA,OAAO,GAAG,oBAAuB,WAAvB,cAAmD,QAAnE;AASA,IAAMC,8BAAoD,GAAG;AAC3DC,EAAAA,OAAO,EAAE;AACPC,IAAAA,IAAI,EAAE,kBADC;AAEPC,IAAAA,GAAG,EAAEC;AAFE;AADkD,CAA7D;AAQO,IAAMC,mBAA2B,GAAG;AACzCC,EAAAA,IAAI,EAAE,gBADmC;AAEzCC,EAAAA,EAAE,EAAE,SAFqC;AAGzCC,EAAAA,MAAM,EAAE,SAHiC;AAIzCC,EAAAA,OAAO,EAAEV,OAJgC;AAKzCW,EAAAA,MAAM,EAAE,IALiC;AAMzCC,EAAAA,QAAQ,EAAE,OAN+B;AAOzCC,EAAAA,UAAU,EAAE,CAAC,SAAD,CAP6B;AAQzCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CAR8B;AASzCC,EAAAA,MAAM,EAAE,IATiC;AAWzCC,EAAAA,OAAO,EAAEf;AAXgC,CAApC;;;AAeA,IAAMgB,aAA+B,mCACvCX,mBADuC;AAE1CY,EAAAA,KAAK,EAALA;AAF0C,EAArC;;;;SAKQA,K;;;;;qEAAf,iBAAqBC,WAArB,EAA+CH,OAA/C;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,mBACuBI,sBAAcC,eAAd,CAA8BF,WAA9B,CADvB;;AAAA;AACQG,YAAAA,MADR;AAEQC,YAAAA,IAFR,GAEwB,EAFxB;AAAA;AAIUC,YAAAA,MAJV,GAImBF,MAAM,CAACG,SAAP,EAJnB;;AAAA;AAAA;AAAA,mBAM2BD,MAAM,CAACE,IAAP,EAN3B;;AAAA;AAAA,kBAMYC,MANZ;AAAA;AAAA;AAAA;;AAOMJ,YAAAA,IAAI,CAACK,IAAL,CAAUD,MAAV;AAPN;AAAA;;AAAA;AAAA;AAAA;AAAA,mBAUUL,MAAM,CAACO,KAAP,EAVV;;AAAA;AAAA;;AAAA;AAAA,6CAYSN,IAZT;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,G","sourcesContent":["import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport {ParquetReader} from './parquetjs/reader';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetWorkerLoader: Loader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n // tests: ['PARQUET'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader: LoaderWithParser = {\n ...ParquetWorkerLoader,\n parse\n};\n\nasync function parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openArrayBuffer(arrayBuffer);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n return rows;\n}\n"],"file":"parquet-loader.js"}
+ {"version":3,"sources":["../../src/parquet-loader.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","tests","options","_typecheckParquetLoader"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AASA,IAAMC,8BAAoD,GAAG;AAC3DC,EAAAA,OAAO,EAAE;AACPC,IAAAA,IAAI,EAAE,kBADC;AAEPC,IAAAA,GAAG,EAAEC;AAFE;AADkD,CAA7D;AAQO,IAAMC,aAAa,GAAG;AAC3BC,EAAAA,IAAI,EAAE,gBADqB;AAE3BC,EAAAA,EAAE,EAAE,SAFuB;AAG3BC,EAAAA,MAAM,EAAE,SAHmB;AAI3BC,EAAAA,OAAO,EAAEV,OAJkB;AAK3BW,EAAAA,MAAM,EAAE,IALmB;AAM3BC,EAAAA,QAAQ,EAAE,OANiB;AAO3BC,EAAAA,UAAU,EAAE,CAAC,SAAD,CAPe;AAQ3BC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CARgB;AAS3BC,EAAAA,MAAM,EAAE,IATmB;AAU3BC,EAAAA,KAAK,EAAE,CAAC,MAAD,EAAS,MAAT,CAVoB;AAW3BC,EAAAA,OAAO,EAAEhB;AAXkB,CAAtB;;AAcA,IAAMiB,uBAA+B,GAAGZ,aAAxC","sourcesContent":["import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n tests: ['PAR1', 'PARE'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nexport const _typecheckParquetLoader: Loader = ParquetLoader;\n"],"file":"parquet-loader.js"}
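
Compared with 3.0.12, parquet-loader.ts no longer defines parse() inline: the ParquetWorkerLoader/ParquetLoader split is collapsed into a single descriptor (now with PAR1/PARE magic-byte tests), and the parser itself moved to lib/parse-parquet.ts. The two pieces are presumably recombined in index.ts, which this diff does not reproduce; a hedged sketch of that wiring, with relative import paths and the export name purely illustrative:

// Assumed recombination of the loader descriptor with the new parser (not taken from this diff).
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
import type {ParquetLoaderOptions} from './parquet-loader';
import {ParquetLoader} from './parquet-loader';
import {parseParquet} from './lib/parse-parquet';

// Spread the plain descriptor and attach the async parser.
export const ParquetLoaderWithParser: LoaderWithParser = {
  ...ParquetLoader,
  parse: (arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) =>
    parseParquet(arrayBuffer, options)
};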
@@ -4,7 +4,7 @@ Object.defineProperty(exports, "__esModule", {
  value: true
  });
  exports.ParquetWriter = void 0;
- var VERSION = typeof "3.0.12" !== 'undefined' ? "3.0.12" : 'latest';
+ var VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
  var DEFAULT_PARQUET_LOADER_OPTIONS = {};
  var ParquetWriter = {
  name: 'Apache Parquet',
@@ -1 +1 @@
- {"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,oBAAuB,WAAvB,cAAmD,QAAnE;AAIA,IAAMC,8BAA8B,GAAG,EAAvC;AAEO,IAAMC,aAAqB,GAAG;AACnCC,EAAAA,IAAI,EAAE,gBAD6B;AAEnCC,EAAAA,EAAE,EAAE,SAF+B;AAGnCC,EAAAA,MAAM,EAAE,SAH2B;AAInCC,EAAAA,OAAO,EAAEN,OAJ0B;AAKnCO,EAAAA,UAAU,EAAE,CAAC,SAAD,CALuB;AAMnCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CANwB;AAOnCC,EAAAA,UAAU,EAAVA,UAPmC;AAQnCC,EAAAA,MAAM,EAAE,IAR2B;AASnCC,EAAAA,OAAO,EAAEV;AAT0B,CAA9B;;;AAYP,SAASQ,UAAT,CAAoBG,IAApB,EAA0BD,OAA1B,EAA0D;AACxD,SAAO,IAAIE,WAAJ,CAAgB,CAAhB,CAAP;AACD","sourcesContent":["import type {Writer} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetWriterOptions = {};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS = {};\n\nexport const ParquetWriter: Writer = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n encodeSync,\n binary: true,\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nfunction encodeSync(data, options?: ParquetWriterOptions) {\n return new ArrayBuffer(0);\n}\n"],"file":"parquet-writer.js"}
+ {"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":";;;;;;AAIA,IAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AAIA,IAAMC,8BAA8B,GAAG,EAAvC;AAEO,IAAMC,aAAqB,GAAG;AACnCC,EAAAA,IAAI,EAAE,gBAD6B;AAEnCC,EAAAA,EAAE,EAAE,SAF+B;AAGnCC,EAAAA,MAAM,EAAE,SAH2B;AAInCC,EAAAA,OAAO,EAAEN,OAJ0B;AAKnCO,EAAAA,UAAU,EAAE,CAAC,SAAD,CALuB;AAMnCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CANwB;AAOnCC,EAAAA,UAAU,EAAVA,UAPmC;AAQnCC,EAAAA,MAAM,EAAE,IAR2B;AASnCC,EAAAA,OAAO,EAAEV;AAT0B,CAA9B;;;AAYP,SAASQ,UAAT,CAAoBG,IAApB,EAA0BD,OAA1B,EAA0D;AACxD,SAAO,IAAIE,WAAJ,CAAgB,CAAhB,CAAP;AACD","sourcesContent":["import type {Writer} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetWriterOptions = {};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS = {};\n\nexport const ParquetWriter: Writer = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n encodeSync,\n binary: true,\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nfunction encodeSync(data, options?: ParquetWriterOptions) {\n return new ArrayBuffer(0);\n}\n"],"file":"parquet-writer.js"}
@@ -0,0 +1,30 @@
+ "use strict";
+
+ var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
+
+ Object.defineProperty(exports, "__esModule", {
+ value: true
+ });
+ exports.decodeValues = decodeValues;
+ exports.encodeValues = encodeValues;
+
+ var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
+
+ var _rle = require("./rle");
+
+ function ownKeys(object, enumerableOnly) { var keys = Object.keys(object); if (Object.getOwnPropertySymbols) { var symbols = Object.getOwnPropertySymbols(object); if (enumerableOnly) { symbols = symbols.filter(function (sym) { return Object.getOwnPropertyDescriptor(object, sym).enumerable; }); } keys.push.apply(keys, symbols); } return keys; }
+
+ function _objectSpread(target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i] != null ? arguments[i] : {}; if (i % 2) { ownKeys(Object(source), true).forEach(function (key) { (0, _defineProperty2.default)(target, key, source[key]); }); } else if (Object.getOwnPropertyDescriptors) { Object.defineProperties(target, Object.getOwnPropertyDescriptors(source)); } else { ownKeys(Object(source)).forEach(function (key) { Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(source, key)); }); } } return target; }
+
+ function decodeValues(type, cursor, count, opts) {
+ opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);
+ cursor.offset += 1;
+ return (0, _rle.decodeValues)(type, cursor, count, _objectSpread(_objectSpread({}, opts), {}, {
+ disableEnvelope: true
+ }));
+ }
+
+ function encodeValues(type, cursor, count, opts) {
+ throw new Error('Encode dictionary functionality is not supported');
+ }
+ //# sourceMappingURL=dictionary.js.map
@@ -0,0 +1 @@
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/dictionary.ts"],"names":["decodeValues","type","cursor","count","opts","bitWidth","buffer","slice","offset","readInt8","disableEnvelope","encodeValues","Error"],"mappings":";;;;;;;;;;;;AAAA;;;;;;AAEO,SAASA,YAAT,CAAsBC,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtDA,EAAAA,IAAI,CAACC,QAAL,GAAgBH,MAAM,CAACI,MAAP,CAAcC,KAAd,CAAoBL,MAAM,CAACM,MAA3B,EAAmCN,MAAM,CAACM,MAAP,GAAgB,CAAnD,EAAsDC,QAAtD,CAA+D,CAA/D,CAAhB;AACAP,EAAAA,MAAM,CAACM,MAAP,IAAiB,CAAjB;AACA,SAAO,uBAAgBP,IAAhB,EAAsBC,MAAtB,EAA8BC,KAA9B,kCAAyCC,IAAzC;AAA+CM,IAAAA,eAAe,EAAE;AAAhE,KAAP;AACD;;AAEM,SAASC,YAAT,CAAsBV,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtD,QAAM,IAAIQ,KAAJ,CAAU,kDAAV,CAAN;AACD","sourcesContent":["import {decodeValues as decodeRleValues} from './rle';\n\nexport function decodeValues(type, cursor, count, opts) {\n opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);\n cursor.offset += 1;\n return decodeRleValues(type, cursor, count, {...opts, disableEnvelope: true});\n}\n\nexport function encodeValues(type, cursor, count, opts) {\n throw new Error('Encode dictionary functionality is not supported');\n}\n"],"file":"dictionary.js"}
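
The dictionary codec added above decodes a page by reading a one-byte bit width and then delegating to the RLE decoder with disableEnvelope: true; encoding deliberately throws. A small worked call against the {buffer, offset} cursor shape the codecs use; the byte values are made up for illustration and the import path is assumed:

// Hedged worked example for the new dictionary codec (made-up page bytes, path assumed).
import {decodeValues} from '@loaders.gl/parquet/parquetjs/codecs/dictionary';

// Byte 0: bit width (2). Byte 1: RLE run header 0x08 => repeated run of length 4.
// Byte 2: the repeated value (1), stored in ceil(bitWidth / 8) = 1 byte.
const page = Buffer.from([0x02, 0x08, 0x01]);
const cursor = {buffer: page, offset: 0};
const indices = decodeValues('INT32', cursor, 4, {});
// indices === [1, 1, 1, 1]: four dictionary indices, to be resolved against
// the values decoded from the matching dictionary page.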
@@ -14,6 +14,8 @@ var PLAIN = _interopRequireWildcard(require("./plain"));
 
  var RLE = _interopRequireWildcard(require("./rle"));
 
+ var DICTIONARY = _interopRequireWildcard(require("./dictionary"));
+
  var _declare = require("./declare");
 
  Object.keys(_declare).forEach(function (key) {
@@ -40,6 +42,14 @@ var PARQUET_CODECS = {
  RLE: {
  encodeValues: RLE.encodeValues,
  decodeValues: RLE.decodeValues
+ },
+ PLAIN_DICTIONARY: {
+ encodeValues: DICTIONARY.encodeValues,
+ decodeValues: DICTIONARY.decodeValues
+ },
+ RLE_DICTIONARY: {
+ encodeValues: DICTIONARY.encodeValues,
+ decodeValues: DICTIONARY.decodeValues
  }
  };
  exports.PARQUET_CODECS = PARQUET_CODECS;
@@ -1 +1 @@
- {"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PARQUET_CODECS","PLAIN","encodeValues","decodeValues","RLE"],"mappings":";;;;;;;;;;;;AAGA;;AACA;;AAEA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;;;;AAEO,IAAMA,cAAqD,GAAG;AACnEC,EAAAA,KAAK,EAAE;AACLC,IAAAA,YAAY,EAAED,KAAK,CAACC,YADf;AAELC,IAAAA,YAAY,EAAEF,KAAK,CAACE;AAFf,GAD4D;AAKnEC,EAAAA,GAAG,EAAE;AACHF,IAAAA,YAAY,EAAEE,GAAG,CAACF,YADf;AAEHC,IAAAA,YAAY,EAAEC,GAAG,CAACD;AAFf;AAL8D,CAA9D","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport type {ParquetCodec} from '../schema/declare';\nimport type {ParquetCodecKit} from './declare';\nimport * as PLAIN from './plain';\nimport * as RLE from './rle';\n\nexport * from './declare';\n\nexport const PARQUET_CODECS: Record<ParquetCodec, ParquetCodecKit> = {\n PLAIN: {\n encodeValues: PLAIN.encodeValues,\n decodeValues: PLAIN.decodeValues\n },\n RLE: {\n encodeValues: RLE.encodeValues,\n decodeValues: RLE.decodeValues\n }\n};\n"],"file":"index.js"}
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PARQUET_CODECS","PLAIN","encodeValues","decodeValues","RLE","PLAIN_DICTIONARY","DICTIONARY","RLE_DICTIONARY"],"mappings":";;;;;;;;;;;;AAGA;;AACA;;AACA;;AAEA;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;;;;AAEO,IAAMA,cAAqD,GAAG;AACnEC,EAAAA,KAAK,EAAE;AACLC,IAAAA,YAAY,EAAED,KAAK,CAACC,YADf;AAELC,IAAAA,YAAY,EAAEF,KAAK,CAACE;AAFf,GAD4D;AAKnEC,EAAAA,GAAG,EAAE;AACHF,IAAAA,YAAY,EAAEE,GAAG,CAACF,YADf;AAEHC,IAAAA,YAAY,EAAEC,GAAG,CAACD;AAFf,GAL8D;AAUnEE,EAAAA,gBAAgB,EAAE;AAEhBH,IAAAA,YAAY,EAAEI,UAAU,CAACJ,YAFT;AAGhBC,IAAAA,YAAY,EAAEG,UAAU,CAACH;AAHT,GAViD;AAgBnEI,EAAAA,cAAc,EAAE;AAEdL,IAAAA,YAAY,EAAEI,UAAU,CAACJ,YAFX;AAGdC,IAAAA,YAAY,EAAEG,UAAU,CAACH;AAHX;AAhBmD,CAA9D","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport type {ParquetCodec} from '../schema/declare';\nimport type {ParquetCodecKit} from './declare';\nimport * as PLAIN from './plain';\nimport * as RLE from './rle';\nimport * as DICTIONARY from './dictionary';\n\nexport * from './declare';\n\nexport const PARQUET_CODECS: Record<ParquetCodec, ParquetCodecKit> = {\n PLAIN: {\n encodeValues: PLAIN.encodeValues,\n decodeValues: PLAIN.decodeValues\n },\n RLE: {\n encodeValues: RLE.encodeValues,\n decodeValues: RLE.decodeValues\n },\n // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification.\n PLAIN_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n },\n // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary page for Parquet 2.0+ files.\n RLE_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n }\n};\n"],"file":"index.js"}
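
With the registry above, PLAIN_DICTIONARY and RLE_DICTIONARY both resolve to the new dictionary codec, alongside PLAIN and RLE. A column reader can then dispatch on the page's encoding name; the helper below is illustrative (it is not part of this diff) and the import path is assumed:

// Hedged dispatch sketch over the PARQUET_CODECS registry.
import {PARQUET_CODECS} from '@loaders.gl/parquet/parquetjs/codecs';

function decodePageValues(
  encoding: keyof typeof PARQUET_CODECS,
  type: any,
  cursor: {buffer: Buffer; offset: number},
  count: number,
  opts: any
) {
  const codec = PARQUET_CODECS[encoding];
  if (!codec) {
    throw new Error(`unsupported encoding: ${encoding}`);
  }
  // PLAIN_DICTIONARY and RLE_DICTIONARY share the same decodeValues implementation.
  return codec.decodeValues(type, cursor, count, opts);
}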
@@ -134,7 +134,7 @@ function decodeRunRepeated(cursor, count, opts) {
  var value = 0;
 
  for (var i = 0; i < Math.ceil(bitWidth / 8); i++) {
- value <<= 8;
+ value << 8;
  value += cursor.buffer[cursor.offset];
  cursor.offset += 1;
  }
@@ -166,7 +166,7 @@ function encodeRunRepeated(value, count, opts) {
 
  for (var i = 0; i < buf.length; i++) {
  buf.writeUInt8(value & 0xff, i);
- value >>= 8;
+ value >> 8;
  }
 
  return Buffer.concat([Buffer.from(_varint.default.encode(count << 1)), buf]);
@@ -1 +1 @@
- {"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","varint","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":";;;;;;;;;;;;AAIA;;AAGO,SAASA,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAW,UAACC,CAAD;AAAA,eAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAAf;AAAA,OAAX,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;;AAEM,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,QAAME,MAAM,GAAGC,gBAAOC,MAAP,CAAcL,MAAM,CAACM,MAArB,EAA6BN,MAAM,CAACE,MAApC,CAAf;;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiBE,gBAAOG,cAAP,CAAsBJ,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AAAA;;AACd,UAAMF,MAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;;AACA,iBAAA1B,MAAM,EAACgB,IAAP,iDAAee,kBAAkB,CAACR,MAAD,EAASC,MAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AAAA;;AACL,UAAMuB,OAAK,GAAGE,MAAM,IAAI,CAAxB;;AACA,kBAAA1B,MAAM,EAACgB,IAAP,kDAAegB,iBAAiB,CAACT,MAAD,EAASC,OAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACiC,KAAP,CAAa,CAAb,EAAgBT,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS+B,kBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,
EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,MAAIV,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,MAAMF,MAAM,GAAG,IAAImC,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGV,KAA/B,EAAsCa,CAAC,EAAvC,EAA2C;AACzC,QAAId,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAAP,GAAgBa,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnErC,MAAAA,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDX,EAAAA,MAAM,CAACE,MAAP,IAAiBS,QAAQ,IAAIV,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAASgC,iBAAT,CACET,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI7B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG2B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CvB,CAAC,EAA9C,EAAkD;AAChD6B,IAAAA,KAAK,KAAK,CAAV;AACAA,IAAAA,KAAK,IAAIjB,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIU,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAAS1B,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIlC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIyB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGlC,MAAM,CAACY,MAAtC,EAA8CyB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACrC,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE5B,MAAAA,GAAG,CAACgC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO9B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAgB3C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2ByB,KAA3B,EAA0ChB,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAM5B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACsC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B7B,CAA7B;AACA6B,IAAAA,KAAK,KAAK,CAAV;AACD;;AAED,SAAOjC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAcnB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 
=== 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n value <<= 8;\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n 
buf.writeUInt8(value & 0xff, i);\n value >>= 8;\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","varint","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":";;;;;;;;;;;;AAIA;;AAGO,SAASA,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAW,UAACC,CAAD;AAAA,eAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAAf;AAAA,OAAX,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,6BAA+BH,IAA/B,EAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,MAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;;AAEM,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,QAAME,MAAM,GAAGC,gBAAOC,MAAP,CAAcL,MAAM,CAACM,MAArB,EAA6BN,MAAM,CAACE,MAApC,CAAf;;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiBE,gBAAOG,cAAP,CAAsBJ,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AAAA;;AACd,UAAMF,MAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;;AACA,iBAAA1B,MAAM,EAACgB,IAAP,iDAAee,kBAAkB,CAACR,MAAD,EAASC,MAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AAAA;;AACL,UAAMuB,OAAK,GAAGE,MAAM,IAAI,CAAxB;;AACA,kBAAA1B,MAAM,EAACgB,IAAP,kDAAegB,iBAAiB,CAACT,MAAD,EAASC,OAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACiC,KAAP,CAAa,CAAb,EAAgBT,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS+B,kBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,
EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,MAAIV,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,MAAMF,MAAM,GAAG,IAAImC,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGV,KAA/B,EAAsCa,CAAC,EAAvC,EAA2C;AACzC,QAAId,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAAP,GAAgBa,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnErC,MAAAA,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDX,EAAAA,MAAM,CAACE,MAAP,IAAiBS,QAAQ,IAAIV,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAASgC,iBAAT,CACET,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI7B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG2B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CvB,CAAC,EAA9C,EAAkD;AAEhD6B,IAAAA,KAAK,IAAI,CAAT;AACAA,IAAAA,KAAK,IAAIjB,MAAM,CAACM,MAAP,CAAcN,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIU,KAAJ,CAAUX,KAAV,EAAiBY,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAAS1B,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,MAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIlC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIyB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGlC,MAAM,CAACY,MAAtC,EAA8CyB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACrC,MAAM,CAACsC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE5B,MAAAA,GAAG,CAACgC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO9B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAgB3C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2ByB,KAA3B,EAA0ChB,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,MAAMiC,QAAgB,GAAGjC,IAAI,CAACiC,QAA9B;AAEA,MAAM5B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa8B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAIvB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACsC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B7B,CAA7B;AAEA6B,IAAAA,KAAK,IAAI,CAAT;AACD;;AAED,SAAOjC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACmC,IAAP,CAAYf,gBAAOgB,MAAP,CAAcnB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 
=== 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = 
Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
@@ -2,99 +2,170 @@
 
  var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
 
- var _typeof = require("@babel/runtime/helpers/typeof");
-
  Object.defineProperty(exports, "__esModule", {
  value: true
  });
+ exports.preloadCompressions = preloadCompressions;
  exports.deflate = deflate;
+ exports.decompress = decompress;
  exports.inflate = inflate;
  exports.PARQUET_COMPRESSION_METHODS = void 0;
 
- var Util = _interopRequireWildcard(require("./util"));
+ var _regenerator = _interopRequireDefault(require("@babel/runtime/regenerator"));
 
- var _zlib = _interopRequireDefault(require("zlib"));
+ var _asyncToGenerator2 = _interopRequireDefault(require("@babel/runtime/helpers/asyncToGenerator"));
 
- var _snappyjs = _interopRequireDefault(require("snappyjs"));
+ var _compression = require("@loaders.gl/compression");
 
- function _getRequireWildcardCache(nodeInterop) { if (typeof WeakMap !== "function") return null; var cacheBabelInterop = new WeakMap(); var cacheNodeInterop = new WeakMap(); return (_getRequireWildcardCache = function _getRequireWildcardCache(nodeInterop) { return nodeInterop ? cacheNodeInterop : cacheBabelInterop; })(nodeInterop); }
+ var _bufferUtils = require("./utils/buffer-utils");
 
- function _interopRequireWildcard(obj, nodeInterop) { if (!nodeInterop && obj && obj.__esModule) { return obj; } if (obj === null || _typeof(obj) !== "object" && typeof obj !== "function") { return { default: obj }; } var cache = _getRequireWildcardCache(nodeInterop); if (cache && cache.has(obj)) { return cache.get(obj); } var newObj = {}; var hasPropertyDescriptor = Object.defineProperty && Object.getOwnPropertyDescriptor; for (var key in obj) { if (key !== "default" && Object.prototype.hasOwnProperty.call(obj, key)) { var desc = hasPropertyDescriptor ? Object.getOwnPropertyDescriptor(obj, key) : null; if (desc && (desc.get || desc.set)) { Object.defineProperty(newObj, key, desc); } else { newObj[key] = obj[key]; } } } newObj.default = obj; if (cache) { cache.set(obj, newObj); } return newObj; }
22
+ var _decompress2 = _interopRequireDefault(require("brotli/decompress"));
23
23
 
24
- var brotli;
25
- var lzo;
26
- var lz4js;
27
- var PARQUET_COMPRESSION_METHODS = {
28
- UNCOMPRESSED: {
29
- deflate: deflate_identity,
30
- inflate: inflate_identity
31
- },
32
- GZIP: {
33
- deflate: deflate_gzip,
34
- inflate: inflate_gzip
35
- },
36
- SNAPPY: {
37
- deflate: deflate_snappy,
38
- inflate: inflate_snappy
39
- },
40
- LZO: {
41
- deflate: deflate_lzo,
42
- inflate: inflate_lzo
43
- },
44
- BROTLI: {
45
- deflate: deflate_brotli,
46
- inflate: inflate_brotli
24
+ var _lz4js = _interopRequireDefault(require("lz4js"));
25
+
26
+ var _lzo = _interopRequireDefault(require("lzo"));
27
+
28
+ var _zstdCodec = require("zstd-codec");
29
+
30
+ var modules = {
31
+ brotli: {
32
+ decompress: _decompress2.default,
33
+ compress: function compress() {
34
+ throw new Error('brotli compress');
35
+ }
47
36
  },
48
- LZ4: {
49
- deflate: deflate_lz4,
50
- inflate: inflate_lz4
51
- }
37
+ lz4js: _lz4js.default,
38
+ lzo: _lzo.default,
39
+ 'zstd-codec': _zstdCodec.ZstdCodec
40
+ };
41
+ var PARQUET_COMPRESSION_METHODS = {
42
+ UNCOMPRESSED: new _compression.NoCompression(),
43
+ GZIP: new _compression.GZipCompression(),
44
+ SNAPPY: new _compression.SnappyCompression(),
45
+ BROTLI: new _compression.BrotliCompression({
46
+ modules: modules
47
+ }),
48
+ LZ4: new _compression.LZ4Compression({
49
+ modules: modules
50
+ }),
51
+ LZ4_RAW: new _compression.LZ4Compression({
52
+ modules: modules
53
+ }),
54
+ LZO: new _compression.LZOCompression({
55
+ modules: modules
56
+ }),
57
+ ZSTD: new _compression.ZstdCompression({
58
+ modules: modules
59
+ })
52
60
  };
53
61
  exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS;
54
62
 
55
- function deflate(method, value) {
56
- if (!(method in PARQUET_COMPRESSION_METHODS)) {
57
- throw new Error("invalid compression method: ".concat(method));
58
- }
59
-
60
- return PARQUET_COMPRESSION_METHODS[method].deflate(value);
63
+ function preloadCompressions(_x) {
64
+ return _preloadCompressions.apply(this, arguments);
61
65
  }
62
66
 
63
- function deflate_identity(value) {
64
- return value;
67
+ function _preloadCompressions() {
68
+ _preloadCompressions = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee(options) {
69
+ var compressions;
70
+ return _regenerator.default.wrap(function _callee$(_context) {
71
+ while (1) {
72
+ switch (_context.prev = _context.next) {
73
+ case 0:
74
+ compressions = Object.values(PARQUET_COMPRESSION_METHODS);
75
+ _context.next = 3;
76
+ return Promise.all(compressions.map(function (compression) {
77
+ return compression.preload();
78
+ }));
79
+
80
+ case 3:
81
+ return _context.abrupt("return", _context.sent);
82
+
83
+ case 4:
84
+ case "end":
85
+ return _context.stop();
86
+ }
87
+ }
88
+ }, _callee);
89
+ }));
90
+ return _preloadCompressions.apply(this, arguments);
65
91
  }
66
92
 
67
- function deflate_gzip(value) {
68
- return _zlib.default.gzipSync(value);
93
+ function deflate(_x2, _x3) {
94
+ return _deflate.apply(this, arguments);
69
95
  }
70
96
 
71
- function deflate_snappy(value) {
72
- return _snappyjs.default.compress(value);
97
+ function _deflate() {
98
+ _deflate = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee2(method, value) {
99
+ var compression, inputArrayBuffer, compressedArrayBuffer;
100
+ return _regenerator.default.wrap(function _callee2$(_context2) {
101
+ while (1) {
102
+ switch (_context2.prev = _context2.next) {
103
+ case 0:
104
+ compression = PARQUET_COMPRESSION_METHODS[method];
105
+
106
+ if (compression) {
107
+ _context2.next = 3;
108
+ break;
109
+ }
110
+
111
+ throw new Error("parquet: invalid compression method: ".concat(method));
112
+
113
+ case 3:
114
+ inputArrayBuffer = (0, _bufferUtils.toArrayBuffer)(value);
115
+ _context2.next = 6;
116
+ return compression.compress(inputArrayBuffer);
117
+
118
+ case 6:
119
+ compressedArrayBuffer = _context2.sent;
120
+ return _context2.abrupt("return", (0, _bufferUtils.toBuffer)(compressedArrayBuffer));
121
+
122
+ case 8:
123
+ case "end":
124
+ return _context2.stop();
125
+ }
126
+ }
127
+ }, _callee2);
128
+ }));
129
+ return _deflate.apply(this, arguments);
73
130
  }
74
131
 
75
- function deflate_lzo(value) {
76
- lzo = lzo || Util.load('lzo');
77
- return lzo.compress(value);
132
+ function decompress(_x4, _x5, _x6) {
133
+ return _decompress.apply(this, arguments);
78
134
  }
79
135
 
80
- function deflate_brotli(value) {
81
- brotli = brotli || Util.load('brotli');
82
- var result = brotli.compress(value, {
83
- mode: 0,
84
- quality: 8,
85
- lgwin: 22
86
- });
87
- return result ? Buffer.from(result) : Buffer.alloc(0);
88
- }
89
-
90
- function deflate_lz4(value) {
91
- lz4js = lz4js || Util.load('lz4js');
92
-
93
- try {
94
- return Buffer.from(lz4js.compress(value));
95
- } catch (err) {
96
- throw err;
97
- }
136
+ function _decompress() {
137
+ _decompress = (0, _asyncToGenerator2.default)(_regenerator.default.mark(function _callee3(method, value, size) {
138
+ var compression, inputArrayBuffer, compressedArrayBuffer;
139
+ return _regenerator.default.wrap(function _callee3$(_context3) {
140
+ while (1) {
141
+ switch (_context3.prev = _context3.next) {
142
+ case 0:
143
+ compression = PARQUET_COMPRESSION_METHODS[method];
144
+
145
+ if (compression) {
146
+ _context3.next = 3;
147
+ break;
148
+ }
149
+
150
+ throw new Error("parquet: invalid compression method: ".concat(method));
151
+
152
+ case 3:
153
+ inputArrayBuffer = (0, _bufferUtils.toArrayBuffer)(value);
154
+ _context3.next = 6;
155
+ return compression.decompress(inputArrayBuffer, size);
156
+
157
+ case 6:
158
+ compressedArrayBuffer = _context3.sent;
159
+ return _context3.abrupt("return", (0, _bufferUtils.toBuffer)(compressedArrayBuffer));
160
+
161
+ case 8:
162
+ case "end":
163
+ return _context3.stop();
164
+ }
165
+ }
166
+ }, _callee3);
167
+ }));
168
+ return _decompress.apply(this, arguments);
98
169
  }
99
170
 
100
171
  function inflate(method, value, size) {
@@ -104,41 +175,4 @@ function inflate(method, value, size) {
 
  return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
  }
-
- function inflate_identity(value) {
- return value;
- }
-
- function inflate_gzip(value) {
- return _zlib.default.gunzipSync(value);
- }
-
- function inflate_snappy(value) {
- return _snappyjs.default.uncompress(value);
- }
-
- function inflate_lzo(value, size) {
- lzo = lzo || Util.load('lzo');
- return lzo.decompress(value, size);
- }
-
- function inflate_lz4(value, size) {
- lz4js = lz4js || Util.load('lz4js');
-
- try {
- return Buffer.from(lz4js.decompress(value, size));
- } catch (err) {
- throw err;
- }
- }
-
- function inflate_brotli(value) {
- brotli = brotli || Util.load('brotli');
-
- if (!value.length) {
- return Buffer.alloc(0);
- }
-
- return Buffer.from(brotli.decompress(value));
- }
  //# sourceMappingURL=compression.js.map
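For orientation, the rewritten compression.js above replaces the synchronous zlib/snappyjs helpers with codec objects from @loaders.gl/compression and exposes async preloadCompressions, deflate and decompress functions. A minimal usage sketch, assuming an ES-module context; the import path is an internal module, not a documented public export, and the option object passed to preloadCompressions is illustrative only.

// Hedged sketch of the new async compression API introduced in this diff.
import {preloadCompressions, deflate, decompress} from '@loaders.gl/parquet/parquetjs/compression';

async function roundTrip(input: Buffer): Promise<Buffer> {
  // Some codecs (e.g. ZSTD via zstd-codec) load their implementation lazily, so preload first.
  await preloadCompressions({});
  // The method name must be one of the keys of PARQUET_COMPRESSION_METHODS, e.g. 'GZIP' or 'SNAPPY'.
  const compressed = await deflate('GZIP', input);
  // The third argument is the expected uncompressed size, which some codecs (LZO, LZ4) require.
  return decompress('GZIP', compressed, input.length);
}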