@loaders.gl/parquet 3.0.12 → 3.1.0-alpha.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/dist.min.js +7 -18
  2. package/dist/dist.min.js.map +1 -1
  3. package/dist/es5/bundle.js +2 -4
  4. package/dist/es5/bundle.js.map +1 -1
  5. package/dist/es5/constants.js +17 -0
  6. package/dist/es5/constants.js.map +1 -0
  7. package/dist/es5/index.js +53 -21
  8. package/dist/es5/index.js.map +1 -1
  9. package/dist/es5/lib/convert-schema.js +82 -0
  10. package/dist/es5/lib/convert-schema.js.map +1 -0
  11. package/dist/es5/lib/parse-parquet.js +173 -0
  12. package/dist/es5/lib/parse-parquet.js.map +1 -0
  13. package/dist/es5/lib/read-array-buffer.js +53 -0
  14. package/dist/es5/lib/read-array-buffer.js.map +1 -0
  15. package/dist/es5/parquet-loader.js +6 -79
  16. package/dist/es5/parquet-loader.js.map +1 -1
  17. package/dist/es5/parquet-writer.js +1 -1
  18. package/dist/es5/parquet-writer.js.map +1 -1
  19. package/dist/es5/parquetjs/codecs/dictionary.js +30 -0
  20. package/dist/es5/parquetjs/codecs/dictionary.js.map +1 -0
  21. package/dist/es5/parquetjs/codecs/index.js +10 -0
  22. package/dist/es5/parquetjs/codecs/index.js.map +1 -1
  23. package/dist/es5/parquetjs/codecs/rle.js +2 -2
  24. package/dist/es5/parquetjs/codecs/rle.js.map +1 -1
  25. package/dist/es5/parquetjs/compression.js +138 -104
  26. package/dist/es5/parquetjs/compression.js.map +1 -1
  27. package/dist/es5/parquetjs/{writer.js → encoder/writer.js} +397 -228
  28. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  29. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  30. package/dist/es5/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  31. package/dist/es5/parquetjs/parser/decoders.js +495 -0
  32. package/dist/es5/parquetjs/parser/decoders.js.map +1 -0
  33. package/dist/es5/parquetjs/parser/parquet-cursor.js +215 -0
  34. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  35. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +452 -0
  36. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  37. package/dist/es5/parquetjs/parser/parquet-reader.js +413 -0
  38. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -0
  39. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  40. package/dist/es5/parquetjs/schema/schema.js +2 -0
  41. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  42. package/dist/es5/parquetjs/schema/shred.js +2 -1
  43. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  44. package/dist/es5/parquetjs/schema/types.js +79 -4
  45. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  46. package/dist/es5/parquetjs/utils/buffer-utils.js +21 -0
  47. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  48. package/dist/es5/parquetjs/utils/file-utils.js +108 -0
  49. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -0
  50. package/dist/es5/parquetjs/{util.js → utils/read-utils.js} +13 -113
  51. package/dist/es5/parquetjs/utils/read-utils.js.map +1 -0
  52. package/dist/esm/bundle.js +2 -4
  53. package/dist/esm/bundle.js.map +1 -1
  54. package/dist/esm/constants.js +6 -0
  55. package/dist/esm/constants.js.map +1 -0
  56. package/dist/esm/index.js +14 -4
  57. package/dist/esm/index.js.map +1 -1
  58. package/dist/esm/lib/convert-schema.js +71 -0
  59. package/dist/esm/lib/convert-schema.js.map +1 -0
  60. package/dist/esm/lib/parse-parquet.js +28 -0
  61. package/dist/esm/lib/parse-parquet.js.map +1 -0
  62. package/dist/esm/lib/read-array-buffer.js +9 -0
  63. package/dist/esm/lib/read-array-buffer.js.map +1 -0
  64. package/dist/esm/parquet-loader.js +4 -24
  65. package/dist/esm/parquet-loader.js.map +1 -1
  66. package/dist/esm/parquet-writer.js +1 -1
  67. package/dist/esm/parquet-writer.js.map +1 -1
  68. package/dist/esm/parquetjs/codecs/dictionary.js +12 -0
  69. package/dist/esm/parquetjs/codecs/dictionary.js.map +1 -0
  70. package/dist/esm/parquetjs/codecs/index.js +9 -0
  71. package/dist/esm/parquetjs/codecs/index.js.map +1 -1
  72. package/dist/esm/parquetjs/codecs/rle.js +2 -2
  73. package/dist/esm/parquetjs/codecs/rle.js.map +1 -1
  74. package/dist/esm/parquetjs/compression.js +54 -105
  75. package/dist/esm/parquetjs/compression.js.map +1 -1
  76. package/dist/esm/parquetjs/{writer.js → encoder/writer.js} +32 -35
  77. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  78. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js +1 -0
  79. package/dist/esm/parquetjs/parquet-thrift/CompressionCodec.js.map +1 -1
  80. package/dist/esm/parquetjs/parser/decoders.js +300 -0
  81. package/dist/esm/parquetjs/parser/decoders.js.map +1 -0
  82. package/dist/esm/parquetjs/parser/parquet-cursor.js +90 -0
  83. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  84. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +164 -0
  85. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  86. package/dist/esm/parquetjs/parser/parquet-reader.js +133 -0
  87. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -0
  88. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  89. package/dist/esm/parquetjs/schema/schema.js +2 -0
  90. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  91. package/dist/esm/parquetjs/schema/shred.js +2 -1
  92. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  93. package/dist/esm/parquetjs/schema/types.js +78 -4
  94. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  95. package/dist/esm/parquetjs/utils/buffer-utils.js +12 -0
  96. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  97. package/dist/esm/parquetjs/utils/file-utils.js +79 -0
  98. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -0
  99. package/dist/esm/parquetjs/{util.js → utils/read-utils.js} +11 -89
  100. package/dist/esm/parquetjs/utils/read-utils.js.map +1 -0
  101. package/dist/parquet-worker.js +7 -18
  102. package/dist/parquet-worker.js.map +1 -1
  103. package/package.json +10 -10
  104. package/src/bundle.ts +2 -3
  105. package/src/constants.ts +17 -0
  106. package/src/index.ts +30 -4
  107. package/src/lib/convert-schema.ts +95 -0
  108. package/src/lib/parse-parquet.ts +27 -0
  109. package/{dist/es5/libs → src/lib}/read-array-buffer.ts +0 -0
  110. package/src/parquet-loader.ts +4 -24
  111. package/src/parquetjs/codecs/dictionary.ts +11 -0
  112. package/src/parquetjs/codecs/index.ts +13 -0
  113. package/src/parquetjs/codecs/rle.ts +4 -2
  114. package/src/parquetjs/compression.ts +89 -50
  115. package/src/parquetjs/{writer.ts → encoder/writer.ts} +46 -45
  116. package/src/parquetjs/parquet-thrift/CompressionCodec.ts +2 -1
  117. package/src/parquetjs/parser/decoders.ts +448 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +210 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +179 -0
  121. package/src/parquetjs/schema/declare.ts +48 -2
  122. package/src/parquetjs/schema/schema.ts +2 -0
  123. package/src/parquetjs/schema/shred.ts +3 -1
  124. package/src/parquetjs/schema/types.ts +82 -5
  125. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  126. package/src/parquetjs/utils/file-utils.ts +96 -0
  127. package/src/parquetjs/{util.ts → utils/read-utils.ts} +13 -110
  128. package/dist/dist.es5.min.js +0 -51
  129. package/dist/dist.es5.min.js.map +0 -1
  130. package/dist/es5/parquetjs/compression.ts.disabled +0 -105
  131. package/dist/es5/parquetjs/reader.js +0 -1078
  132. package/dist/es5/parquetjs/reader.js.map +0 -1
  133. package/dist/es5/parquetjs/util.js.map +0 -1
  134. package/dist/es5/parquetjs/writer.js.map +0 -1
  135. package/dist/esm/libs/read-array-buffer.ts +0 -31
  136. package/dist/esm/parquetjs/compression.ts.disabled +0 -105
  137. package/dist/esm/parquetjs/reader.js +0 -524
  138. package/dist/esm/parquetjs/reader.js.map +0 -1
  139. package/dist/esm/parquetjs/util.js.map +0 -1
  140. package/dist/esm/parquetjs/writer.js.map +0 -1
  141. package/src/libs/read-array-buffer.ts +0 -31
  142. package/src/parquetjs/compression.ts.disabled +0 -105
  143. package/src/parquetjs/reader.ts +0 -707
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/bundle.ts"],"names":["moduleExports","require","_global","window","global","loaders","module","exports","Object","assign"],"mappings":"AACA,MAAMA,aAAa,GAAGC,OAAO,CAAC,SAAD,CAA7B;;AACA,MAAMC,OAAO,GAAG,OAAOC,MAAP,KAAkB,WAAlB,GAAgCC,MAAhC,GAAyCD,MAAzD;;AACAD,OAAO,CAACG,OAAR,GAAkBH,OAAO,CAACG,OAAR,IAAmB,EAArC;AACAC,MAAM,CAACC,OAAP,GAAiBC,MAAM,CAACC,MAAP,CAAcP,OAAO,CAACG,OAAtB,EAA+BL,aAA/B,CAAjB","sourcesContent":["// @ts-nocheck\nconst moduleExports = require('./index');\nconst _global = typeof window === 'undefined' ? global : window;\n_global.loaders = _global.loaders || {};\nmodule.exports = Object.assign(_global.loaders, moduleExports);\n"],"file":"bundle.js"}
1
+ {"version":3,"sources":["../../src/bundle.ts"],"names":["moduleExports","require","globalThis","loaders","module","exports","Object","assign"],"mappings":"AACA,MAAMA,aAAa,GAAGC,OAAO,CAAC,SAAD,CAA7B;;AACAC,UAAU,CAACC,OAAX,GAAqBD,UAAU,CAACC,OAAX,IAAsB,EAA3C;AACAC,MAAM,CAACC,OAAP,GAAiBC,MAAM,CAACC,MAAP,CAAcL,UAAU,CAACC,OAAzB,EAAkCH,aAAlC,CAAjB","sourcesContent":["// @ts-nocheck\nconst moduleExports = require('./index');\nglobalThis.loaders = globalThis.loaders || {};\nmodule.exports = Object.assign(globalThis.loaders, moduleExports);\n"],"file":"bundle.js"}
@@ -0,0 +1,6 @@
1
+ export const PARQUET_MAGIC = 'PAR1';
2
+ export const PARQUET_MAGIC_ENCRYPTED = 'PARE';
3
+ export const PARQUET_VERSION = 1;
4
+ export const PARQUET_RDLVL_TYPE = 'INT32';
5
+ export const PARQUET_RDLVL_ENCODING = 'RLE';
6
+ //# sourceMappingURL=constants.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/constants.ts"],"names":["PARQUET_MAGIC","PARQUET_MAGIC_ENCRYPTED","PARQUET_VERSION","PARQUET_RDLVL_TYPE","PARQUET_RDLVL_ENCODING"],"mappings":"AAIA,OAAO,MAAMA,aAAa,GAAG,MAAtB;AACP,OAAO,MAAMC,uBAAuB,GAAG,MAAhC;AAKP,OAAO,MAAMC,eAAe,GAAG,CAAxB;AAKP,OAAO,MAAMC,kBAAkB,GAAG,OAA3B;AACP,OAAO,MAAMC,sBAAsB,GAAG,KAA/B","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n/**\n * Parquet File Magic String\n */\nexport const PARQUET_MAGIC = 'PAR1';\nexport const PARQUET_MAGIC_ENCRYPTED = 'PARE';\n\n/**\n * Parquet File Format Version\n */\nexport const PARQUET_VERSION = 1;\n\n/**\n * Internal type used for repetition/definition levels\n */\nexport const PARQUET_RDLVL_TYPE = 'INT32';\nexport const PARQUET_RDLVL_ENCODING = 'RLE';\n"],"file":"constants.js"}
package/dist/esm/index.js CHANGED
@@ -1,6 +1,16 @@
1
- export { ParquetReader, ParquetEnvelopeReader } from './parquetjs/reader';
2
- export { ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer } from './parquetjs/writer';
3
- export { ParquetSchema } from './parquetjs/schema/schema';
4
- export { ParquetLoader, ParquetWorkerLoader } from './parquet-loader';
1
+ import { ParquetLoader as ParquetWorkerLoader } from './parquet-loader';
2
+ import { parseParquet, parseParquetFileInBatches } from './lib/parse-parquet';
3
+ export { ParquetWorkerLoader };
4
+ export const ParquetLoader = { ...ParquetWorkerLoader,
5
+ parse: parseParquet,
6
+ parseFileInBatches: parseParquetFileInBatches
7
+ };
5
8
  export { ParquetWriter as _ParquetWriter } from './parquet-writer';
9
+ export { preloadCompressions } from './parquetjs/compression';
10
+ export { ParquetEnvelopeReader } from './parquetjs/parser/parquet-envelope-reader';
11
+ export { ParquetReader } from './parquetjs/parser/parquet-reader';
12
+ export { ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer } from './parquetjs/encoder/writer';
13
+ export { ParquetSchema } from './parquetjs/schema/schema';
14
+ export { convertParquetToArrowSchema } from './lib/convert-schema';
15
+ export const _typecheckParquetLoader = ParquetLoader;
6
16
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/index.ts"],"names":["ParquetReader","ParquetEnvelopeReader","ParquetWriter","ParquetEnvelopeWriter","ParquetTransformer","ParquetSchema","ParquetLoader","ParquetWorkerLoader","_ParquetWriter"],"mappings":"AAAA,SAAQA,aAAR,EAAuBC,qBAAvB,QAAmD,oBAAnD;AACA,SAAQC,aAAR,EAAuBC,qBAAvB,EAA8CC,kBAA9C,QAAuE,oBAAvE;AACA,SAAQC,aAAR,QAA4B,2BAA5B;AAEA,SAAQC,aAAR,EAAuBC,mBAAvB,QAAiD,kBAAjD;AACA,SAAQL,aAAa,IAAIM,cAAzB,QAA8C,kBAA9C","sourcesContent":["export {ParquetReader, ParquetEnvelopeReader} from './parquetjs/reader';\nexport {ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer} from './parquetjs/writer';\nexport {ParquetSchema} from './parquetjs/schema/schema';\n\nexport {ParquetLoader, ParquetWorkerLoader} from './parquet-loader';\nexport {ParquetWriter as _ParquetWriter} from './parquet-writer';\n"],"file":"index.js"}
1
+ {"version":3,"sources":["../../src/index.ts"],"names":["ParquetLoader","ParquetWorkerLoader","parseParquet","parseParquetFileInBatches","parse","parseFileInBatches","ParquetWriter","_ParquetWriter","preloadCompressions","ParquetEnvelopeReader","ParquetReader","ParquetEnvelopeWriter","ParquetTransformer","ParquetSchema","convertParquetToArrowSchema","_typecheckParquetLoader"],"mappings":"AAIA,SAAQA,aAAa,IAAIC,mBAAzB,QAAmD,kBAAnD;AACA,SAAQC,YAAR,EAAsBC,yBAAtB,QAAsD,qBAAtD;AAEA,SAAQF,mBAAR;AAGA,OAAO,MAAMD,aAAa,GAAG,EAC3B,GAAGC,mBADwB;AAE3BG,EAAAA,KAAK,EAAEF,YAFoB;AAG3BG,EAAAA,kBAAkB,EAAEF;AAHO,CAAtB;AAQP,SAAQG,aAAa,IAAIC,cAAzB,QAA8C,kBAA9C;AAIA,SAAQC,mBAAR,QAAkC,yBAAlC;AAEA,SAAQC,qBAAR,QAAoC,4CAApC;AACA,SAAQC,aAAR,QAA4B,mCAA5B;AACA,SAAQJ,aAAR,EAAuBK,qBAAvB,EAA8CC,kBAA9C,QAAuE,4BAAvE;AACA,SAAQC,aAAR,QAA4B,2BAA5B;AACA,SAAQC,2BAAR,QAA0C,sBAA1C;AAGA,OAAO,MAAMC,uBAAyC,GAAGf,aAAlD","sourcesContent":["import type {LoaderWithParser} from '@loaders.gl/loader-utils';\n\n// ParquetLoader\n\nimport {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';\nimport {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';\n\nexport {ParquetWorkerLoader};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n ...ParquetWorkerLoader,\n parse: parseParquet,\n parseFileInBatches: parseParquetFileInBatches\n};\n\n// ParquetWriter\n\nexport {ParquetWriter as _ParquetWriter} from './parquet-writer';\n\n// EXPERIMENTAL - expose the internal parquetjs API\n\nexport {preloadCompressions} from './parquetjs/compression';\n\nexport {ParquetEnvelopeReader} from './parquetjs/parser/parquet-envelope-reader';\nexport {ParquetReader} from './parquetjs/parser/parquet-reader';\nexport {ParquetWriter, ParquetEnvelopeWriter, ParquetTransformer} from './parquetjs/encoder/writer';\nexport {ParquetSchema} from './parquetjs/schema/schema';\nexport {convertParquetToArrowSchema} from './lib/convert-schema';\n\n// TESTS\nexport const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;\n"],"file":"index.js"}
@@ -0,0 +1,71 @@
1
+ import { Schema, Struct, Field, Bool, Float64, Int32, Float32, Binary, Utf8, Int64, Uint16, Uint32, Uint64, Int8, Int16 } from '@loaders.gl/schema';
2
+ export const PARQUET_TYPE_MAPPING = {
3
+ BOOLEAN: Bool,
4
+ INT32: Int32,
5
+ INT64: Float64,
6
+ INT96: Float64,
7
+ FLOAT: Float32,
8
+ DOUBLE: Float64,
9
+ BYTE_ARRAY: Binary,
10
+ FIXED_LEN_BYTE_ARRAY: Binary,
11
+ UTF8: Utf8,
12
+ DATE: Int32,
13
+ TIME_MILLIS: Int64,
14
+ TIME_MICROS: Int64,
15
+ TIMESTAMP_MILLIS: Int64,
16
+ TIMESTAMP_MICROS: Int64,
17
+ UINT_8: Int32,
18
+ UINT_16: Uint16,
19
+ UINT_32: Uint32,
20
+ UINT_64: Uint64,
21
+ INT_8: Int8,
22
+ INT_16: Int16,
23
+ INT_32: Int32,
24
+ INT_64: Int64,
25
+ JSON: Binary,
26
+ BSON: Binary,
27
+ INTERVAL: Binary,
28
+ DECIMAL_INT32: Float32,
29
+ DECIMAL_INT64: Float64,
30
+ DECIMAL_BYTE_ARRAY: Float64,
31
+ DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
32
+ };
33
+ export function convertParquetToArrowSchema(parquetSchema) {
34
+ const fields = getFields(parquetSchema.schema);
35
+ return new Schema(fields);
36
+ }
37
+
38
+ function getFieldMetadata(field) {
39
+ const metadata = new Map();
40
+
41
+ for (const key in field) {
42
+ if (key !== 'name') {
43
+ const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
44
+ metadata.set(key, value);
45
+ }
46
+ }
47
+
48
+ return metadata;
49
+ }
50
+
51
+ function getFields(schema) {
52
+ const fields = [];
53
+
54
+ for (const name in schema) {
55
+ const field = schema[name];
56
+
57
+ if (field.fields) {
58
+ const childField = getFields(field.fields);
59
+ const nestedField = new Field(name, new Struct(childField), field.optional);
60
+ fields.push(nestedField);
61
+ } else {
62
+ const FieldType = PARQUET_TYPE_MAPPING[field.type];
63
+ const metadata = getFieldMetadata(field);
64
+ const arrowField = new Field(name, new FieldType(), field.optional, metadata);
65
+ fields.push(arrowField);
66
+ }
67
+ }
68
+
69
+ return fields;
70
+ }
71
+ //# sourceMappingURL=convert-schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/lib/convert-schema.ts"],"names":["Schema","Struct","Field","Bool","Float64","Int32","Float32","Binary","Utf8","Int64","Uint16","Uint32","Uint64","Int8","Int16","PARQUET_TYPE_MAPPING","BOOLEAN","INT32","INT64","INT96","FLOAT","DOUBLE","BYTE_ARRAY","FIXED_LEN_BYTE_ARRAY","UTF8","DATE","TIME_MILLIS","TIME_MICROS","TIMESTAMP_MILLIS","TIMESTAMP_MICROS","UINT_8","UINT_16","UINT_32","UINT_64","INT_8","INT_16","INT_32","INT_64","JSON","BSON","INTERVAL","DECIMAL_INT32","DECIMAL_INT64","DECIMAL_BYTE_ARRAY","DECIMAL_FIXED_LEN_BYTE_ARRAY","convertParquetToArrowSchema","parquetSchema","fields","getFields","schema","getFieldMetadata","field","metadata","Map","key","value","stringify","set","name","childField","nestedField","optional","push","FieldType","type","arrowField"],"mappings":"AAGA,SACEA,MADF,EAEEC,MAFF,EAGEC,KAHF,EAKEC,IALF,EAMEC,OANF,EAOEC,KAPF,EAQEC,OARF,EASEC,MATF,EAUEC,IAVF,EAWEC,KAXF,EAYEC,MAZF,EAaEC,MAbF,EAcEC,MAdF,EAeEC,IAfF,EAgBEC,KAhBF,QAiBO,oBAjBP;AAmBA,OAAO,MAAMC,oBAA8D,GAAG;AAC5EC,EAAAA,OAAO,EAAEb,IADmE;AAE5Ec,EAAAA,KAAK,EAAEZ,KAFqE;AAG5Ea,EAAAA,KAAK,EAAEd,OAHqE;AAI5Ee,EAAAA,KAAK,EAAEf,OAJqE;AAK5EgB,EAAAA,KAAK,EAAEd,OALqE;AAM5Ee,EAAAA,MAAM,EAAEjB,OANoE;AAO5EkB,EAAAA,UAAU,EAAEf,MAPgE;AAQ5EgB,EAAAA,oBAAoB,EAAEhB,MARsD;AAS5EiB,EAAAA,IAAI,EAAEhB,IATsE;AAU5EiB,EAAAA,IAAI,EAAEpB,KAVsE;AAW5EqB,EAAAA,WAAW,EAAEjB,KAX+D;AAY5EkB,EAAAA,WAAW,EAAElB,KAZ+D;AAa5EmB,EAAAA,gBAAgB,EAAEnB,KAb0D;AAc5EoB,EAAAA,gBAAgB,EAAEpB,KAd0D;AAe5EqB,EAAAA,MAAM,EAAEzB,KAfoE;AAgB5E0B,EAAAA,OAAO,EAAErB,MAhBmE;AAiB5EsB,EAAAA,OAAO,EAAErB,MAjBmE;AAkB5EsB,EAAAA,OAAO,EAAErB,MAlBmE;AAmB5EsB,EAAAA,KAAK,EAAErB,IAnBqE;AAoB5EsB,EAAAA,MAAM,EAAErB,KApBoE;AAqB5EsB,EAAAA,MAAM,EAAE/B,KArBoE;AAsB5EgC,EAAAA,MAAM,EAAE5B,KAtBoE;AAuB5E6B,EAAAA,IAAI,EAAE/B,MAvBsE;AAwB5EgC,EAAAA,IAAI,EAAEhC,MAxBsE;AA0B5EiC,EAAAA,QAAQ,EAAEjC,MA1BkE;AA2B5EkC,EAAAA,aAAa,EAAEnC,OA3B6D;AA4B5EoC,EAAAA,aAAa,EAAEtC,OA5B6D;AA6B5EuC,EAAAA,kBAAkB,EAAEvC,OA7BwD;AA8B5EwC,EAAAA,4BAA4B,EAAExC;AA9B8C,CAAvE;AAiCP,OAAO,SAASyC,2BAAT,CAAqCC,aAArC,EAA2E;AAChF,QAAMC,MAAM,GAAGC,SAAS,CAACF,aAAa,CAACG,MAAf,CAAxB;AAGA,SAAO,IAAIjD,MAAJ,CAAW+C,MAAX,CAAP;AACD;;AAED,SAASG,gBAAT,CAA0BC,KAA1B,EAAoE;AAClE,QAAMC,QAAQ,GAAG,IAAIC,GAAJ,EAAjB;;AAEA,OAAK,MAAMC,GAAX,IAAkBH,KAAlB,EAAyB;AACvB,QAAIG,GAAG,KAAK,MAAZ,EAAoB;AAClB,YAAMC,KAAK,GAAG,OAAOJ,KAAK,CAACG,GAAD,CAAZ,KAAsB,QAAtB,GAAiChB,IAAI,CAACkB,SAAL,CAAeL,KAAK,CAACG,GAAD,CAApB,CAAjC,GAA8DH,KAAK,CAACG,GAAD,CAAjF;AACAF,MAAAA,QAAQ,CAACK,GAAT,CAAaH,GAAb,EAAkBC,KAAlB;AACD;AACF;;AAED,SAAOH,QAAP;AACD;;AAED,SAASJ,SAAT,CAAmBC,MAAnB,EAAqD;AACnD,QAAMF,MAAe,GAAG,EAAxB;;AAEA,OAAK,MAAMW,IAAX,IAAmBT,MAAnB,EAA2B;AACzB,UAAME,KAAK,GAAGF,MAAM,CAACS,IAAD,CAApB;;AAEA,QAAIP,KAAK,CAACJ,MAAV,EAAkB;AAChB,YAAMY,UAAU,GAAGX,SAAS,CAACG,KAAK,CAACJ,MAAP,CAA5B;AACA,YAAMa,WAAW,GAAG,IAAI1D,KAAJ,CAAUwD,IAAV,EAAgB,IAAIzD,MAAJ,CAAW0D,UAAX,CAAhB,EAAwCR,KAAK,CAACU,QAA9C,CAApB;AACAd,MAAAA,MAAM,CAACe,IAAP,CAAYF,WAAZ;AACD,KAJD,MAIO;AACL,YAAMG,SAAS,GAAGhD,oBAAoB,CAACoC,KAAK,CAACa,IAAP,CAAtC;AACA,YAAMZ,QAAQ,GAAGF,gBAAgB,CAACC,KAAD,CAAjC;AACA,YAAMc,UAAU,GAAG,IAAI/D,KAAJ,CAAUwD,IAAV,EAAgB,IAAIK,SAAJ,EAAhB,EAAiCZ,KAAK,CAACU,QAAvC,EAAiDT,QAAjD,CAAnB;AACAL,MAAAA,MAAM,CAACe,IAAP,CAAYG,UAAZ;AACD;AACF;;AAED,SAAOlB,MAAP;AACD","sourcesContent":["import type {ParquetSchema} from '../parquetjs/schema/schema';\nimport type {FieldDefinition, ParquetField, ParquetType} from '../parquetjs/schema/declare';\n\nimport {\n Schema,\n Struct,\n Field,\n DataType,\n Bool,\n Float64,\n Int32,\n Float32,\n Binary,\n Utf8,\n Int64,\n Uint16,\n Uint32,\n Uint64,\n Int8,\n Int16\n} from '@loaders.gl/schema';\n\nexport const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {\n BOOLEAN: Bool,\n INT32: Int32,\n INT64: Float64,\n INT96: Float64,\n FLOAT: Float32,\n DOUBLE: Float64,\n BYTE_ARRAY: Binary,\n FIXED_LEN_BYTE_ARRAY: Binary,\n UTF8: Utf8,\n DATE: Int32,\n TIME_MILLIS: Int64,\n TIME_MICROS: Int64,\n TIMESTAMP_MILLIS: Int64,\n TIMESTAMP_MICROS: Int64,\n UINT_8: Int32,\n UINT_16: Uint16,\n UINT_32: Uint32,\n UINT_64: Uint64,\n INT_8: Int8,\n INT_16: Int16,\n INT_32: Int32,\n INT_64: Int64,\n JSON: Binary,\n BSON: Binary,\n // TODO check interal type\n INTERVAL: Binary,\n DECIMAL_INT32: Float32,\n DECIMAL_INT64: Float64,\n DECIMAL_BYTE_ARRAY: Float64,\n DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64\n};\n\nexport function convertParquetToArrowSchema(parquetSchema: ParquetSchema): Schema {\n const fields = getFields(parquetSchema.schema);\n\n // TODO add metadata if needed.\n return new Schema(fields);\n}\n\nfunction getFieldMetadata(field: ParquetField): Map<string, string> {\n const metadata = new Map();\n\n for (const key in field) {\n if (key !== 'name') {\n const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];\n metadata.set(key, value);\n }\n }\n\n return metadata;\n}\n\nfunction getFields(schema: FieldDefinition): Field[] {\n const fields: Field[] = [];\n\n for (const name in schema) {\n const field = schema[name];\n\n if (field.fields) {\n const childField = getFields(field.fields);\n const nestedField = new Field(name, new Struct(childField), field.optional);\n fields.push(nestedField);\n } else {\n const FieldType = PARQUET_TYPE_MAPPING[field.type];\n const metadata = getFieldMetadata(field);\n const arrowField = new Field(name, new FieldType(), field.optional, metadata);\n fields.push(arrowField);\n }\n }\n\n return fields;\n}\n"],"file":"convert-schema.js"}
@@ -0,0 +1,28 @@
1
+ import { ParquetReader } from '../parquetjs/parser/parquet-reader';
2
+ export async function parseParquet(arrayBuffer, options) {
3
+ const blob = new Blob([arrayBuffer]);
4
+
5
+ for await (const batch of parseParquetFileInBatches(blob, options)) {
6
+ return batch;
7
+ }
8
+
9
+ return null;
10
+ }
11
+ export async function* parseParquetFileInBatches(blob, options) {
12
+ const reader = await ParquetReader.openBlob(blob);
13
+ const rows = [];
14
+
15
+ try {
16
+ const cursor = reader.getCursor();
17
+ let record;
18
+
19
+ while (record = await cursor.next()) {
20
+ rows.push(record);
21
+ }
22
+ } finally {
23
+ await reader.close();
24
+ }
25
+
26
+ yield rows;
27
+ }
28
+ //# sourceMappingURL=parse-parquet.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/lib/parse-parquet.ts"],"names":["ParquetReader","parseParquet","arrayBuffer","options","blob","Blob","batch","parseParquetFileInBatches","reader","openBlob","rows","cursor","getCursor","record","next","push","close"],"mappings":"AAGA,SAAQA,aAAR,QAA4B,oCAA5B;AAEA,OAAO,eAAeC,YAAf,CAA4BC,WAA5B,EAAsDC,OAAtD,EAAsF;AAC3F,QAAMC,IAAI,GAAG,IAAIC,IAAJ,CAAS,CAACH,WAAD,CAAT,CAAb;;AACA,aAAW,MAAMI,KAAjB,IAA0BC,yBAAyB,CAACH,IAAD,EAAOD,OAAP,CAAnD,EAAoE;AAClE,WAAOG,KAAP;AACD;;AACD,SAAO,IAAP;AACD;AAED,OAAO,gBAAgBC,yBAAhB,CAA0CH,IAA1C,EAAsDD,OAAtD,EAAsF;AAC3F,QAAMK,MAAM,GAAG,MAAMR,aAAa,CAACS,QAAd,CAAuBL,IAAvB,CAArB;AACA,QAAMM,IAAa,GAAG,EAAtB;;AACA,MAAI;AACF,UAAMC,MAAM,GAAGH,MAAM,CAACI,SAAP,EAAf;AACA,QAAIC,MAAJ;;AACA,WAAQA,MAAM,GAAG,MAAMF,MAAM,CAACG,IAAP,EAAvB,EAAuC;AACrCJ,MAAAA,IAAI,CAACK,IAAL,CAAUF,MAAV;AACD;AACF,GAND,SAMU;AACR,UAAML,MAAM,CAACQ,KAAP,EAAN;AACD;;AACD,QAAMN,IAAN;AACD","sourcesContent":["// import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport type {ParquetLoaderOptions} from '../parquet-loader';\n\nimport {ParquetReader} from '../parquetjs/parser/parquet-reader';\n\nexport async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const blob = new Blob([arrayBuffer]);\n for await (const batch of parseParquetFileInBatches(blob, options)) {\n return batch;\n }\n return null;\n}\n\nexport async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openBlob(blob);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n yield rows;\n}\n"],"file":"parse-parquet.js"}
@@ -0,0 +1,9 @@
1
+ export async function readArrayBuffer(file, start, length) {
2
+ if (file instanceof Blob) {
3
+ const slice = file.slice(start, start + length);
4
+ return await slice.arrayBuffer();
5
+ }
6
+
7
+ return await file.read(start, start + length);
8
+ }
9
+ //# sourceMappingURL=read-array-buffer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../src/lib/read-array-buffer.ts"],"names":["readArrayBuffer","file","start","length","Blob","slice","arrayBuffer","read"],"mappings":"AAEA,OAAO,eAAeA,eAAf,CACLC,IADK,EAELC,KAFK,EAGLC,MAHK,EAIiB;AACtB,MAAIF,IAAI,YAAYG,IAApB,EAA0B;AACxB,UAAMC,KAAK,GAAGJ,IAAI,CAACI,KAAL,CAAWH,KAAX,EAAkBA,KAAK,GAAGC,MAA1B,CAAd;AACA,WAAO,MAAME,KAAK,CAACC,WAAN,EAAb;AACD;;AACD,SAAO,MAAML,IAAI,CAACM,IAAL,CAAUL,KAAV,EAAiBA,KAAK,GAAGC,MAAzB,CAAb;AACD","sourcesContent":["// Random-Access read\n\nexport async function readArrayBuffer(\n file: Blob | ArrayBuffer | any,\n start: number,\n length: number\n): Promise<ArrayBuffer> {\n if (file instanceof Blob) {\n const slice = file.slice(start, start + length);\n return await slice.arrayBuffer();\n }\n return await file.read(start, start + length);\n}\n\n/**\n * Read a slice of a Blob or File, without loading the entire file into memory\n * The trick when reading File objects is to read successive \"slices\" of the File\n * Per spec https://w3c.github.io/FileAPI/, slicing a File only updates the start and end fields\n * Actually reading from file happens in `readAsArrayBuffer`\n * @param blob to read\n export async function readBlob(blob: Blob): Promise<ArrayBuffer> {\n return await new Promise((resolve, reject) => {\n const fileReader = new FileReader();\n fileReader.onload = (event: ProgressEvent<FileReader>) =>\n resolve(event?.target?.result as ArrayBuffer);\n // TODO - reject with a proper Error\n fileReader.onerror = (error: ProgressEvent<FileReader>) => reject(error);\n fileReader.readAsArrayBuffer(blob);\n });\n}\n*/\n"],"file":"read-array-buffer.js"}
@@ -1,12 +1,11 @@
1
- import { ParquetReader } from './parquetjs/reader';
2
- const VERSION = typeof "3.0.12" !== 'undefined' ? "3.0.12" : 'latest';
1
+ const VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
3
2
  const DEFAULT_PARQUET_LOADER_OPTIONS = {
4
3
  parquet: {
5
4
  type: 'object-row-table',
6
5
  url: undefined
7
6
  }
8
7
  };
9
- export const ParquetWorkerLoader = {
8
+ export const ParquetLoader = {
10
9
  name: 'Apache Parquet',
11
10
  id: 'parquet',
12
11
  module: 'parquet',
@@ -16,27 +15,8 @@ export const ParquetWorkerLoader = {
16
15
  extensions: ['parquet'],
17
16
  mimeTypes: ['application/octet-stream'],
18
17
  binary: true,
18
+ tests: ['PAR1', 'PARE'],
19
19
  options: DEFAULT_PARQUET_LOADER_OPTIONS
20
20
  };
21
- export const ParquetLoader = { ...ParquetWorkerLoader,
22
- parse
23
- };
24
-
25
- async function parse(arrayBuffer, options) {
26
- const reader = await ParquetReader.openArrayBuffer(arrayBuffer);
27
- const rows = [];
28
-
29
- try {
30
- const cursor = reader.getCursor();
31
- let record;
32
-
33
- while (record = await cursor.next()) {
34
- rows.push(record);
35
- }
36
- } finally {
37
- await reader.close();
38
- }
39
-
40
- return rows;
41
- }
21
+ export const _typecheckParquetLoader = ParquetLoader;
42
22
  //# sourceMappingURL=parquet-loader.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/parquet-loader.ts"],"names":["ParquetReader","VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetWorkerLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","options","ParquetLoader","parse","arrayBuffer","reader","openArrayBuffer","rows","cursor","getCursor","record","next","push","close"],"mappings":"AACA,SAAQA,aAAR,QAA4B,oBAA5B;AAIA,MAAMC,OAAO,GAAG,oBAAuB,WAAvB,cAAmD,QAAnE;AASA,MAAMC,8BAAoD,GAAG;AAC3DC,EAAAA,OAAO,EAAE;AACPC,IAAAA,IAAI,EAAE,kBADC;AAEPC,IAAAA,GAAG,EAAEC;AAFE;AADkD,CAA7D;AAQA,OAAO,MAAMC,mBAA2B,GAAG;AACzCC,EAAAA,IAAI,EAAE,gBADmC;AAEzCC,EAAAA,EAAE,EAAE,SAFqC;AAGzCC,EAAAA,MAAM,EAAE,SAHiC;AAIzCC,EAAAA,OAAO,EAAEV,OAJgC;AAKzCW,EAAAA,MAAM,EAAE,IALiC;AAMzCC,EAAAA,QAAQ,EAAE,OAN+B;AAOzCC,EAAAA,UAAU,EAAE,CAAC,SAAD,CAP6B;AAQzCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CAR8B;AASzCC,EAAAA,MAAM,EAAE,IATiC;AAWzCC,EAAAA,OAAO,EAAEf;AAXgC,CAApC;AAeP,OAAO,MAAMgB,aAA+B,GAAG,EAC7C,GAAGX,mBAD0C;AAE7CY,EAAAA;AAF6C,CAAxC;;AAKP,eAAeA,KAAf,CAAqBC,WAArB,EAA+CH,OAA/C,EAA+E;AAC7E,QAAMI,MAAM,GAAG,MAAMrB,aAAa,CAACsB,eAAd,CAA8BF,WAA9B,CAArB;AACA,QAAMG,IAAa,GAAG,EAAtB;;AACA,MAAI;AACF,UAAMC,MAAM,GAAGH,MAAM,CAACI,SAAP,EAAf;AACA,QAAIC,MAAJ;;AACA,WAAQA,MAAM,GAAG,MAAMF,MAAM,CAACG,IAAP,EAAvB,EAAuC;AACrCJ,MAAAA,IAAI,CAACK,IAAL,CAAUF,MAAV;AACD;AACF,GAND,SAMU;AACR,UAAML,MAAM,CAACQ,KAAP,EAAN;AACD;;AACD,SAAON,IAAP;AACD","sourcesContent":["import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';\nimport {ParquetReader} from './parquetjs/reader';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetWorkerLoader: Loader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n // tests: ['PARQUET'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader: LoaderWithParser = {\n ...ParquetWorkerLoader,\n parse\n};\n\nasync function parse(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {\n const reader = await ParquetReader.openArrayBuffer(arrayBuffer);\n const rows: any[][] = [];\n try {\n const cursor = reader.getCursor();\n let record: any[] | null;\n while ((record = await cursor.next())) {\n rows.push(record);\n }\n } finally {\n await reader.close();\n }\n return rows;\n}\n"],"file":"parquet-loader.js"}
1
+ {"version":3,"sources":["../../src/parquet-loader.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","parquet","type","url","undefined","ParquetLoader","name","id","module","version","worker","category","extensions","mimeTypes","binary","tests","options","_typecheckParquetLoader"],"mappings":"AAIA,MAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AASA,MAAMC,8BAAoD,GAAG;AAC3DC,EAAAA,OAAO,EAAE;AACPC,IAAAA,IAAI,EAAE,kBADC;AAEPC,IAAAA,GAAG,EAAEC;AAFE;AADkD,CAA7D;AAQA,OAAO,MAAMC,aAAa,GAAG;AAC3BC,EAAAA,IAAI,EAAE,gBADqB;AAE3BC,EAAAA,EAAE,EAAE,SAFuB;AAG3BC,EAAAA,MAAM,EAAE,SAHmB;AAI3BC,EAAAA,OAAO,EAAEV,OAJkB;AAK3BW,EAAAA,MAAM,EAAE,IALmB;AAM3BC,EAAAA,QAAQ,EAAE,OANiB;AAO3BC,EAAAA,UAAU,EAAE,CAAC,SAAD,CAPe;AAQ3BC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CARgB;AAS3BC,EAAAA,MAAM,EAAE,IATmB;AAU3BC,EAAAA,KAAK,EAAE,CAAC,MAAD,EAAS,MAAT,CAVoB;AAW3BC,EAAAA,OAAO,EAAEhB;AAXkB,CAAtB;AAcP,OAAO,MAAMiB,uBAA+B,GAAGZ,aAAxC","sourcesContent":["import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetLoaderOptions = LoaderOptions & {\n parquet?: {\n type?: 'object-row-table';\n url?: string;\n };\n};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS: ParquetLoaderOptions = {\n parquet: {\n type: 'object-row-table',\n url: undefined\n }\n};\n\n/** ParquetJS table loader */\nexport const ParquetLoader = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n worker: true,\n category: 'table',\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n binary: true,\n tests: ['PAR1', 'PARE'],\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nexport const _typecheckParquetLoader: Loader = ParquetLoader;\n"],"file":"parquet-loader.js"}
@@ -1,4 +1,4 @@
1
- const VERSION = typeof "3.0.12" !== 'undefined' ? "3.0.12" : 'latest';
1
+ const VERSION = typeof "3.1.0-alpha.4" !== 'undefined' ? "3.1.0-alpha.4" : 'latest';
2
2
  const DEFAULT_PARQUET_LOADER_OPTIONS = {};
3
3
  export const ParquetWriter = {
4
4
  name: 'Apache Parquet',
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":"AAIA,MAAMA,OAAO,GAAG,oBAAuB,WAAvB,cAAmD,QAAnE;AAIA,MAAMC,8BAA8B,GAAG,EAAvC;AAEA,OAAO,MAAMC,aAAqB,GAAG;AACnCC,EAAAA,IAAI,EAAE,gBAD6B;AAEnCC,EAAAA,EAAE,EAAE,SAF+B;AAGnCC,EAAAA,MAAM,EAAE,SAH2B;AAInCC,EAAAA,OAAO,EAAEN,OAJ0B;AAKnCO,EAAAA,UAAU,EAAE,CAAC,SAAD,CALuB;AAMnCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CANwB;AAOnCC,EAAAA,UAPmC;AAQnCC,EAAAA,MAAM,EAAE,IAR2B;AASnCC,EAAAA,OAAO,EAAEV;AAT0B,CAA9B;;AAYP,SAASQ,UAAT,CAAoBG,IAApB,EAA0BD,OAA1B,EAA0D;AACxD,SAAO,IAAIE,WAAJ,CAAgB,CAAhB,CAAP;AACD","sourcesContent":["import type {Writer} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetWriterOptions = {};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS = {};\n\nexport const ParquetWriter: Writer = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n encodeSync,\n binary: true,\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nfunction encodeSync(data, options?: ParquetWriterOptions) {\n return new ArrayBuffer(0);\n}\n"],"file":"parquet-writer.js"}
1
+ {"version":3,"sources":["../../src/parquet-writer.ts"],"names":["VERSION","DEFAULT_PARQUET_LOADER_OPTIONS","ParquetWriter","name","id","module","version","extensions","mimeTypes","encodeSync","binary","options","data","ArrayBuffer"],"mappings":"AAIA,MAAMA,OAAO,GAAG,2BAAuB,WAAvB,qBAAmD,QAAnE;AAIA,MAAMC,8BAA8B,GAAG,EAAvC;AAEA,OAAO,MAAMC,aAAqB,GAAG;AACnCC,EAAAA,IAAI,EAAE,gBAD6B;AAEnCC,EAAAA,EAAE,EAAE,SAF+B;AAGnCC,EAAAA,MAAM,EAAE,SAH2B;AAInCC,EAAAA,OAAO,EAAEN,OAJ0B;AAKnCO,EAAAA,UAAU,EAAE,CAAC,SAAD,CALuB;AAMnCC,EAAAA,SAAS,EAAE,CAAC,0BAAD,CANwB;AAOnCC,EAAAA,UAPmC;AAQnCC,EAAAA,MAAM,EAAE,IAR2B;AASnCC,EAAAA,OAAO,EAAEV;AAT0B,CAA9B;;AAYP,SAASQ,UAAT,CAAoBG,IAApB,EAA0BD,OAA1B,EAA0D;AACxD,SAAO,IAAIE,WAAJ,CAAgB,CAAhB,CAAP;AACD","sourcesContent":["import type {Writer} from '@loaders.gl/loader-utils';\n\n// __VERSION__ is injected by babel-plugin-version-inline\n// @ts-ignore TS2304: Cannot find name '__VERSION__'.\nconst VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';\n\nexport type ParquetWriterOptions = {};\n\nconst DEFAULT_PARQUET_LOADER_OPTIONS = {};\n\nexport const ParquetWriter: Writer = {\n name: 'Apache Parquet',\n id: 'parquet',\n module: 'parquet',\n version: VERSION,\n extensions: ['parquet'],\n mimeTypes: ['application/octet-stream'],\n encodeSync,\n binary: true,\n options: DEFAULT_PARQUET_LOADER_OPTIONS\n};\n\nfunction encodeSync(data, options?: ParquetWriterOptions) {\n return new ArrayBuffer(0);\n}\n"],"file":"parquet-writer.js"}
@@ -0,0 +1,12 @@
1
+ import { decodeValues as decodeRleValues } from './rle';
2
+ export function decodeValues(type, cursor, count, opts) {
3
+ opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);
4
+ cursor.offset += 1;
5
+ return decodeRleValues(type, cursor, count, { ...opts,
6
+ disableEnvelope: true
7
+ });
8
+ }
9
+ export function encodeValues(type, cursor, count, opts) {
10
+ throw new Error('Encode dictionary functionality is not supported');
11
+ }
12
+ //# sourceMappingURL=dictionary.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/dictionary.ts"],"names":["decodeValues","decodeRleValues","type","cursor","count","opts","bitWidth","buffer","slice","offset","readInt8","disableEnvelope","encodeValues","Error"],"mappings":"AAAA,SAAQA,YAAY,IAAIC,eAAxB,QAA8C,OAA9C;AAEA,OAAO,SAASD,YAAT,CAAsBE,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtDA,EAAAA,IAAI,CAACC,QAAL,GAAgBH,MAAM,CAACI,MAAP,CAAcC,KAAd,CAAoBL,MAAM,CAACM,MAA3B,EAAmCN,MAAM,CAACM,MAAP,GAAgB,CAAnD,EAAsDC,QAAtD,CAA+D,CAA/D,CAAhB;AACAP,EAAAA,MAAM,CAACM,MAAP,IAAiB,CAAjB;AACA,SAAOR,eAAe,CAACC,IAAD,EAAOC,MAAP,EAAeC,KAAf,EAAsB,EAAC,GAAGC,IAAJ;AAAUM,IAAAA,eAAe,EAAE;AAA3B,GAAtB,CAAtB;AACD;AAED,OAAO,SAASC,YAAT,CAAsBV,IAAtB,EAA4BC,MAA5B,EAAoCC,KAApC,EAA2CC,IAA3C,EAAiD;AACtD,QAAM,IAAIQ,KAAJ,CAAU,kDAAV,CAAN;AACD","sourcesContent":["import {decodeValues as decodeRleValues} from './rle';\n\nexport function decodeValues(type, cursor, count, opts) {\n opts.bitWidth = cursor.buffer.slice(cursor.offset, cursor.offset + 1).readInt8(0);\n cursor.offset += 1;\n return decodeRleValues(type, cursor, count, {...opts, disableEnvelope: true});\n}\n\nexport function encodeValues(type, cursor, count, opts) {\n throw new Error('Encode dictionary functionality is not supported');\n}\n"],"file":"dictionary.js"}
@@ -1,5 +1,6 @@
1
1
  import * as PLAIN from './plain';
2
2
  import * as RLE from './rle';
3
+ import * as DICTIONARY from './dictionary';
3
4
  export * from './declare';
4
5
  export const PARQUET_CODECS = {
5
6
  PLAIN: {
@@ -9,6 +10,14 @@ export const PARQUET_CODECS = {
9
10
  RLE: {
10
11
  encodeValues: RLE.encodeValues,
11
12
  decodeValues: RLE.decodeValues
13
+ },
14
+ PLAIN_DICTIONARY: {
15
+ encodeValues: DICTIONARY.encodeValues,
16
+ decodeValues: DICTIONARY.decodeValues
17
+ },
18
+ RLE_DICTIONARY: {
19
+ encodeValues: DICTIONARY.encodeValues,
20
+ decodeValues: DICTIONARY.decodeValues
12
21
  }
13
22
  };
14
23
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PLAIN","RLE","PARQUET_CODECS","encodeValues","decodeValues"],"mappings":"AAGA,OAAO,KAAKA,KAAZ,MAAuB,SAAvB;AACA,OAAO,KAAKC,GAAZ,MAAqB,OAArB;AAEA,cAAc,WAAd;AAEA,OAAO,MAAMC,cAAqD,GAAG;AACnEF,EAAAA,KAAK,EAAE;AACLG,IAAAA,YAAY,EAAEH,KAAK,CAACG,YADf;AAELC,IAAAA,YAAY,EAAEJ,KAAK,CAACI;AAFf,GAD4D;AAKnEH,EAAAA,GAAG,EAAE;AACHE,IAAAA,YAAY,EAAEF,GAAG,CAACE,YADf;AAEHC,IAAAA,YAAY,EAAEH,GAAG,CAACG;AAFf;AAL8D,CAA9D","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport type {ParquetCodec} from '../schema/declare';\nimport type {ParquetCodecKit} from './declare';\nimport * as PLAIN from './plain';\nimport * as RLE from './rle';\n\nexport * from './declare';\n\nexport const PARQUET_CODECS: Record<ParquetCodec, ParquetCodecKit> = {\n PLAIN: {\n encodeValues: PLAIN.encodeValues,\n decodeValues: PLAIN.decodeValues\n },\n RLE: {\n encodeValues: RLE.encodeValues,\n decodeValues: RLE.decodeValues\n }\n};\n"],"file":"index.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/index.ts"],"names":["PLAIN","RLE","DICTIONARY","PARQUET_CODECS","encodeValues","decodeValues","PLAIN_DICTIONARY","RLE_DICTIONARY"],"mappings":"AAGA,OAAO,KAAKA,KAAZ,MAAuB,SAAvB;AACA,OAAO,KAAKC,GAAZ,MAAqB,OAArB;AACA,OAAO,KAAKC,UAAZ,MAA4B,cAA5B;AAEA,cAAc,WAAd;AAEA,OAAO,MAAMC,cAAqD,GAAG;AACnEH,EAAAA,KAAK,EAAE;AACLI,IAAAA,YAAY,EAAEJ,KAAK,CAACI,YADf;AAELC,IAAAA,YAAY,EAAEL,KAAK,CAACK;AAFf,GAD4D;AAKnEJ,EAAAA,GAAG,EAAE;AACHG,IAAAA,YAAY,EAAEH,GAAG,CAACG,YADf;AAEHC,IAAAA,YAAY,EAAEJ,GAAG,CAACI;AAFf,GAL8D;AAUnEC,EAAAA,gBAAgB,EAAE;AAEhBF,IAAAA,YAAY,EAAEF,UAAU,CAACE,YAFT;AAGhBC,IAAAA,YAAY,EAAEH,UAAU,CAACG;AAHT,GAViD;AAgBnEE,EAAAA,cAAc,EAAE;AAEdH,IAAAA,YAAY,EAAEF,UAAU,CAACE,YAFX;AAGdC,IAAAA,YAAY,EAAEH,UAAU,CAACG;AAHX;AAhBmD,CAA9D","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\nimport type {ParquetCodec} from '../schema/declare';\nimport type {ParquetCodecKit} from './declare';\nimport * as PLAIN from './plain';\nimport * as RLE from './rle';\nimport * as DICTIONARY from './dictionary';\n\nexport * from './declare';\n\nexport const PARQUET_CODECS: Record<ParquetCodec, ParquetCodecKit> = {\n PLAIN: {\n encodeValues: PLAIN.encodeValues,\n decodeValues: PLAIN.decodeValues\n },\n RLE: {\n encodeValues: RLE.encodeValues,\n decodeValues: RLE.decodeValues\n },\n // Using the PLAIN_DICTIONARY enum value is deprecated in the Parquet 2.0 specification.\n PLAIN_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n },\n // Prefer using RLE_DICTIONARY in a data page and PLAIN in a dictionary page for Parquet 2.0+ files.\n RLE_DICTIONARY: {\n // @ts-ignore\n encodeValues: DICTIONARY.encodeValues,\n decodeValues: DICTIONARY.decodeValues\n }\n};\n"],"file":"index.js"}
@@ -111,7 +111,7 @@ function decodeRunRepeated(cursor, count, opts) {
111
111
  let value = 0;
112
112
 
113
113
  for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {
114
- value <<= 8;
114
+ value << 8;
115
115
  value += cursor.buffer[cursor.offset];
116
116
  cursor.offset += 1;
117
117
  }
@@ -143,7 +143,7 @@ function encodeRunRepeated(value, count, opts) {
143
143
 
144
144
  for (let i = 0; i < buf.length; i++) {
145
145
  buf.writeUInt8(value & 0xff, i);
146
- value >>= 8;
146
+ value >> 8;
147
147
  }
148
148
 
149
149
  return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);
@@ -1 +1 @@
1
- {"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":"AAIA,OAAOA,MAAP,MAAmB,QAAnB;AAGA,OAAO,SAASC,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAYC,CAAD,IAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAA1B,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,CAAW,qBAAoBH,IAAK,EAApC,CAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,QAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;AAED,OAAO,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,UAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAP,CAAcJ,MAAM,CAACK,MAArB,EAA6BL,MAAM,CAACE,MAApC,CAAf;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB5B,MAAM,CAACgC,cAAP,CAAsBH,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AACd,YAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGc,kBAAkB,CAACP,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AACL,YAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAxB;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGe,iBAAiB,CAACR,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACgC,KAAP,CAAa,CAAb,EAAgBR,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS8B,kBAAT,CACEP,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,MAAIT,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,QAAMF,MAAM,GAAG,IAAIkC,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGT,KAA/B,EAAsCY,CAAC,EAAvC,EAA2C;AACzC,QAAIb,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAAP,GAAgBY,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnEpC,MAAAA,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDV,EAAAA,MAAM,CAACE,MAAP,IAAiBQ,QAAQ,IAAIT,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAAS+B,iBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI5B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG0B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CtB,CAAC,EAA9C,EAAkD;AAChD4B,IAAAA,KAAK,KAAK,CAAV;AACAA,IAAAA,KAAK,IAAIhB,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIS,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAASzB,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,QAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIjC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIwB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAtC,EAA8CwB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE3B,MAAAA,GAAG,CAAC+B,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO7B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAgB1C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2BwB,KAA3B,EAA0Cf,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,QAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACqC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B5B,CAA7B;AACA4B,IAAAA,KAAK,KAAK,CAAV;AACD;;AAED,SAAOhC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAclB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n value <<= 8;\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n value >>= 8;\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
1
+ {"version":3,"sources":["../../../../src/parquetjs/codecs/rle.ts"],"names":["varint","encodeValues","type","values","opts","Error","map","x","parseInt","buf","Buffer","alloc","run","repeats","i","length","concat","encodeRunBitpacked","encodeRunRepeated","push","disableEnvelope","envelope","writeUInt32LE","undefined","copy","decodeValues","cursor","count","offset","header","decode","buffer","encodingLength","decodeRunBitpacked","decodeRunRepeated","slice","bitWidth","Array","fill","b","Math","floor","value","ceil","from","encode","writeUInt8"],"mappings":"AAIA,OAAOA,MAAP,MAAmB,QAAnB;AAGA,OAAO,SAASC,YAAT,CACLC,IADK,EAELC,MAFK,EAGLC,IAHK,EAIG;AACR,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,UAAQH,IAAR;AACE,SAAK,SAAL;AACA,SAAK,OAAL;AACA,SAAK,OAAL;AAEEC,MAAAA,MAAM,GAAGA,MAAM,CAACG,GAAP,CAAYC,CAAD,IAAOC,QAAQ,CAACD,CAAD,EAAI,EAAJ,CAA1B,CAAT;AACA;;AAEF;AACE,YAAM,IAAIF,KAAJ,CAAW,qBAAoBH,IAAK,EAApC,CAAN;AATJ;;AAYA,MAAIO,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa,CAAb,CAAV;AACA,MAAIC,GAAU,GAAG,EAAjB;AACA,MAAIC,OAAO,GAAG,CAAd;;AAEA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAA3B,EAAmCD,CAAC,EAApC,EAAwC;AAGtC,QAAID,OAAO,KAAK,CAAZ,IAAiBD,GAAG,CAACG,MAAJ,GAAa,CAAb,KAAmB,CAApC,IAAyCZ,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAjE,EAA0E;AAExE,UAAIF,GAAG,CAACG,MAAR,EAAgB;AACdN,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACAQ,QAAAA,GAAG,GAAG,EAAN;AACD;;AACDC,MAAAA,OAAO,GAAG,CAAV;AACD,KAPD,MAOO,IAAIA,OAAO,GAAG,CAAV,IAAeV,MAAM,CAACW,CAAD,CAAN,KAAcX,MAAM,CAACW,CAAC,GAAG,CAAL,CAAvC,EAAgD;AACrDD,MAAAA,OAAO,IAAI,CAAX;AACD,KAFM,MAEA;AAEL,UAAIA,OAAJ,EAAa;AACXJ,QAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACW,CAAC,GAAG,CAAL,CAAP,EAAgBD,OAAhB,EAAyBT,IAAzB,CAAvB,CAAd,CAAN;AACAS,QAAAA,OAAO,GAAG,CAAV;AACD;;AACDD,MAAAA,GAAG,CAACO,IAAJ,CAAShB,MAAM,CAACW,CAAD,CAAf;AACD;AACF;;AAED,MAAID,OAAJ,EAAa;AACXJ,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMS,iBAAiB,CAACf,MAAM,CAACA,MAAM,CAACY,MAAP,GAAgB,CAAjB,CAAP,EAA4BF,OAA5B,EAAqCT,IAArC,CAAvB,CAAd,CAAN;AACD,GAFD,MAEO,IAAIQ,GAAG,CAACG,MAAR,EAAgB;AACrBN,IAAAA,GAAG,GAAGC,MAAM,CAACM,MAAP,CAAc,CAACP,GAAD,EAAMQ,kBAAkB,CAACL,GAAD,EAAMR,IAAN,CAAxB,CAAd,CAAN;AACD;;AAED,MAAIA,IAAI,CAACgB,eAAT,EAA0B;AACxB,WAAOX,GAAP;AACD;;AAED,QAAMY,QAAQ,GAAGX,MAAM,CAACC,KAAP,CAAaF,GAAG,CAACM,MAAJ,GAAa,CAA1B,CAAjB;AACAM,EAAAA,QAAQ,CAACC,aAAT,CAAuBb,GAAG,CAACM,MAA3B,EAAmCQ,SAAnC;AACAd,EAAAA,GAAG,CAACe,IAAJ,CAASH,QAAT,EAAmB,CAAnB;AAEA,SAAOA,QAAP;AACD;AAED,OAAO,SAASI,YAAT,CACLvB,IADK,EAELwB,MAFK,EAGLC,KAHK,EAILvB,IAJK,EAKK;AACV,MAAI,EAAE,cAAcA,IAAhB,CAAJ,EAA2B;AACzB,UAAM,IAAIC,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,MAAI,CAACD,IAAI,CAACgB,eAAV,EAA2B;AACzBM,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAED,MAAIzB,MAAgB,GAAG,EAAvB;;AACA,SAAOA,MAAM,CAACY,MAAP,GAAgBY,KAAvB,EAA8B;AAC5B,UAAME,MAAM,GAAG7B,MAAM,CAAC8B,MAAP,CAAcJ,MAAM,CAACK,MAArB,EAA6BL,MAAM,CAACE,MAApC,CAAf;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB5B,MAAM,CAACgC,cAAP,CAAsBH,MAAtB,CAAjB;;AACA,QAAIA,MAAM,GAAG,CAAb,EAAgB;AACd,YAAMF,KAAK,GAAG,CAACE,MAAM,IAAI,CAAX,IAAgB,CAA9B;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGc,kBAAkB,CAACP,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAjC;AACD,KAHD,MAGO;AACL,YAAMuB,KAAK,GAAGE,MAAM,IAAI,CAAxB;AACA1B,MAAAA,MAAM,CAACgB,IAAP,CAAY,GAAGe,iBAAiB,CAACR,MAAD,EAASC,KAAT,EAAgBvB,IAAhB,CAAhC;AACD;AACF;;AACDD,EAAAA,MAAM,GAAGA,MAAM,CAACgC,KAAP,CAAa,CAAb,EAAgBR,KAAhB,CAAT;;AAEA,MAAIxB,MAAM,CAACY,MAAP,KAAkBY,KAAtB,EAA6B;AAC3B,UAAM,IAAItB,KAAJ,CAAU,sBAAV,CAAN;AACD;;AAED,SAAOF,MAAP;AACD;;AAED,SAAS8B,kBAAT,CACEP,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,MAAIT,KAAK,GAAG,CAAR,KAAc,CAAlB,EAAqB;AACnB,UAAM,IAAItB,KAAJ,CAAU,yBAAV,CAAN;AACD;;AAGD,QAAMF,MAAM,GAAG,IAAIkC,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsB,CAAtB,CAAf;;AACA,OAAK,IAAIC,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGT,KAA/B,EAAsCY,CAAC,EAAvC,EAA2C;AACzC,QAAIb,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAAP,GAAgBY,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAA9B,IAAoD,KAAKA,CAAC,GAAG,CAAjE,EAAqE;AACnEpC,MAAAA,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,IAAoC,KAAKG,CAAC,GAAGH,QAA7C;AACD;AACF;;AAEDV,EAAAA,MAAM,CAACE,MAAP,IAAiBQ,QAAQ,IAAIT,KAAK,GAAG,CAAZ,CAAzB;AACA,SAAOxB,MAAP;AACD;;AAED,SAAS+B,iBAAT,CACER,MADF,EAEEC,KAFF,EAGEvB,IAHF,EAIY;AAEV,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,MAAIM,KAAK,GAAG,CAAZ;;AACA,OAAK,IAAI5B,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAG0B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAApB,EAA6CtB,CAAC,EAA9C,EAAkD;AAEhD4B,IAAAA,KAAK,IAAI,CAAT;AACAA,IAAAA,KAAK,IAAIhB,MAAM,CAACK,MAAP,CAAcL,MAAM,CAACE,MAArB,CAAT;AACAF,IAAAA,MAAM,CAACE,MAAP,IAAiB,CAAjB;AACD;;AAGD,SAAO,IAAIS,KAAJ,CAAUV,KAAV,EAAiBW,IAAjB,CAAsBI,KAAtB,CAAP;AACD;;AAED,SAASzB,kBAAT,CAA4Bd,MAA5B,EAA8CC,IAA9C,EAAiF;AAE/E,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGX,MAAM,CAACY,MAAP,GAAgB,CAApC,EAAuCD,CAAC,EAAxC,EAA4C;AAC1CX,IAAAA,MAAM,CAACgB,IAAP,CAAY,CAAZ;AACD;;AAED,QAAMV,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,IAAIjC,MAAM,CAACY,MAAP,GAAgB,CAApB,CAAlB,CAAb,CAAZ;;AACA,OAAK,IAAIwB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGH,QAAQ,GAAGjC,MAAM,CAACY,MAAtC,EAA8CwB,CAAC,EAA/C,EAAmD;AACjD,QAAI,CAACpC,MAAM,CAACqC,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAGH,QAAf,CAAD,CAAN,GAAoC,KAAKG,CAAC,GAAGH,QAA9C,IAA2D,CAA/D,EAAkE;AAChE3B,MAAAA,GAAG,CAAC+B,IAAI,CAACC,KAAL,CAAWF,CAAC,GAAG,CAAf,CAAD,CAAH,IAA0B,KAAKA,CAAC,GAAG,CAAnC;AACD;AACF;;AAED,SAAO7B,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAgB1C,MAAM,CAACY,MAAP,GAAgB,CAAjB,IAAuB,CAAxB,GAA6B,CAA3C,CAAZ,CAAD,EAA6DN,GAA7D,CAAd,CAAP;AACD;;AAED,SAASS,iBAAT,CAA2BwB,KAA3B,EAA0Cf,KAA1C,EAAyDvB,IAAzD,EAA4F;AAE1F,QAAMgC,QAAgB,GAAGhC,IAAI,CAACgC,QAA9B;AAEA,QAAM3B,GAAG,GAAGC,MAAM,CAACC,KAAP,CAAa6B,IAAI,CAACG,IAAL,CAAUP,QAAQ,GAAG,CAArB,CAAb,CAAZ;;AAEA,OAAK,IAAItB,CAAC,GAAG,CAAb,EAAgBA,CAAC,GAAGL,GAAG,CAACM,MAAxB,EAAgCD,CAAC,EAAjC,EAAqC;AACnCL,IAAAA,GAAG,CAACqC,UAAJ,CAAeJ,KAAK,GAAG,IAAvB,EAA6B5B,CAA7B;AAEA4B,IAAAA,KAAK,IAAI,CAAT;AACD;;AAED,SAAOhC,MAAM,CAACM,MAAP,CAAc,CAACN,MAAM,CAACkC,IAAP,CAAY5C,MAAM,CAAC6C,MAAP,CAAclB,KAAK,IAAI,CAAvB,CAAZ,CAAD,EAAyClB,GAAzC,CAAd,CAAP;AACD","sourcesContent":["// Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)\n\nimport type {PrimitiveType} from '../schema/declare';\nimport type {CursorBuffer, ParquetCodecOptions} from './declare';\nimport varint from 'varint';\n\n// eslint-disable-next-line max-statements, complexity\nexport function encodeValues(\n type: PrimitiveType,\n values: any[],\n opts: ParquetCodecOptions\n): Buffer {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n switch (type) {\n case 'BOOLEAN':\n case 'INT32':\n case 'INT64':\n // tslint:disable-next-line:no-parameter-reassignment\n values = values.map((x) => parseInt(x, 10));\n break;\n\n default:\n throw new Error(`unsupported type: ${type}`);\n }\n\n let buf = Buffer.alloc(0);\n let run: any[] = [];\n let repeats = 0;\n\n for (let i = 0; i < values.length; i++) {\n // If we are at the beginning of a run and the next value is same we start\n // collecting repeated values\n if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) {\n // If we have any data in runs we need to encode them\n if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n run = [];\n }\n repeats = 1;\n } else if (repeats > 0 && values[i] === values[i - 1]) {\n repeats += 1;\n } else {\n // If values changes we need to post any previous repeated values\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]);\n repeats = 0;\n }\n run.push(values[i]);\n }\n }\n\n if (repeats) {\n buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]);\n } else if (run.length) {\n buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]);\n }\n\n if (opts.disableEnvelope) {\n return buf;\n }\n\n const envelope = Buffer.alloc(buf.length + 4);\n envelope.writeUInt32LE(buf.length, undefined);\n buf.copy(envelope, 4);\n\n return envelope;\n}\n\nexport function decodeValues(\n type: PrimitiveType,\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n if (!('bitWidth' in opts)) {\n throw new Error('bitWidth is required');\n }\n\n if (!opts.disableEnvelope) {\n cursor.offset += 4;\n }\n\n let values: number[] = [];\n while (values.length < count) {\n const header = varint.decode(cursor.buffer, cursor.offset);\n cursor.offset += varint.encodingLength(header);\n if (header & 1) {\n const count = (header >> 1) * 8;\n values.push(...decodeRunBitpacked(cursor, count, opts));\n } else {\n const count = header >> 1;\n values.push(...decodeRunRepeated(cursor, count, opts));\n }\n }\n values = values.slice(0, count);\n\n if (values.length !== count) {\n throw new Error('invalid RLE encoding');\n }\n\n return values;\n}\n\nfunction decodeRunBitpacked(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n if (count % 8 !== 0) {\n throw new Error('must be a multiple of 8');\n }\n\n // tslint:disable-next-line:prefer-array-literal\n const values = new Array(count).fill(0);\n for (let b = 0; b < bitWidth * count; b++) {\n if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) {\n values[Math.floor(b / bitWidth)] |= 1 << b % bitWidth;\n }\n }\n\n cursor.offset += bitWidth * (count / 8);\n return values;\n}\n\nfunction decodeRunRepeated(\n cursor: CursorBuffer,\n count: number,\n opts: ParquetCodecOptions\n): number[] {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n let value = 0;\n for (let i = 0; i < Math.ceil(bitWidth / 8); i++) {\n // eslint-disable-next-line\n value << 8; // TODO - this looks wrong\n value += cursor.buffer[cursor.offset];\n cursor.offset += 1;\n }\n\n // tslint:disable-next-line:prefer-array-literal\n return new Array(count).fill(value);\n}\n\nfunction encodeRunBitpacked(values: number[], opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n for (let i = 0; i < values.length % 8; i++) {\n values.push(0);\n }\n\n const buf = Buffer.alloc(Math.ceil(bitWidth * (values.length / 8)));\n for (let b = 0; b < bitWidth * values.length; b++) {\n if ((values[Math.floor(b / bitWidth)] & (1 << b % bitWidth)) > 0) {\n buf[Math.floor(b / 8)] |= 1 << b % 8;\n }\n }\n\n return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]);\n}\n\nfunction encodeRunRepeated(value: number, count: number, opts: ParquetCodecOptions): Buffer {\n // @ts-ignore\n const bitWidth: number = opts.bitWidth;\n\n const buf = Buffer.alloc(Math.ceil(bitWidth / 8));\n\n for (let i = 0; i < buf.length; i++) {\n buf.writeUInt8(value & 0xff, i);\n // eslint-disable-next-line\n value >> 8; // TODO - this looks wrong\n }\n\n return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]);\n}\n"],"file":"rle.js"}
@@ -1,80 +1,66 @@
1
- import * as Util from './util';
2
- import zlib from 'zlib';
3
- import snappyjs from 'snappyjs';
4
- let brotli;
5
- let lzo;
6
- let lz4js;
7
- export const PARQUET_COMPRESSION_METHODS = {
8
- UNCOMPRESSED: {
9
- deflate: deflate_identity,
10
- inflate: inflate_identity
11
- },
12
- GZIP: {
13
- deflate: deflate_gzip,
14
- inflate: inflate_gzip
15
- },
16
- SNAPPY: {
17
- deflate: deflate_snappy,
18
- inflate: inflate_snappy
19
- },
20
- LZO: {
21
- deflate: deflate_lzo,
22
- inflate: inflate_lzo
23
- },
24
- BROTLI: {
25
- deflate: deflate_brotli,
26
- inflate: inflate_brotli
1
+ import { NoCompression, GZipCompression, SnappyCompression, BrotliCompression, LZOCompression, LZ4Compression, ZstdCompression } from '@loaders.gl/compression';
2
+ import { toArrayBuffer, toBuffer } from './utils/buffer-utils';
3
+ import brotliDecompress from 'brotli/decompress';
4
+ import lz4js from 'lz4js';
5
+ import lzo from 'lzo';
6
+ import { ZstdCodec } from 'zstd-codec';
7
+ const modules = {
8
+ brotli: {
9
+ decompress: brotliDecompress,
10
+ compress: () => {
11
+ throw new Error('brotli compress');
12
+ }
27
13
  },
28
- LZ4: {
29
- deflate: deflate_lz4,
30
- inflate: inflate_lz4
31
- }
14
+ lz4js,
15
+ lzo,
16
+ 'zstd-codec': ZstdCodec
32
17
  };
33
- export function deflate(method, value) {
34
- if (!(method in PARQUET_COMPRESSION_METHODS)) {
35
- throw new Error(`invalid compression method: ${method}`);
36
- }
37
-
38
- return PARQUET_COMPRESSION_METHODS[method].deflate(value);
39
- }
40
-
41
- function deflate_identity(value) {
42
- return value;
43
- }
44
-
45
- function deflate_gzip(value) {
46
- return zlib.gzipSync(value);
47
- }
48
-
49
- function deflate_snappy(value) {
50
- return snappyjs.compress(value);
18
+ export const PARQUET_COMPRESSION_METHODS = {
19
+ UNCOMPRESSED: new NoCompression(),
20
+ GZIP: new GZipCompression(),
21
+ SNAPPY: new SnappyCompression(),
22
+ BROTLI: new BrotliCompression({
23
+ modules
24
+ }),
25
+ LZ4: new LZ4Compression({
26
+ modules
27
+ }),
28
+ LZ4_RAW: new LZ4Compression({
29
+ modules
30
+ }),
31
+ LZO: new LZOCompression({
32
+ modules
33
+ }),
34
+ ZSTD: new ZstdCompression({
35
+ modules
36
+ })
37
+ };
38
+ export async function preloadCompressions(options) {
39
+ const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
40
+ return await Promise.all(compressions.map(compression => compression.preload()));
51
41
  }
42
+ export async function deflate(method, value) {
43
+ const compression = PARQUET_COMPRESSION_METHODS[method];
52
44
 
53
- function deflate_lzo(value) {
54
- lzo = lzo || Util.load('lzo');
55
- return lzo.compress(value);
56
- }
45
+ if (!compression) {
46
+ throw new Error(`parquet: invalid compression method: ${method}`);
47
+ }
57
48
 
58
- function deflate_brotli(value) {
59
- brotli = brotli || Util.load('brotli');
60
- const result = brotli.compress(value, {
61
- mode: 0,
62
- quality: 8,
63
- lgwin: 22
64
- });
65
- return result ? Buffer.from(result) : Buffer.alloc(0);
49
+ const inputArrayBuffer = toArrayBuffer(value);
50
+ const compressedArrayBuffer = await compression.compress(inputArrayBuffer);
51
+ return toBuffer(compressedArrayBuffer);
66
52
  }
53
+ export async function decompress(method, value, size) {
54
+ const compression = PARQUET_COMPRESSION_METHODS[method];
67
55
 
68
- function deflate_lz4(value) {
69
- lz4js = lz4js || Util.load('lz4js');
70
-
71
- try {
72
- return Buffer.from(lz4js.compress(value));
73
- } catch (err) {
74
- throw err;
56
+ if (!compression) {
57
+ throw new Error(`parquet: invalid compression method: ${method}`);
75
58
  }
76
- }
77
59
 
60
+ const inputArrayBuffer = toArrayBuffer(value);
61
+ const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);
62
+ return toBuffer(compressedArrayBuffer);
63
+ }
78
64
  export function inflate(method, value, size) {
79
65
  if (!(method in PARQUET_COMPRESSION_METHODS)) {
80
66
  throw new Error(`invalid compression method: ${method}`);
@@ -82,41 +68,4 @@ export function inflate(method, value, size) {
82
68
 
83
69
  return PARQUET_COMPRESSION_METHODS[method].inflate(value, size);
84
70
  }
85
-
86
- function inflate_identity(value) {
87
- return value;
88
- }
89
-
90
- function inflate_gzip(value) {
91
- return zlib.gunzipSync(value);
92
- }
93
-
94
- function inflate_snappy(value) {
95
- return snappyjs.uncompress(value);
96
- }
97
-
98
- function inflate_lzo(value, size) {
99
- lzo = lzo || Util.load('lzo');
100
- return lzo.decompress(value, size);
101
- }
102
-
103
- function inflate_lz4(value, size) {
104
- lz4js = lz4js || Util.load('lz4js');
105
-
106
- try {
107
- return Buffer.from(lz4js.decompress(value, size));
108
- } catch (err) {
109
- throw err;
110
- }
111
- }
112
-
113
- function inflate_brotli(value) {
114
- brotli = brotli || Util.load('brotli');
115
-
116
- if (!value.length) {
117
- return Buffer.alloc(0);
118
- }
119
-
120
- return Buffer.from(brotli.decompress(value));
121
- }
122
71
  //# sourceMappingURL=compression.js.map