@loaders.gl/parquet 3.3.0 → 3.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/dist/dist.min.js +26 -17
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +3 -3
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/parse-parquet.js +25 -49
  6. package/dist/es5/lib/parse-parquet.js.map +1 -1
  7. package/dist/es5/parquet-loader.js +2 -3
  8. package/dist/es5/parquet-loader.js.map +1 -1
  9. package/dist/es5/parquet-wasm-loader.js +1 -1
  10. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  11. package/dist/es5/parquet-wasm-writer.js +1 -1
  12. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  13. package/dist/es5/parquet-writer.js +1 -1
  14. package/dist/es5/parquet-writer.js.map +1 -1
  15. package/dist/es5/parquetjs/compression.js +5 -15
  16. package/dist/es5/parquetjs/compression.js.map +1 -1
  17. package/dist/es5/parquetjs/encoder/{parquet-encoder.js → writer.js} +158 -70
  18. package/dist/es5/parquetjs/encoder/writer.js.map +1 -0
  19. package/dist/es5/parquetjs/file.js +94 -0
  20. package/dist/es5/parquetjs/file.js.map +1 -0
  21. package/dist/es5/parquetjs/parser/parquet-cursor.js +183 -0
  22. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +1 -0
  23. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +327 -0
  24. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  25. package/dist/es5/parquetjs/parser/parquet-reader.js +222 -553
  26. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  27. package/dist/es5/parquetjs/schema/declare.js +1 -3
  28. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  29. package/dist/es5/parquetjs/schema/shred.js +33 -39
  30. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  31. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  32. package/dist/es5/parquetjs/utils/buffer-utils.js +19 -0
  33. package/dist/es5/parquetjs/utils/buffer-utils.js.map +1 -0
  34. package/dist/es5/parquetjs/utils/file-utils.js +3 -2
  35. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  36. package/dist/esm/index.js +1 -1
  37. package/dist/esm/index.js.map +1 -1
  38. package/dist/esm/lib/parse-parquet.js +12 -6
  39. package/dist/esm/lib/parse-parquet.js.map +1 -1
  40. package/dist/esm/parquet-loader.js +2 -3
  41. package/dist/esm/parquet-loader.js.map +1 -1
  42. package/dist/esm/parquet-wasm-loader.js +1 -1
  43. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  44. package/dist/esm/parquet-wasm-writer.js +1 -1
  45. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  46. package/dist/esm/parquet-writer.js +1 -1
  47. package/dist/esm/parquet-writer.js.map +1 -1
  48. package/dist/esm/parquetjs/compression.js +1 -10
  49. package/dist/esm/parquetjs/compression.js.map +1 -1
  50. package/dist/esm/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -7
  51. package/dist/esm/parquetjs/encoder/writer.js.map +1 -0
  52. package/dist/esm/parquetjs/file.js +81 -0
  53. package/dist/esm/parquetjs/file.js.map +1 -0
  54. package/dist/esm/parquetjs/parser/parquet-cursor.js +78 -0
  55. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +1 -0
  56. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +129 -0
  57. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +1 -0
  58. package/dist/esm/parquetjs/parser/parquet-reader.js +72 -158
  59. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  60. package/dist/esm/parquetjs/schema/declare.js +0 -1
  61. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  62. package/dist/esm/parquetjs/schema/shred.js +34 -42
  63. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  64. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  65. package/dist/esm/parquetjs/utils/buffer-utils.js +13 -0
  66. package/dist/esm/parquetjs/utils/buffer-utils.js.map +1 -0
  67. package/dist/esm/parquetjs/utils/file-utils.js +1 -1
  68. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  69. package/dist/index.d.ts +1 -1
  70. package/dist/index.d.ts.map +1 -1
  71. package/dist/index.js +4 -3
  72. package/dist/lib/parse-parquet.d.ts +2 -2
  73. package/dist/lib/parse-parquet.d.ts.map +1 -1
  74. package/dist/lib/parse-parquet.js +12 -24
  75. package/dist/parquet-loader.d.ts +0 -1
  76. package/dist/parquet-loader.d.ts.map +1 -1
  77. package/dist/parquet-loader.js +1 -2
  78. package/dist/parquet-worker.js +24 -15
  79. package/dist/parquet-worker.js.map +3 -3
  80. package/dist/parquetjs/compression.d.ts.map +1 -1
  81. package/dist/parquetjs/compression.js +5 -16
  82. package/dist/parquetjs/encoder/{parquet-encoder.d.ts → writer.d.ts} +19 -10
  83. package/dist/parquetjs/encoder/writer.d.ts.map +1 -0
  84. package/dist/parquetjs/encoder/{parquet-encoder.js → writer.js} +37 -39
  85. package/dist/parquetjs/file.d.ts +10 -0
  86. package/dist/parquetjs/file.d.ts.map +1 -0
  87. package/dist/parquetjs/file.js +99 -0
  88. package/dist/parquetjs/parser/parquet-cursor.d.ts +36 -0
  89. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +1 -0
  90. package/dist/parquetjs/parser/parquet-cursor.js +74 -0
  91. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +40 -0
  92. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +1 -0
  93. package/dist/parquetjs/parser/parquet-envelope-reader.js +136 -0
  94. package/dist/parquetjs/parser/parquet-reader.d.ts +57 -47
  95. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  96. package/dist/parquetjs/parser/parquet-reader.js +102 -168
  97. package/dist/parquetjs/schema/declare.d.ts +7 -14
  98. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  99. package/dist/parquetjs/schema/declare.js +0 -2
  100. package/dist/parquetjs/schema/shred.d.ts +0 -115
  101. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  102. package/dist/parquetjs/schema/shred.js +43 -161
  103. package/dist/parquetjs/schema/types.d.ts +2 -2
  104. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  105. package/dist/parquetjs/utils/buffer-utils.d.ts +10 -0
  106. package/dist/parquetjs/utils/buffer-utils.d.ts.map +1 -0
  107. package/dist/parquetjs/utils/buffer-utils.js +22 -0
  108. package/dist/parquetjs/utils/file-utils.d.ts +4 -3
  109. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  110. package/dist/parquetjs/utils/file-utils.js +5 -2
  111. package/package.json +5 -7
  112. package/src/index.ts +2 -2
  113. package/src/lib/parse-parquet.ts +12 -25
  114. package/src/parquet-loader.ts +1 -3
  115. package/src/parquetjs/compression.ts +1 -14
  116. package/src/parquetjs/encoder/{parquet-encoder.ts → writer.ts} +28 -22
  117. package/src/parquetjs/file.ts +90 -0
  118. package/src/parquetjs/parser/parquet-cursor.ts +94 -0
  119. package/src/parquetjs/parser/parquet-envelope-reader.ts +199 -0
  120. package/src/parquetjs/parser/parquet-reader.ts +122 -239
  121. package/src/parquetjs/schema/declare.ts +9 -17
  122. package/src/parquetjs/schema/shred.ts +28 -157
  123. package/src/parquetjs/schema/types.ts +27 -21
  124. package/src/parquetjs/utils/buffer-utils.ts +18 -0
  125. package/src/parquetjs/utils/file-utils.ts +4 -3
  126. package/dist/es5/lib/convert-schema-deep.ts.disabled +0 -910
  127. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +0 -1
  128. package/dist/esm/lib/convert-schema-deep.ts.disabled +0 -910
  129. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +0 -1
  130. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +0 -1
  131. package/src/lib/convert-schema-deep.ts.disabled +0 -910
package/dist/parquetjs/compression.d.ts.map
@@ -1 +1 @@
- {"version":3,"file":"compression.d.ts","sourceRoot":"","sources":["../../src/parquetjs/compression.ts"],"names":[],"mappings":";AAIA,OAAO,EACL,WAAW,EAQZ,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAC,kBAAkB,EAAC,MAAM,kBAAkB,CAAC;AAuCpD,eAAO,MAAM,2BAA2B,EAAE,MAAM,CAAC,kBAAkB,EAAE,WAAW,CAU/E,CAAC;AAEF;;;GAGG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,CAAC,EAAE;IAAC,OAAO,EAAE;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC,CAAA;CAAC,mBAGlF;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAQxF;AAED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,MAAM,EAAE,kBAAkB,EAC1B,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAQjB;AAKD,wBAAgB,OAAO,CAAC,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAMvF"}
+ {"version":3,"file":"compression.d.ts","sourceRoot":"","sources":["../../src/parquetjs/compression.ts"],"names":[],"mappings":";AAIA,OAAO,EACL,WAAW,EAQZ,MAAM,yBAAyB,CAAC;AAEjC,OAAO,EAAC,kBAAkB,EAAC,MAAM,kBAAkB,CAAC;AA0BpD,eAAO,MAAM,2BAA2B,EAAE,MAAM,CAAC,kBAAkB,EAAE,WAAW,CAU/E,CAAC;AAEF;;;GAGG;AACH,wBAAsB,mBAAmB,CAAC,OAAO,CAAC,EAAE;IAAC,OAAO,EAAE;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC,CAAA;CAAC,mBAGlF;AAED;;GAEG;AACH,wBAAsB,OAAO,CAAC,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAQxF;AAED;;GAEG;AACH,wBAAsB,UAAU,CAC9B,MAAM,EAAE,kBAAkB,EAC1B,KAAK,EAAE,MAAM,EACb,IAAI,EAAE,MAAM,GACX,OAAO,CAAC,MAAM,CAAC,CAQjB;AAKD,wBAAgB,OAAO,CAAC,MAAM,EAAE,kBAAkB,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAMvF"}
package/dist/parquetjs/compression.js
@@ -8,18 +8,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.inflate = exports.decompress = exports.deflate = exports.preloadCompressions = exports.PARQUET_COMPRESSION_METHODS = void 0;
  const compression_1 = require("@loaders.gl/compression");
- /** We can't use loaders-util buffer handling since we are dependent on buffers even in the browser */
- function toBuffer(arrayBuffer) {
-     return Buffer.from(arrayBuffer);
- }
- function toArrayBuffer(buffer) {
-     // TODO - per docs we should just be able to call buffer.buffer, but there are issues
-     if (Buffer.isBuffer(buffer)) {
-         const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
-         return typedArray.slice().buffer;
-     }
-     return buffer;
- }
+ const buffer_utils_1 = require("./utils/buffer-utils");
  // TODO switch to worker compression to avoid bundling...
  // import brotli from 'brotli'; - brotli has problems with decompress in browsers
  // import brotliDecompress from 'brotli/decompress';
@@ -68,9 +57,9 @@ async function deflate(method, value) {
      if (!compression) {
          throw new Error(`parquet: invalid compression method: ${method}`);
      }
-     const inputArrayBuffer = toArrayBuffer(value);
+     const inputArrayBuffer = (0, buffer_utils_1.toArrayBuffer)(value);
      const compressedArrayBuffer = await compression.compress(inputArrayBuffer);
-     return toBuffer(compressedArrayBuffer);
+     return (0, buffer_utils_1.toBuffer)(compressedArrayBuffer);
  }
  exports.deflate = deflate;
  /**
@@ -81,9 +70,9 @@ async function decompress(method, value, size) {
      if (!compression) {
          throw new Error(`parquet: invalid compression method: ${method}`);
      }
-     const inputArrayBuffer = toArrayBuffer(value);
+     const inputArrayBuffer = (0, buffer_utils_1.toArrayBuffer)(value);
      const compressedArrayBuffer = await compression.decompress(inputArrayBuffer, size);
-     return toBuffer(compressedArrayBuffer);
+     return (0, buffer_utils_1.toBuffer)(compressedArrayBuffer);
  }
  exports.decompress = decompress;
  /*
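
The inline `toBuffer`/`toArrayBuffer` helpers removed above now come from the new `utils/buffer-utils` module (see the new buffer-utils entries in the file list). A minimal TypeScript sketch of what that module plausibly contains, reconstructed from the deleted inline code rather than from the shipped file:

```ts
// Sketch of utils/buffer-utils, based on the inline helpers removed above;
// the actual shipped module may differ in details.

/** Wrap an ArrayBuffer in a Node Buffer (parquetjs relies on Buffers even in the browser). */
export function toBuffer(arrayBuffer: ArrayBuffer): Buffer {
  return Buffer.from(arrayBuffer);
}

/** Copy a Buffer's bytes out into a standalone ArrayBuffer. */
export function toArrayBuffer(buffer: Buffer | ArrayBuffer): ArrayBuffer {
  if (Buffer.isBuffer(buffer)) {
    // Respect byteOffset/length: the Buffer may be a view into a larger pool,
    // so slice() the bytes out rather than returning buffer.buffer directly.
    const typedArray = new Uint8Array(buffer.buffer, buffer.byteOffset, buffer.length);
    return typedArray.slice().buffer;
  }
  return buffer;
}
```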
package/dist/parquetjs/encoder/{parquet-encoder.d.ts → writer.d.ts}
@@ -1,10 +1,10 @@
  /// <reference types="node" />
  /// <reference types="node" />
- import { stream } from '@loaders.gl/loader-utils';
+ import { Transform, Writable } from 'stream';
  import { ParquetBuffer } from '../schema/declare';
  import { ParquetSchema } from '../schema/schema';
  import { RowGroup } from '../parquet-thrift';
- export interface ParquetEncoderOptions {
+ export interface ParquetWriterOptions {
      baseOffset?: number;
      rowGroupSize?: number;
      pageSize?: number;
@@ -17,21 +17,21 @@ export interface ParquetEncoderOptions {
      start?: number;
  }
  /**
-  * Write a parquet file to an output stream. The ParquetEncoder will perform
+  * Write a parquet file to an output stream. The ParquetWriter will perform
   * buffering/batching for performance, so close() must be called after all rows
   * are written.
   */
- export declare class ParquetEncoder<T> {
+ export declare class ParquetWriter<T> {
      /**
       * Convenience method to create a new buffered parquet writer that writes to
       * the specified file
       */
-     static openFile<T>(schema: ParquetSchema, path: string, opts?: ParquetEncoderOptions): Promise<ParquetEncoder<T>>;
+     static openFile<T>(schema: ParquetSchema, path: string, opts?: ParquetWriterOptions): Promise<ParquetWriter<T>>;
      /**
       * Convenience method to create a new buffered parquet writer that writes to
       * the specified stream
       */
-     static openStream<T>(schema: ParquetSchema, outputStream: stream.Writable, opts?: ParquetEncoderOptions): Promise<ParquetEncoder<T>>;
+     static openStream<T>(schema: ParquetSchema, outputStream: Writable, opts?: ParquetWriterOptions): Promise<ParquetWriter<T>>;
      schema: ParquetSchema;
      envelopeWriter: ParquetEnvelopeWriter;
      rowBuffer: ParquetBuffer;
@@ -41,7 +41,7 @@ export declare class ParquetEncoder<T> {
      /**
       * Create a new buffered parquet writer for a given envelope writer
       */
-     constructor(schema: ParquetSchema, envelopeWriter: ParquetEnvelopeWriter, opts: ParquetEncoderOptions);
+     constructor(schema: ParquetSchema, envelopeWriter: ParquetEnvelopeWriter, opts: ParquetWriterOptions);
      writeHeader(): Promise<void>;
      /**
       * Append a single row to the parquet file. Rows are buffered in memory until
@@ -82,7 +82,7 @@ export declare class ParquetEnvelopeWriter {
      /**
       * Create a new parquet envelope writer that writes to the specified stream
       */
-     static openStream(schema: ParquetSchema, outputStream: stream.Writable, opts: ParquetEncoderOptions): Promise<ParquetEnvelopeWriter>;
+     static openStream(schema: ParquetSchema, outputStream: Writable, opts: ParquetWriterOptions): Promise<ParquetEnvelopeWriter>;
      schema: ParquetSchema;
      write: (buf: Buffer) => Promise<void>;
      close: () => Promise<void>;
@@ -91,7 +91,7 @@ export declare class ParquetEnvelopeWriter {
      rowGroups: RowGroup[];
      pageSize: number;
      useDataPageV2: boolean;
-     constructor(schema: ParquetSchema, writeFn: (buf: Buffer) => Promise<void>, closeFn: () => Promise<void>, fileOffset: number, opts: ParquetEncoderOptions);
+     constructor(schema: ParquetSchema, writeFn: (buf: Buffer) => Promise<void>, closeFn: () => Promise<void>, fileOffset: number, opts: ParquetWriterOptions);
      writeSection(buf: Buffer): Promise<void>;
      /**
       * Encode the parquet file header
@@ -112,4 +112,13 @@ export declare class ParquetEnvelopeWriter {
       */
      setPageSize(cnt: number): void;
  }
- //# sourceMappingURL=parquet-encoder.d.ts.map
+ /**
+  * Create a parquet transform stream
+  */
+ export declare class ParquetTransformer<T> extends Transform {
+     writer: ParquetWriter<T>;
+     constructor(schema: ParquetSchema, opts?: ParquetWriterOptions);
+     _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void>;
+     _flush(callback: (val?: any) => void): Promise<void>;
+ }
+ //# sourceMappingURL=writer.d.ts.map
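
The substantive changes in this declaration file are the rename from `ParquetEncoder`/`ParquetEncoderOptions` to `ParquetWriter`/`ParquetWriterOptions`, the switch from the loader-utils `stream` shim to Node's own `stream` module, and the newly declared `ParquetTransformer`. A hypothetical usage sketch against these declarations; the import paths mirror this package's layout, and the schema fields, file path, and row values are illustrative, not taken from the package:

```ts
// Hypothetical usage of the renamed writer API (formerly ParquetEncoder).
import {ParquetSchema} from './parquetjs/schema/schema';
import {ParquetWriter} from './parquetjs/encoder/writer';

async function writeExample(): Promise<void> {
  // Assumed parquetjs-style field declarations; illustrative only.
  const schema = new ParquetSchema({
    name: {type: 'UTF8'},
    count: {type: 'INT32'}
  });
  // openFile resolves to a buffered writer backed by a file stream.
  const writer = await ParquetWriter.openFile(schema, '/tmp/example.parquet');
  await writer.appendRow({name: 'example', count: 1});
  // Per the class docs above, close() must be called after all rows are
  // written so buffered row groups and the footer are flushed.
  await writer.close();
}
```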
package/dist/parquetjs/encoder/writer.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/encoder/writer.ts"],"names":[],"mappings":";;AAEA,OAAO,EAAC,SAAS,EAAE,QAAQ,EAAC,MAAM,QAAQ,CAAC;AAG3C,OAAO,EACL,aAAa,EAKd,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAaL,QAAQ,EAGT,MAAM,mBAAmB,CAAC;AA2B3B,MAAM,WAAW,oBAAoB;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,aAAa,CAAC,EAAE,OAAO,CAAC;IAGxB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;GAIG;AAEH,qBAAa,aAAa,CAAC,CAAC;IAC1B;;;OAGG;WACU,QAAQ,CAAC,CAAC,EACrB,MAAM,EAAE,aAAa,EACrB,IAAI,EAAE,MAAM,EACZ,IAAI,CAAC,EAAE,oBAAoB,GAC1B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAK5B;;;OAGG;WACU,UAAU,CAAC,CAAC,EACvB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,QAAQ,EACtB,IAAI,CAAC,EAAE,oBAAoB,GAC1B,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;IAWrB,MAAM,EAAE,aAAa,CAAC;IACtB,cAAc,EAAE,qBAAqB,CAAC;IACtC,SAAS,EAAE,aAAa,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAE5C;;OAEG;gBAED,MAAM,EAAE,aAAa,EACrB,cAAc,EAAE,qBAAqB,EACrC,IAAI,EAAE,oBAAoB;IActB,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAUlC;;;OAGG;IACG,SAAS,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAWzC;;;;;OAKG;IACG,KAAK,CAAC,QAAQ,CAAC,EAAE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAqBjD;;OAEG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI;IAK7C;;;;;OAKG;IACH,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;IAIlC;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B;AAED;;;;;GAKG;AACH,qBAAa,qBAAqB;IAChC;;OAEG;WACU,UAAU,CACrB,MAAM,EAAE,aAAa,EACrB,YAAY,EAAE,QAAQ,EACtB,IAAI,EAAE,oBAAoB,GACzB,OAAO,CAAC,qBAAqB,CAAC;IAM1B,MAAM,EAAE,aAAa,CAAC;IACtB,KAAK,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IACtC,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,aAAa,EAAE,OAAO,CAAC;gBAG5B,MAAM,EAAE,aAAa,EACrB,OAAO,EAAE,CAAC,GAAG,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,EACvC,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,EAC5B,UAAU,EAAE,MAAM,EAClB,IAAI,EAAE,oBAAoB;IAY5B,YAAY,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAKxC;;OAEG;IACH,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAI5B;;;OAGG;IACG,aAAa,CAAC,OAAO,EAAE,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;IAY1D;;OAEG;IACH,WAAW,CAAC,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAWhE;;;OAGG;IACH,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI;CAG/B;AAED;;GAEG;AACH,qBAAa,kBAAkB,CAAC,CAAC,CAAE,SAAQ,SAAS;IAC3C,MAAM,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;gBAEpB,MAAM,EAAE,aAAa,EAAE,IAAI,GAAE,oBAAyB;IAiBlE,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,KAAK,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC;IAS9E,MAAM,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,KAAK,IAAI;CAG3C"}
package/dist/parquetjs/encoder/{parquet-encoder.js → writer.js}
@@ -26,7 +26,10 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
      return (mod && mod.__esModule) ? mod : { "default": mod };
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.ParquetEnvelopeWriter = exports.ParquetEncoder = void 0;
+ exports.ParquetTransformer = exports.ParquetEnvelopeWriter = exports.ParquetWriter = void 0;
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
+ /* eslint-disable camelcase */
+ const stream_1 = require("stream");
  const codecs_1 = require("../codecs");
  const Compression = __importStar(require("../compression"));
  const Shred = __importStar(require("../schema/shred"));
@@ -53,27 +56,31 @@ const PARQUET_DEFAULT_ROW_GROUP_SIZE = 4096;
  const PARQUET_RDLVL_TYPE = 'INT32';
  const PARQUET_RDLVL_ENCODING = 'RLE';
  /**
-  * Write a parquet file to an output stream. The ParquetEncoder will perform
+  * Write a parquet file to an output stream. The ParquetWriter will perform
   * buffering/batching for performance, so close() must be called after all rows
   * are written.
   */
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
- class ParquetEncoder {
+ class ParquetWriter {
      /**
       * Convenience method to create a new buffered parquet writer that writes to
       * the specified file
       */
      static async openFile(schema, path, opts) {
          const outputStream = await (0, file_utils_1.osopen)(path, opts);
-         return ParquetEncoder.openStream(schema, outputStream, opts);
+         return ParquetWriter.openStream(schema, outputStream, opts);
      }
      /**
       * Convenience method to create a new buffered parquet writer that writes to
       * the specified stream
       */
-     static async openStream(schema, outputStream, opts = {}) {
+     static async openStream(schema, outputStream, opts) {
+         if (!opts) {
+             // tslint:disable-next-line:no-parameter-reassignment
+             opts = {};
+         }
          const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts);
-         return new ParquetEncoder(schema, envelopeWriter, opts);
+         return new ParquetWriter(schema, envelopeWriter, opts);
      }
      /**
       * Create a new buffered parquet writer for a given envelope writer
@@ -159,7 +166,7 @@ class ParquetEncoder {
          this.envelopeWriter.setPageSize(cnt);
      }
  }
- exports.ParquetEncoder = ParquetEncoder;
+ exports.ParquetWriter = ParquetWriter;
  /**
   * Create a parquet file from a schema and a number of row groups. This class
   * performs direct, unbuffered writes to the underlying output stream and is
@@ -230,40 +237,31 @@ class ParquetEnvelopeWriter {
  exports.ParquetEnvelopeWriter = ParquetEnvelopeWriter;
  /**
   * Create a parquet transform stream
- export class ParquetTransformer<T> extends stream.Transform {
-   public writer: ParquetEncoder<T>;
-
-   constructor(schema: ParquetSchema, opts: ParquetEncoderOptions = {}) {
-     super({objectMode: true});
-
-     const writeProxy = (function (t: ParquetTransformer<any>) {
-       return async function (b: any): Promise<void> {
-         t.push(b);
-       };
-     })(this);
-
-     this.writer = new ParquetEncoder(
-       schema,
-       new ParquetEnvelopeWriter(schema, writeProxy, async () => {}, 0, opts),
-       opts
-     );
-   }
-
-   // tslint:disable-next-line:function-name
-   _transform(row: any, encoding: string, callback: (val?: any) => void): Promise<void> {
-     if (row) {
-       return this.writer.appendRow(row).then(callback);
+  */
+ class ParquetTransformer extends stream_1.Transform {
+     constructor(schema, opts = {}) {
+         super({ objectMode: true });
+         const writeProxy = (function (t) {
+             return async function (b) {
+                 t.push(b);
+             };
+         })(this);
+         this.writer = new ParquetWriter(schema, new ParquetEnvelopeWriter(schema, writeProxy, async () => { }, 0, opts), opts);
+     }
+     // tslint:disable-next-line:function-name
+     _transform(row, encoding, callback) {
+         if (row) {
+             return this.writer.appendRow(row).then(callback);
+         }
+         callback();
+         return Promise.resolve();
+     }
+     // tslint:disable-next-line:function-name
+     async _flush(callback) {
+         await this.writer.close(callback);
      }
-     callback();
-     return Promise.resolve();
-   }
-
-   // tslint:disable-next-line:function-name
-   async _flush(callback: (val?: any) => void) {
-     await this.writer.close(callback);
-   }
  }
- */
+ exports.ParquetTransformer = ParquetTransformer;
  /**
   * Encode a consecutive array of data using one of the parquet encodings
   */
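
`ParquetTransformer`, previously shipped only as commented-out source, is now compiled and exported. Since it is an objectMode `Transform` whose `_transform` appends each row through the writer and whose `_flush` closes it, it can sit in a standard Node stream pipeline. A sketch under assumed schema syntax and illustrative paths:

```ts
// Sketch: the re-enabled ParquetTransformer as an objectMode pipeline stage.
import fs from 'fs';
import {Readable, pipeline} from 'stream';
import {ParquetSchema} from './parquetjs/schema/schema';
import {ParquetTransformer} from './parquetjs/encoder/writer';

const schema = new ParquetSchema({name: {type: 'UTF8'}}); // assumed field syntax

pipeline(
  Readable.from([{name: 'a'}, {name: 'b'}]), // objectMode source of rows
  new ParquetTransformer(schema),            // _transform appends rows, _flush closes the writer
  fs.createWriteStream('/tmp/rows.parquet'), // receives the encoded Buffers
  (err) => err && console.error(err)
);
```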
package/dist/parquetjs/file.d.ts
@@ -0,0 +1,10 @@
+ /// <reference types="node" />
+ import fs from 'fs';
+ export declare function fopen(filePath: any): Promise<unknown>;
+ export declare function fstat(filePath: any): Promise<fs.Stats>;
+ export declare function fread(fd: any, position: any, length: any): Promise<unknown>;
+ export declare function fclose(fd: any): Promise<unknown>;
+ export declare function oswrite(os: any, buf: any): Promise<void>;
+ export declare function osclose(os: any): Promise<void>;
+ export declare function osopen(path: any, opts: any): Promise<unknown>;
+ //# sourceMappingURL=file.d.ts.map
package/dist/parquetjs/file.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"file.d.ts","sourceRoot":"","sources":["../../src/parquetjs/file.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,MAAM,IAAI,CAAC;AAEpB,wBAAgB,KAAK,CAAC,QAAQ,KAAA,oBAU7B;AAED,wBAAgB,KAAK,CAAC,QAAQ,KAAA,qBAU7B;AAED,wBAAgB,KAAK,CAAC,EAAE,KAAA,EAAE,QAAQ,KAAA,EAAE,MAAM,KAAA,oBAYzC;AAED,wBAAgB,MAAM,CAAC,EAAE,KAAA,oBAUxB;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,EAAE,GAAG,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAU9C;AAED,wBAAgB,OAAO,CAAC,EAAE,KAAA,GAAG,OAAO,CAAC,IAAI,CAAC,CAUzC;AAED,wBAAgB,MAAM,CAAC,IAAI,KAAA,EAAE,IAAI,KAAA,oBAYhC"}
package/dist/parquetjs/file.js
@@ -0,0 +1,99 @@
+ "use strict";
+ var __importDefault = (this && this.__importDefault) || function (mod) {
+     return (mod && mod.__esModule) ? mod : { "default": mod };
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.osopen = exports.osclose = exports.oswrite = exports.fclose = exports.fread = exports.fstat = exports.fopen = void 0;
+ // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
+ const fs_1 = __importDefault(require("fs"));
+ function fopen(filePath) {
+     return new Promise((resolve, reject) => {
+         fs_1.default.open(filePath, 'r', (err, fd) => {
+             if (err) {
+                 reject(err);
+             }
+             else {
+                 resolve(fd);
+             }
+         });
+     });
+ }
+ exports.fopen = fopen;
+ function fstat(filePath) {
+     return new Promise((resolve, reject) => {
+         fs_1.default.stat(filePath, (err, stat) => {
+             if (err) {
+                 reject(err);
+             }
+             else {
+                 resolve(stat);
+             }
+         });
+     });
+ }
+ exports.fstat = fstat;
+ function fread(fd, position, length) {
+     const buffer = Buffer.alloc(length);
+     return new Promise((resolve, reject) => {
+         fs_1.default.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => {
+             if (err || bytesRead !== length) {
+                 reject(err || Error('read failed'));
+             }
+             else {
+                 resolve(buf);
+             }
+         });
+     });
+ }
+ exports.fread = fread;
+ function fclose(fd) {
+     return new Promise((resolve, reject) => {
+         fs_1.default.close(fd, (err) => {
+             if (err) {
+                 reject(err);
+             }
+             else {
+                 resolve(err);
+             }
+         });
+     });
+ }
+ exports.fclose = fclose;
+ function oswrite(os, buf) {
+     return new Promise((resolve, reject) => {
+         os.write(buf, (err) => {
+             if (err) {
+                 reject(err);
+             }
+             else {
+                 resolve();
+             }
+         });
+     });
+ }
+ exports.oswrite = oswrite;
+ function osclose(os) {
+     return new Promise((resolve, reject) => {
+         os.close((err) => {
+             if (err) {
+                 reject(err);
+             }
+             else {
+                 resolve();
+             }
+         });
+     });
+ }
+ exports.osclose = osclose;
+ function osopen(path, opts) {
+     return new Promise((resolve, reject) => {
+         const outputStream = fs_1.default.createWriteStream(path, opts);
+         outputStream.on('open', function (fd) {
+             resolve(outputStream);
+         });
+         outputStream.on('error', function (err) {
+             reject(err);
+         });
+     });
+ }
+ exports.osopen = osopen;
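
These helpers wrap Node's callback-style fs APIs in promises. A sketch of how they compose, using a hypothetical `readMagic` caller (not part of the package) that reads the 4-byte `PAR1` magic that parquet files begin with:

```ts
// Sketch: composing the promisified fs helpers above.
import {fopen, fread, fclose} from './parquetjs/file';

async function readMagic(path: string): Promise<string> {
  const fd = await fopen(path);
  try {
    // fread resolves with the filled Buffer, or rejects on a short read.
    const buf = (await fread(fd, 0, 4)) as Buffer;
    return buf.toString(); // 'PAR1' for a parquet file
  } finally {
    await fclose(fd);
  }
}
```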
package/dist/parquetjs/parser/parquet-cursor.d.ts
@@ -0,0 +1,36 @@
+ import { FileMetaData } from '../parquet-thrift';
+ import { ParquetEnvelopeReader } from './parquet-envelope-reader';
+ import { ParquetSchema } from '../schema/schema';
+ import { ParquetRecord } from '../schema/declare';
+ /**
+  * A parquet cursor is used to retrieve rows from a parquet file in order
+  */
+ export declare class ParquetCursor<T> implements AsyncIterable<T> {
+     metadata: FileMetaData;
+     envelopeReader: ParquetEnvelopeReader;
+     schema: ParquetSchema;
+     columnList: string[][];
+     rowGroup: ParquetRecord[];
+     rowGroupIndex: number;
+     /**
+      * Create a new parquet reader from the file metadata and an envelope reader.
+      * It is usually not recommended to call this constructor directly except for
+      * advanced and internal use cases. Consider using getCursor() on the
+      * ParquetReader instead
+      */
+     constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader, schema: ParquetSchema, columnList: string[][]);
+     /**
+      * Retrieve the next row from the cursor. Returns a row or NULL if the end
+      * of the file was reached
+      */
+     next<T = any>(): Promise<T>;
+     /**
+      * Rewind the cursor the the beginning of the file
+      */
+     rewind(): void;
+     /**
+      * Implement AsyncIterable
+      */
+     [Symbol.asyncIterator](): AsyncIterator<T>;
+ }
+ //# sourceMappingURL=parquet-cursor.d.ts.map
package/dist/parquetjs/parser/parquet-cursor.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"parquet-cursor.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-cursor.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAC,qBAAqB,EAAC,MAAM,2BAA2B,CAAC;AAChE,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAC,aAAa,EAAC,MAAM,mBAAmB,CAAC;AAGhD;;GAEG;AACH,qBAAa,aAAa,CAAC,CAAC,CAAE,YAAW,aAAa,CAAC,CAAC,CAAC;IAChD,QAAQ,EAAE,YAAY,CAAC;IACvB,cAAc,EAAE,qBAAqB,CAAC;IACtC,MAAM,EAAE,aAAa,CAAC;IACtB,UAAU,EAAE,MAAM,EAAE,EAAE,CAAC;IACvB,QAAQ,EAAE,aAAa,EAAE,CAAM;IAC/B,aAAa,EAAE,MAAM,CAAC;IAE7B;;;;;OAKG;gBAED,QAAQ,EAAE,YAAY,EACtB,cAAc,EAAE,qBAAqB,EACrC,MAAM,EAAE,aAAa,EACrB,UAAU,EAAE,MAAM,EAAE,EAAE;IASxB;;;OAGG;IACG,IAAI,CAAC,CAAC,GAAG,GAAG,KAAK,OAAO,CAAC,CAAC,CAAC;IAiBjC;;OAEG;IACH,MAAM,IAAI,IAAI;IAKd;;OAEG;IAEH,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC;CAuB3C"}
package/dist/parquetjs/parser/parquet-cursor.js
@@ -0,0 +1,74 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ParquetCursor = void 0;
+ const shred_1 = require("../schema/shred");
+ /**
+  * A parquet cursor is used to retrieve rows from a parquet file in order
+  */
+ class ParquetCursor {
+     /**
+      * Create a new parquet reader from the file metadata and an envelope reader.
+      * It is usually not recommended to call this constructor directly except for
+      * advanced and internal use cases. Consider using getCursor() on the
+      * ParquetReader instead
+      */
+     constructor(metadata, envelopeReader, schema, columnList) {
+         this.rowGroup = [];
+         this.metadata = metadata;
+         this.envelopeReader = envelopeReader;
+         this.schema = schema;
+         this.columnList = columnList;
+         this.rowGroupIndex = 0;
+     }
+     /**
+      * Retrieve the next row from the cursor. Returns a row or NULL if the end
+      * of the file was reached
+      */
+     async next() {
+         if (this.rowGroup.length === 0) {
+             if (this.rowGroupIndex >= this.metadata.row_groups.length) {
+                 // @ts-ignore
+                 return null;
+             }
+             const rowBuffer = await this.envelopeReader.readRowGroup(this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList);
+             this.rowGroup = (0, shred_1.materializeRecords)(this.schema, rowBuffer);
+             this.rowGroupIndex++;
+         }
+         return this.rowGroup.shift();
+     }
+     /**
+      * Rewind the cursor the the beginning of the file
+      */
+     rewind() {
+         this.rowGroup = [];
+         this.rowGroupIndex = 0;
+     }
+     /**
+      * Implement AsyncIterable
+      */
+     // tslint:disable-next-line:function-name
+     [Symbol.asyncIterator]() {
+         let done = false;
+         return {
+             next: async () => {
+                 if (done) {
+                     return { done, value: null };
+                 }
+                 const value = await this.next();
+                 if (value === null) {
+                     return { done: true, value };
+                 }
+                 return { done: false, value };
+             },
+             return: async () => {
+                 done = true;
+                 return { done, value: null };
+             },
+             throw: async () => {
+                 done = true;
+                 return { done: true, value: null };
+             }
+         };
+     }
+ }
+ exports.ParquetCursor = ParquetCursor;
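
Because the iterator above reports `done` once `next()` resolves to `null`, a `ParquetCursor` can be consumed with `for await`. A minimal consumption sketch (reader construction elided; see the `ParquetReader` changes elsewhere in this diff):

```ts
// Sketch: ParquetCursor implements AsyncIterable, so standard async iteration works.
async function collectRows<T>(cursor: AsyncIterable<T>): Promise<T[]> {
  const rows: T[] = [];
  for await (const row of cursor) {
    rows.push(row); // iteration ends when the cursor's next() returns null
  }
  return rows;
}
```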
package/dist/parquetjs/parser/parquet-envelope-reader.d.ts
@@ -0,0 +1,40 @@
+ /// <reference types="node" />
+ import { ParquetSchema } from '../schema/schema';
+ import { ColumnChunk, FileMetaData, RowGroup } from '../parquet-thrift';
+ import { ParquetBuffer, ParquetData, ParquetOptions } from '../schema/declare';
+ /**
+  * The parquet envelope reader allows direct, unbuffered access to the individual
+  * sections of the parquet file, namely the header, footer and the row groups.
+  * This class is intended for advanced/internal users; if you just want to retrieve
+  * rows from a parquet file use the ParquetReader instead
+  */
+ export declare class ParquetEnvelopeReader {
+     read: (position: number, length: number) => Promise<Buffer>;
+     /**
+      * Close this parquet reader. You MUST call this method once you're finished
+      * reading rows
+      */
+     close: () => Promise<void>;
+     fileSize: number;
+     defaultDictionarySize: number;
+     static openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader>;
+     constructor(read: (position: number, length: number) => Promise<Buffer>, close: () => Promise<void>, fileSize: number, options?: any);
+     readHeader(): Promise<void>;
+     readRowGroup(schema: ParquetSchema, rowGroup: RowGroup, columnList: string[][]): Promise<ParquetBuffer>;
+     /**
+      * Do reading of parquet file's column chunk
+      * @param schema
+      * @param colChunk
+      */
+     readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetData>;
+     /**
+      * Getting dictionary for allows to flatten values by indices.
+      * @param dictionaryPageOffset
+      * @param options
+      * @param pagesOffset
+      * @returns
+      */
+     getDictionary(dictionaryPageOffset: number, options: ParquetOptions, pagesOffset: number): Promise<string[]>;
+     readFooter(): Promise<FileMetaData>;
+ }
+ //# sourceMappingURL=parquet-envelope-reader.d.ts.map
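
A hedged sketch of driving the envelope reader directly from an in-memory Buffer, per the "advanced/internal users" note above; `num_rows` is assumed from the standard parquet-thrift `FileMetaData` shape rather than shown in this diff, while `row_groups` appears in the cursor code above:

```ts
// Sketch only: low-level access via ParquetEnvelopeReader.openBuffer.
// Assumes `data` holds a complete parquet file.
import {ParquetEnvelopeReader} from './parquetjs/parser/parquet-envelope-reader';

async function inspectFooter(data: Buffer): Promise<void> {
  const reader = await ParquetEnvelopeReader.openBuffer(data);
  await reader.readHeader();                  // presumably validates the file's magic bytes
  const metadata = await reader.readFooter(); // thrift FileMetaData
  // num_rows is an assumption based on the parquet-thrift spec.
  console.log(Number(metadata.num_rows), 'rows in', metadata.row_groups.length, 'row groups');
  await reader.close();
}
```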
package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map
@@ -0,0 +1 @@
+ {"version":3,"file":"parquet-envelope-reader.d.ts","sourceRoot":"","sources":["../../../src/parquetjs/parser/parquet-envelope-reader.ts"],"names":[],"mappings":";AACA,OAAO,EAAC,aAAa,EAAC,MAAM,kBAAkB,CAAC;AAE/C,OAAO,EAAC,WAAW,EAAoB,YAAY,EAAE,QAAQ,EAAO,MAAM,mBAAmB,CAAC;AAC9F,OAAO,EACL,aAAa,EAEb,WAAW,EAEX,cAAc,EACf,MAAM,mBAAmB,CAAC;AAM3B;;;;;GAKG;AACH,qBAAa,qBAAqB;IACzB,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC;IACnE;;;OAGG;IACI,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,qBAAqB,EAAE,MAAM,CAAC;WAExB,UAAU,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,qBAAqB,CAAC;gBAQrE,IAAI,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,EAC3D,KAAK,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,EAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,GAAG;IAQT,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAc3B,YAAY,CAChB,MAAM,EAAE,aAAa,EACrB,QAAQ,EAAE,QAAQ,EAClB,UAAU,EAAE,MAAM,EAAE,EAAE,GACrB,OAAO,CAAC,aAAa,CAAC;IAgBzB;;;;OAIG;IACG,eAAe,CAAC,MAAM,EAAE,aAAa,EAAE,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC;IAoDzF;;;;;;OAMG;IACG,aAAa,CACjB,oBAAoB,EAAE,MAAM,EAC5B,OAAO,EAAE,cAAc,EACvB,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,MAAM,EAAE,CAAC;IAwBd,UAAU,IAAI,OAAO,CAAC,YAAY,CAAC;CAqB1C"}