@loaders.gl/parquet 4.2.0-alpha.5 → 4.2.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/index.cjs +200 -48
  2. package/dist/index.cjs.map +3 -3
  3. package/dist/lib/constants.js +1 -1
  4. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  5. package/dist/lib/parsers/parse-parquet-to-columns.js +4 -0
  6. package/dist/lib/parsers/parse-parquet.d.ts.map +1 -1
  7. package/dist/lib/parsers/parse-parquet.js +4 -0
  8. package/dist/parquet-loader.d.ts +151 -7
  9. package/dist/parquet-loader.d.ts.map +1 -1
  10. package/dist/parquet-loader.js +9 -1
  11. package/dist/parquet-wasm-loader.d.ts +22 -3
  12. package/dist/parquet-wasm-loader.d.ts.map +1 -1
  13. package/dist/parquet-wasm-loader.js +2 -0
  14. package/dist/parquet-wasm-writer.d.ts +1 -3
  15. package/dist/parquet-wasm-writer.d.ts.map +1 -1
  16. package/dist/parquet-writer.d.ts +15 -3
  17. package/dist/parquet-writer.d.ts.map +1 -1
  18. package/dist/parquet-writer.js +1 -1
  19. package/dist/parquetjs/compression.d.ts +1 -1
  20. package/dist/parquetjs/compression.d.ts.map +1 -1
  21. package/dist/parquetjs/compression.js +3 -1
  22. package/dist/parquetjs/encoder/parquet-encoder.js +14 -0
  23. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +7 -0
  24. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +5 -0
  25. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +13 -0
  26. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +1 -0
  27. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +5 -0
  28. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +8 -1
  29. package/dist/parquetjs/parquet-thrift/DecimalType.js +2 -0
  30. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +3 -0
  31. package/dist/parquetjs/parquet-thrift/FileMetaData.js +7 -0
  32. package/dist/parquetjs/parquet-thrift/IntType.js +2 -0
  33. package/dist/parquetjs/parquet-thrift/KeyValue.js +2 -0
  34. package/dist/parquetjs/parquet-thrift/LogicalType.js +13 -0
  35. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +1 -0
  36. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +3 -0
  37. package/dist/parquetjs/parquet-thrift/PageHeader.js +8 -0
  38. package/dist/parquetjs/parquet-thrift/PageLocation.js +3 -0
  39. package/dist/parquetjs/parquet-thrift/RowGroup.js +4 -0
  40. package/dist/parquetjs/parquet-thrift/SchemaElement.js +10 -0
  41. package/dist/parquetjs/parquet-thrift/SortingColumn.js +3 -0
  42. package/dist/parquetjs/parquet-thrift/Statistics.js +6 -0
  43. package/dist/parquetjs/parquet-thrift/TimeType.js +2 -0
  44. package/dist/parquetjs/parquet-thrift/TimeUnit.js +2 -0
  45. package/dist/parquetjs/parquet-thrift/TimestampType.js +2 -0
  46. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  47. package/dist/parquetjs/parser/parquet-reader.js +8 -5
  48. package/dist/parquetjs/schema/declare.js +4 -0
  49. package/dist/parquetjs/schema/schema.js +3 -0
  50. package/dist/parquetjs/schema/types.js +2 -0
  51. package/dist/parquetjs/utils/read-utils.js +1 -4
  52. package/dist/polyfills/buffer/buffer.js +9 -12
  53. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts +28 -1
  54. package/dist/polyfills/buffer/install-buffer-polyfill.d.ts.map +1 -1
  55. package/package.json +15 -15
  56. package/src/lib/parsers/parse-parquet-to-columns.ts +6 -0
  57. package/src/lib/parsers/parse-parquet.ts +5 -0
  58. package/src/parquet-loader.ts +52 -51
  59. package/src/parquet-wasm-loader.ts +7 -4
  60. package/src/parquet-wasm-writer.ts +2 -2
  61. package/src/parquet-writer.ts +2 -2
  62. package/src/parquetjs/compression.ts +6 -2
  63. package/src/parquetjs/parser/parquet-reader.ts +2 -1
  64. package/src/parquetjs/schema/types.ts +3 -1
  65. package/src/polyfills/buffer/buffer.ts +0 -3
@@ -12,6 +12,7 @@ import {
12
12
  LZ4Compression,
13
13
  ZstdCompression
14
14
  } from '@loaders.gl/compression';
15
+ import {registerJSModules} from '@loaders.gl/loader-utils';
15
16
 
16
17
  import {ParquetCompression} from './schema/declare';
17
18
 
@@ -72,9 +73,12 @@ export const PARQUET_COMPRESSION_METHODS: Record<ParquetCompression, Compression
72
73
  * Register compressions that have big external libraries
73
74
  * @param options.modules External library dependencies
74
75
  */
75
- export async function preloadCompressions(options?: {modules: {[key: string]: any}}) {
76
+ export async function preloadCompressions(options?: {modules?: {[key: string]: any}}) {
77
+ registerJSModules(options?.modules);
76
78
  const compressions = Object.values(PARQUET_COMPRESSION_METHODS);
77
- return await Promise.all(compressions.map((compression) => compression.preload()));
79
+ return await Promise.all(
80
+ compressions.map((compression) => compression.preload(options?.modules))
81
+ );
78
82
  }
79
83
 
80
84
  /**
@@ -36,7 +36,8 @@ export type ParquetIterationProps = {
36
36
  */
37
37
  export class ParquetReader {
38
38
  static defaultProps: Required<ParquetReaderProps> = {
39
- defaultDictionarySize: 1e6,
39
+ // max ArrayBuffer size in js is 2Gb
40
+ defaultDictionarySize: 2147483648,
40
41
  preserveBinary: false
41
42
  };
42
43
 
@@ -340,7 +340,8 @@ function fromPrimitive_JSON(value: any): unknown {
340
340
  }
341
341
 
342
342
  function toPrimitive_BSON(value: any): Buffer {
343
- const arrayBuffer = BSONWriter.encodeSync?.(value) as ArrayBuffer;
343
+ // @ts-ignore
344
+ const arrayBuffer: ArrayBuffer = BSONWriter.encodeSync?.(value);
344
345
  return Buffer.from(arrayBuffer);
345
346
  }
346
347
 
@@ -350,6 +351,7 @@ function fromPrimitive_BSON(value: any) {
350
351
 
351
352
  function toPrimitive_TIME_MILLIS(value: any) {
352
353
  const v = parseInt(value, 10);
354
+ // eslint-disable-next-line @typescript-eslint/no-loss-of-precision
353
355
  if (v < 0 || v > 0xffffffffffffffff || isNaN(v)) {
354
356
  throw new Error(`invalid value for TIME_MILLIS: ${value}`);
355
357
  }
@@ -1901,7 +1901,6 @@ function writeDouble(buf: Buffer, value, offset, littleEndian, noAssert): number
1901
1901
  }
1902
1902
 
1903
1903
  // CUSTOM ERRORS
1904
- // =============
1905
1904
 
1906
1905
  // Simplified versions from Node, changed for Buffer-only usage
1907
1906
  const errors: Record<string, any> = {};
@@ -1993,7 +1992,6 @@ function addNumericalSeparator(val) {
1993
1992
  }
1994
1993
 
1995
1994
  // CHECK FUNCTIONS
1996
- // ===============
1997
1995
 
1998
1996
  function checkBounds(buf, offset, byteLength) {
1999
1997
  validateNumber(offset, 'offset');
@@ -2042,7 +2040,6 @@ function boundsError(value, length, type?) {
2042
2040
  }
2043
2041
 
2044
2042
  // HELPER FUNCTIONS
2045
- // ================
2046
2043
 
2047
2044
  const INVALID_BASE64_RE = /[^+/0-9A-Za-z-_]/g;
2048
2045