@loaders.gl/parquet 3.4.6 → 4.0.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/dist/dist.min.js +27 -34
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +6 -6
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -1
  6. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +58 -42
  7. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -1
  8. package/dist/es5/lib/arrow/convert-schema-to-parquet.js +33 -31
  9. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -1
  10. package/dist/es5/lib/geo/decode-geo-metadata.js +12 -8
  11. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -1
  12. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +11 -7
  13. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -1
  14. package/dist/es5/lib/parsers/parse-parquet-to-rows.js +51 -29
  15. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -1
  16. package/dist/es5/lib/wasm/parse-parquet-wasm.js +6 -6
  17. package/dist/es5/lib/wasm/parse-parquet-wasm.js.map +1 -1
  18. package/dist/es5/parquet-loader.js +16 -4
  19. package/dist/es5/parquet-loader.js.map +1 -1
  20. package/dist/es5/parquet-wasm-loader.js +1 -1
  21. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  22. package/dist/es5/parquet-wasm-writer.js +1 -1
  23. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  24. package/dist/es5/parquet-writer.js +1 -1
  25. package/dist/es5/parquet-writer.js.map +1 -1
  26. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -1
  27. package/dist/es5/parquetjs/parser/decoders.js.map +1 -1
  28. package/dist/es5/parquetjs/parser/parquet-reader.js +1 -1
  29. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  30. package/dist/es5/parquetjs/schema/declare.js +4 -4
  31. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  32. package/dist/es5/parquetjs/schema/schema.js +7 -7
  33. package/dist/es5/parquetjs/schema/schema.js.map +1 -1
  34. package/dist/es5/parquetjs/schema/shred.js +117 -22
  35. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  36. package/dist/esm/index.js +5 -5
  37. package/dist/esm/index.js.map +1 -1
  38. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -1
  39. package/dist/esm/lib/arrow/convert-schema-from-parquet.js +57 -41
  40. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -1
  41. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +33 -31
  42. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -1
  43. package/dist/esm/lib/geo/decode-geo-metadata.js +12 -8
  44. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -1
  45. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +12 -8
  46. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -1
  47. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +14 -3
  48. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -1
  49. package/dist/esm/lib/wasm/parse-parquet-wasm.js +3 -3
  50. package/dist/esm/lib/wasm/parse-parquet-wasm.js.map +1 -1
  51. package/dist/esm/parquet-loader.js +14 -2
  52. package/dist/esm/parquet-loader.js.map +1 -1
  53. package/dist/esm/parquet-wasm-loader.js +1 -1
  54. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  55. package/dist/esm/parquet-wasm-writer.js +1 -1
  56. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  57. package/dist/esm/parquet-writer.js +1 -1
  58. package/dist/esm/parquet-writer.js.map +1 -1
  59. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -1
  60. package/dist/esm/parquetjs/parser/decoders.js.map +1 -1
  61. package/dist/esm/parquetjs/parser/parquet-reader.js +2 -2
  62. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  63. package/dist/esm/parquetjs/schema/declare.js +1 -1
  64. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  65. package/dist/esm/parquetjs/schema/schema.js +6 -6
  66. package/dist/esm/parquetjs/schema/schema.js.map +1 -1
  67. package/dist/esm/parquetjs/schema/shred.js +108 -21
  68. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  69. package/dist/index.d.ts +8 -49
  70. package/dist/index.d.ts.map +1 -1
  71. package/dist/index.js +8 -6
  72. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +2 -2
  73. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -1
  74. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +4 -4
  75. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -1
  76. package/dist/lib/arrow/convert-schema-from-parquet.js +48 -44
  77. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +1 -1
  78. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -1
  79. package/dist/lib/arrow/convert-schema-to-parquet.js +30 -31
  80. package/dist/lib/geo/decode-geo-metadata.js +12 -8
  81. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +2 -2
  82. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -1
  83. package/dist/lib/parsers/parse-parquet-to-columns.js +13 -7
  84. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +3 -2
  85. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -1
  86. package/dist/lib/parsers/parse-parquet-to-rows.js +16 -19
  87. package/dist/lib/wasm/parse-parquet-wasm.d.ts +3 -3
  88. package/dist/lib/wasm/parse-parquet-wasm.d.ts.map +1 -1
  89. package/dist/lib/wasm/parse-parquet-wasm.js +3 -3
  90. package/dist/parquet-loader.d.ts +3 -14
  91. package/dist/parquet-loader.d.ts.map +1 -1
  92. package/dist/parquet-loader.js +14 -2
  93. package/dist/parquet-worker.js +31 -38
  94. package/dist/parquet-worker.js.map +3 -3
  95. package/dist/parquet-writer.d.ts +2 -1
  96. package/dist/parquet-writer.d.ts.map +1 -1
  97. package/dist/parquet-writer.js +1 -0
  98. package/dist/parquetjs/encoder/parquet-encoder.d.ts +4 -4
  99. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -1
  100. package/dist/parquetjs/parser/decoders.d.ts +2 -2
  101. package/dist/parquetjs/parser/decoders.d.ts.map +1 -1
  102. package/dist/parquetjs/parser/parquet-reader.d.ts +6 -6
  103. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  104. package/dist/parquetjs/parser/parquet-reader.js +1 -1
  105. package/dist/parquetjs/schema/declare.d.ts +6 -5
  106. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  107. package/dist/parquetjs/schema/declare.js +3 -3
  108. package/dist/parquetjs/schema/schema.d.ts +4 -4
  109. package/dist/parquetjs/schema/schema.d.ts.map +1 -1
  110. package/dist/parquetjs/schema/schema.js +5 -5
  111. package/dist/parquetjs/schema/shred.d.ts +17 -111
  112. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  113. package/dist/parquetjs/schema/shred.js +127 -119
  114. package/package.json +8 -8
  115. package/src/index.ts +32 -9
  116. package/src/lib/arrow/convert-row-group-to-columns.ts +2 -2
  117. package/src/lib/arrow/convert-schema-from-parquet.ts +56 -66
  118. package/src/lib/arrow/convert-schema-to-parquet.ts +32 -44
  119. package/src/lib/geo/decode-geo-metadata.ts +17 -8
  120. package/src/lib/parsers/parse-parquet-to-columns.ts +22 -11
  121. package/src/lib/parsers/parse-parquet-to-rows.ts +28 -23
  122. package/src/lib/wasm/parse-parquet-wasm.ts +7 -7
  123. package/src/parquet-loader.ts +25 -2
  124. package/src/parquet-writer.ts +4 -1
  125. package/src/parquetjs/encoder/parquet-encoder.ts +11 -10
  126. package/src/parquetjs/parser/decoders.ts +3 -3
  127. package/src/parquetjs/parser/parquet-reader.ts +7 -7
  128. package/src/parquetjs/schema/declare.ts +6 -5
  129. package/src/parquetjs/schema/schema.ts +8 -8
  130. package/src/parquetjs/schema/shred.ts +142 -103
@@ -2,43 +2,42 @@
2
2
  // loaders.gl, MIT license
3
3
  Object.defineProperty(exports, "__esModule", { value: true });
4
4
  exports.convertToParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
- const schema_1 = require("@loaders.gl/schema");
6
5
  exports.PARQUET_TYPE_MAPPING = {
7
- BOOLEAN: schema_1.Bool,
8
- INT32: schema_1.Int32,
9
- INT64: schema_1.Float64,
10
- INT96: schema_1.Float64,
11
- FLOAT: schema_1.Float32,
12
- DOUBLE: schema_1.Float64,
13
- BYTE_ARRAY: schema_1.Binary,
14
- FIXED_LEN_BYTE_ARRAY: schema_1.Binary,
15
- UTF8: schema_1.Utf8,
16
- DATE: schema_1.Int32,
17
- TIME_MILLIS: schema_1.Int64,
18
- TIME_MICROS: schema_1.Int64,
19
- TIMESTAMP_MILLIS: schema_1.Int64,
20
- TIMESTAMP_MICROS: schema_1.Int64,
21
- UINT_8: schema_1.Int32,
22
- UINT_16: schema_1.Uint16,
23
- UINT_32: schema_1.Uint32,
24
- UINT_64: schema_1.Uint64,
25
- INT_8: schema_1.Int8,
26
- INT_16: schema_1.Int16,
27
- INT_32: schema_1.Int32,
28
- INT_64: schema_1.Int64,
29
- JSON: schema_1.Binary,
30
- BSON: schema_1.Binary,
6
+ BOOLEAN: 'bool',
7
+ INT32: 'int32',
8
+ INT64: 'float64',
9
+ INT96: 'float64',
10
+ FLOAT: 'float32',
11
+ DOUBLE: 'float64',
12
+ BYTE_ARRAY: 'binary',
13
+ FIXED_LEN_BYTE_ARRAY: 'binary',
14
+ UTF8: 'utf8',
15
+ DATE: 'int32',
16
+ TIME_MILLIS: 'int64',
17
+ TIME_MICROS: 'int64',
18
+ TIMESTAMP_MILLIS: 'int64',
19
+ TIMESTAMP_MICROS: 'int64',
20
+ UINT_8: 'int32',
21
+ UINT_16: 'uint16',
22
+ UINT_32: 'uint32',
23
+ UINT_64: 'uint64',
24
+ INT_8: 'int8',
25
+ INT_16: 'int16',
26
+ INT_32: 'int32',
27
+ INT_64: 'int64',
28
+ JSON: 'binary',
29
+ BSON: 'binary',
31
30
  // TODO check interval type
32
- INTERVAL: schema_1.Binary,
33
- DECIMAL_INT32: schema_1.Float32,
34
- DECIMAL_INT64: schema_1.Float64,
35
- DECIMAL_BYTE_ARRAY: schema_1.Float64,
36
- DECIMAL_FIXED_LEN_BYTE_ARRAY: schema_1.Float64
31
+ INTERVAL: 'binary',
32
+ DECIMAL_INT32: 'float32',
33
+ DECIMAL_INT64: 'float64',
34
+ DECIMAL_BYTE_ARRAY: 'float64',
35
+ DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
37
36
  };
38
37
  function convertToParquetSchema(schema) {
39
38
  const fields = []; // getFields(schema.fields);
40
39
  // TODO add metadata if needed.
41
- return new schema_1.Schema(fields);
40
+ return { fields, metadata: {} };
42
41
  }
43
42
  exports.convertToParquetSchema = convertToParquetSchema;
44
43
  // function getFields(schema: Field[]): Definition[] {
@@ -5,7 +5,7 @@ exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = vo
5
5
  * Reads the GeoMetadata object from the metadata
6
6
  * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
7
7
  function getGeoMetadata(schema) {
8
- const stringifiedGeoMetadata = schema.metadata.get('geo');
8
+ const stringifiedGeoMetadata = schema.metadata.geo;
9
9
  if (!stringifiedGeoMetadata) {
10
10
  return null;
11
11
  }
@@ -24,7 +24,7 @@ exports.getGeoMetadata = getGeoMetadata;
24
24
  */
25
25
  function setGeoMetadata(schema, geoMetadata) {
26
26
  const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
27
- schema.metadata.set('geo', stringifiedGeoMetadata);
27
+ schema.metadata.geo = stringifiedGeoMetadata;
28
28
  }
29
29
  exports.setGeoMetadata = setGeoMetadata;
30
30
  /**
@@ -39,18 +39,18 @@ function unpackGeoMetadata(schema) {
39
39
  // Store Parquet Schema Level Metadata
40
40
  const { version, primary_column, columns } = geoMetadata;
41
41
  if (version) {
42
- schema.metadata.set('geo.version', version);
42
+ schema.metadata['geo.version'] = version;
43
43
  }
44
44
  if (primary_column) {
45
- schema.metadata.set('geo.primary_column', primary_column);
45
+ schema.metadata['geo.primary_column'] = primary_column;
46
46
  }
47
47
  // store column names as comma separated list
48
- schema.metadata.set('geo.columns', Object.keys(columns || {}).join(''));
48
+ schema.metadata['geo.columns'] = Object.keys(columns || {}).join('');
49
49
  for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
50
50
  const field = schema.fields.find((field) => field.name === columnName);
51
51
  if (field) {
52
52
  if (field.name === primary_column) {
53
- field.metadata.set('geo.primary_field', 'true');
53
+ setFieldMetadata(field, 'geo.primary_field', 'true');
54
54
  }
55
55
  unpackGeoFieldMetadata(field, columnMetadata);
56
56
  }
@@ -61,13 +61,17 @@ function unpackGeoFieldMetadata(field, columnMetadata) {
61
61
  for (const [key, value] of Object.entries(columnMetadata || {})) {
62
62
  switch (key) {
63
63
  case 'geometry_type':
64
- field.metadata.set(`geo.${key}`, value.join(','));
64
+ setFieldMetadata(field, `geo.${key}`, value.join(','));
65
65
  break;
66
66
  case 'bbox':
67
67
  case 'crs':
68
68
  case 'edges':
69
69
  default:
70
- field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
70
+ setFieldMetadata(field, `geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
71
71
  }
72
72
  }
73
73
  }
74
+ function setFieldMetadata(field, key, value) {
75
+ field.metadata = field.metadata || {};
76
+ field.metadata[key] = value;
77
+ }
@@ -1,5 +1,5 @@
1
- import { ColumnarTableBatch } from '@loaders.gl/schema';
1
+ import { ColumnarTable, ColumnarTableBatch } from '@loaders.gl/schema';
2
2
  import type { ParquetLoaderOptions } from '../../parquet-loader';
3
- export declare function parseParquetInColumns(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<ColumnarTableBatch | null>;
3
+ export declare function parseParquetInColumns(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<ColumnarTable>;
4
4
  export declare function parseParquetFileInColumnarBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncIterable<ColumnarTableBatch>;
5
5
  //# sourceMappingURL=parse-parquet-to-columns.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parse-parquet-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-columns.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,kBAAkB,EAAS,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAO/D,wBAAsB,qBAAqB,CACzC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,sCAO/B;AAED,wBAAuB,iCAAiC,CACtD,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,kBAAkB,CAAC,CAWnC"}
1
+ {"version":3,"file":"parse-parquet-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-columns.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,aAAa,EAAE,kBAAkB,EAAS,MAAM,oBAAoB,CAAC;AAE7E,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAS/D,wBAAsB,qBAAqB,CACzC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,aAAa,CAAC,CAUxB;AAED,wBAAuB,iCAAiC,CACtD,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,kBAAkB,CAAC,CAWnC"}
@@ -5,14 +5,19 @@ exports.parseParquetFileInColumnarBatches = exports.parseParquetInColumns = void
5
5
  const loader_utils_1 = require("@loaders.gl/loader-utils");
6
6
  const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
7
7
  const convert_schema_from_parquet_1 = require("../arrow/convert-schema-from-parquet");
8
- const convert_row_group_to_columns_1 = require("../arrow/convert-row-group-to-columns");
8
+ const shred_1 = require("../../parquetjs/schema/shred");
9
+ // import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
9
10
  const decode_geo_metadata_1 = require("../geo/decode-geo-metadata");
10
11
  async function parseParquetInColumns(arrayBuffer, options) {
11
12
  const blob = new Blob([arrayBuffer]);
12
13
  for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
13
- return batch;
14
+ return {
15
+ shape: 'columnar-table',
16
+ schema: batch.schema,
17
+ data: batch.data
18
+ };
14
19
  }
15
- return null;
20
+ throw new Error('empty table');
16
21
  }
17
22
  exports.parseParquetInColumns = parseParquetInColumns;
18
23
  async function* parseParquetFileInColumnarBatches(blob, options) {
@@ -20,16 +25,17 @@ async function* parseParquetFileInColumnarBatches(blob, options) {
20
25
  const reader = new parquet_reader_1.ParquetReader(file);
21
26
  const parquetSchema = await reader.getSchema();
22
27
  const parquetMetadata = await reader.getFileMetadata();
23
- const schema = (0, convert_schema_from_parquet_1.convertSchemaFromParquet)(parquetSchema, parquetMetadata);
28
+ const schema = (0, convert_schema_from_parquet_1.convertParquetSchema)(parquetSchema, parquetMetadata);
24
29
  (0, decode_geo_metadata_1.unpackGeoMetadata)(schema);
25
30
  const rowGroups = reader.rowGroupIterator(options?.parquet);
26
31
  for await (const rowGroup of rowGroups) {
27
- yield convertRowGroupToTableBatch(schema, rowGroup);
32
+ yield convertRowGroupToTableBatch(parquetSchema, rowGroup, schema);
28
33
  }
29
34
  }
30
35
  exports.parseParquetFileInColumnarBatches = parseParquetFileInColumnarBatches;
31
- function convertRowGroupToTableBatch(schema, rowGroup) {
32
- const data = (0, convert_row_group_to_columns_1.convertParquetRowGroupToColumns)(schema, rowGroup);
36
+ function convertRowGroupToTableBatch(parquetSchema, rowGroup, schema) {
37
+ // const data = convertParquetRowGroupToColumns(schema, rowGroup);
38
+ const data = (0, shred_1.materializeColumns)(parquetSchema, rowGroup);
33
39
  return {
34
40
  shape: 'columnar-table',
35
41
  batchType: 'data',
@@ -1,4 +1,5 @@
1
+ import { ObjectRowTable, ObjectRowTableBatch } from '@loaders.gl/schema';
1
2
  import type { ParquetLoaderOptions } from '../../parquet-loader';
2
- export declare function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<import("../../parquetjs/schema/declare").ParquetRecord[] | null>;
3
- export declare function parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncGenerator<import("../../parquetjs/schema/declare").ParquetRecord[], void, unknown>;
3
+ export declare function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<ObjectRowTable>;
4
+ export declare function parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncIterable<ObjectRowTableBatch>;
4
5
  //# sourceMappingURL=parse-parquet-to-rows.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAG/D,wBAAsB,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,oBAAoB,4EAM1F;AAED,wBAAuB,yBAAyB,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,oBAAoB,2FAO1F"}
1
+ {"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,cAAc,EAAE,mBAAmB,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAI/D,wBAAsB,YAAY,CAChC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,cAAc,CAAC,CAgBzB;AAED,wBAAuB,yBAAyB,CAC9C,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,mBAAmB,CAAC,CAYpC"}
@@ -7,10 +7,18 @@ const loader_utils_1 = require("@loaders.gl/loader-utils");
7
7
  const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
8
8
  async function parseParquet(arrayBuffer, options) {
9
9
  const blob = new Blob([arrayBuffer]);
10
+ const rows = [];
10
11
  for await (const batch of parseParquetFileInBatches(blob, options)) {
11
- return batch;
12
+ // we have only one input batch so return
13
+ for (const row of batch.data) {
14
+ rows.push(row);
15
+ }
12
16
  }
13
- return null;
17
+ return {
18
+ shape: 'object-row-table',
19
+ // TODO - spread can fail for very large number of batches
20
+ data: rows
21
+ };
14
22
  }
15
23
  exports.parseParquet = parseParquet;
16
24
  async function* parseParquetFileInBatches(blob, options) {
@@ -18,23 +26,12 @@ async function* parseParquetFileInBatches(blob, options) {
18
26
  const reader = new parquet_reader_1.ParquetReader(file);
19
27
  const rowBatches = reader.rowBatchIterator(options?.parquet);
20
28
  for await (const rows of rowBatches) {
21
- yield rows;
29
+ yield {
30
+ shape: 'object-row-table',
31
+ data: rows,
32
+ batchType: 'data',
33
+ length: rows.length
34
+ };
22
35
  }
23
36
  }
24
37
  exports.parseParquetFileInBatches = parseParquetFileInBatches;
25
- // export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
26
- // const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
27
- // try {
28
- // for await (const rowGroup of rowGroupReader) {
29
- // yield convertRowGroupToTableBatch(rowGroup);
30
- // }
31
- // } finally {
32
- // await rowGroupReader.close();
33
- // }
34
- // }
35
- // function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
36
- // // @ts-expect-error
37
- // return {
38
- // data: rowGroup
39
- // };
40
- // }
@@ -1,10 +1,10 @@
1
1
  import type { LoaderOptions } from '@loaders.gl/loader-utils';
2
- import { Table } from 'apache-arrow';
3
- export type ParquetLoaderOptions = LoaderOptions & {
2
+ import { Table as ArrowTable } from 'apache-arrow';
3
+ export type ParquetWasmLoaderOptions = LoaderOptions & {
4
4
  parquet?: {
5
5
  type?: 'arrow-table';
6
6
  wasmUrl?: string;
7
7
  };
8
8
  };
9
- export declare function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<Table>;
9
+ export declare function parseParquetWasm(arrayBuffer: ArrayBuffer, options?: ParquetWasmLoaderOptions): Promise<ArrowTable>;
10
10
  //# sourceMappingURL=parse-parquet-wasm.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parse-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/parse-parquet-wasm.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAC,KAAK,EAA0B,MAAM,cAAc,CAAC;AAG5D,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,aAAa,CAAC;QACrB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACH,CAAC;AAEF,wBAAsB,YAAY,CAChC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,GAC7B,OAAO,CAAC,KAAK,CAAC,CAYhB"}
1
+ {"version":3,"file":"parse-parquet-wasm.d.ts","sourceRoot":"","sources":["../../../src/lib/wasm/parse-parquet-wasm.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAC5D,OAAO,EAAC,KAAK,IAAI,UAAU,EAA0B,MAAM,cAAc,CAAC;AAG1E,MAAM,MAAM,wBAAwB,GAAG,aAAa,GAAG;IACrD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,aAAa,CAAC;QACrB,OAAO,CAAC,EAAE,MAAM,CAAC;KAClB,CAAC;CACH,CAAC;AAEF,wBAAsB,gBAAgB,CACpC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,UAAU,CAAC,CAYrB"}
@@ -1,9 +1,9 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.parseParquet = void 0;
3
+ exports.parseParquetWasm = void 0;
4
4
  const apache_arrow_1 = require("apache-arrow");
5
5
  const load_wasm_node_1 = require("./load-wasm/load-wasm-node");
6
- async function parseParquet(arrayBuffer, options) {
6
+ async function parseParquetWasm(arrayBuffer, options) {
7
7
  const wasmUrl = options?.parquet?.wasmUrl;
8
8
  const wasm = await (0, load_wasm_node_1.loadWasm)(wasmUrl);
9
9
  const arr = new Uint8Array(arrayBuffer);
@@ -12,7 +12,7 @@ async function parseParquet(arrayBuffer, options) {
12
12
  const arrowTable = tableFromIPC(arrowIPCBuffer);
13
13
  return arrowTable;
14
14
  }
15
- exports.parseParquet = parseParquet;
15
+ exports.parseParquetWasm = parseParquetWasm;
16
16
  /**
17
17
  * Deserialize the IPC format into a {@link Table}. This function is a
18
18
  * convenience wrapper for {@link RecordBatchReader}. Opposite of {@link tableToIPC}.
@@ -1,4 +1,5 @@
1
1
  import type { Loader, LoaderOptions } from '@loaders.gl/loader-utils';
2
+ import type { ObjectRowTable, ObjectRowTableBatch, ColumnarTable, ColumnarTableBatch } from '@loaders.gl/schema';
2
3
  export type ParquetLoaderOptions = LoaderOptions & {
3
4
  parquet?: {
4
5
  type?: 'object-row-table';
@@ -8,18 +9,6 @@ export type ParquetLoaderOptions = LoaderOptions & {
8
9
  };
9
10
  };
10
11
  /** ParquetJS table loader */
11
- export declare const ParquetLoader: {
12
- name: string;
13
- id: string;
14
- module: string;
15
- version: any;
16
- worker: boolean;
17
- category: string;
18
- extensions: string[];
19
- mimeTypes: string[];
20
- binary: boolean;
21
- tests: string[];
22
- options: ParquetLoaderOptions;
23
- };
24
- export declare const _typecheckParquetLoader: Loader;
12
+ export declare const ParquetLoader: Loader<ObjectRowTable, ObjectRowTableBatch, ParquetLoaderOptions>;
13
+ export declare const ParqueColumnnartLoader: Loader<ColumnarTable, ColumnarTableBatch, ParquetLoaderOptions>;
25
14
  //# sourceMappingURL=parquet-loader.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAMpE,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;QACnC,UAAU,CAAC,EAAE,OAAO,CAAC;KACtB,CAAC;CACH,CAAC;AAWF,6BAA6B;AAC7B,eAAO,MAAM,aAAa;;;;;;;;;;;;CAYzB,CAAC;AAEF,eAAO,MAAM,uBAAuB,EAAE,MAAsB,CAAC"}
1
+ {"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EACV,cAAc,EACd,mBAAmB,EACnB,aAAa,EACb,kBAAkB,EACnB,MAAM,oBAAoB,CAAC;AAM5B,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;QACnC,UAAU,CAAC,EAAE,OAAO,CAAC;KACtB,CAAC;CACH,CAAC;AAWF,6BAA6B;AAC7B,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,cAAc,EAAE,mBAAmB,EAAE,oBAAoB,CAY3F,CAAC;AAEF,eAAO,MAAM,sBAAsB,EAAE,MAAM,CACzC,aAAa,EACb,kBAAkB,EAClB,oBAAoB,CAarB,CAAC"}
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports._typecheckParquetLoader = exports.ParquetLoader = void 0;
3
+ exports.ParqueColumnnartLoader = exports.ParquetLoader = void 0;
4
4
  // __VERSION__ is injected by babel-plugin-version-inline
5
5
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
6
6
  const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
@@ -26,4 +26,16 @@ exports.ParquetLoader = {
26
26
  tests: ['PAR1', 'PARE'],
27
27
  options: DEFAULT_PARQUET_LOADER_OPTIONS
28
28
  };
29
- exports._typecheckParquetLoader = exports.ParquetLoader;
29
+ exports.ParqueColumnnartLoader = {
30
+ name: 'Apache Parquet',
31
+ id: 'parquet',
32
+ module: 'parquet',
33
+ version: VERSION,
34
+ worker: true,
35
+ category: 'table',
36
+ extensions: ['parquet'],
37
+ mimeTypes: ['application/octet-stream'],
38
+ binary: true,
39
+ tests: ['PAR1', 'PARE'],
40
+ options: DEFAULT_PARQUET_LOADER_OPTIONS
41
+ };