@loaders.gl/parquet 4.0.0-alpha.23 → 4.0.0-alpha.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. package/dist/dist.min.js +17 -17
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/parquet-loader.js +1 -1
  4. package/dist/es5/parquet-wasm-loader.js +1 -1
  5. package/dist/es5/parquet-wasm-writer.js +1 -1
  6. package/dist/es5/parquet-writer.js +1 -1
  7. package/dist/esm/parquet-loader.js +1 -1
  8. package/dist/esm/parquet-wasm-loader.js +1 -1
  9. package/dist/esm/parquet-wasm-writer.js +1 -1
  10. package/dist/esm/parquet-writer.js +1 -1
  11. package/dist/parquet-worker.js +17 -17
  12. package/dist/parquet-worker.js.map +3 -3
  13. package/package.json +11 -9
  14. package/dist/bundle.js +0 -5
  15. package/dist/constants.js +0 -18
  16. package/dist/index.js +0 -56
  17. package/dist/lib/arrow/convert-columns-to-row-group.js +0 -1
  18. package/dist/lib/arrow/convert-row-group-to-columns.js +0 -12
  19. package/dist/lib/arrow/convert-schema-from-parquet.js +0 -86
  20. package/dist/lib/arrow/convert-schema-to-parquet.js +0 -71
  21. package/dist/lib/geo/decode-geo-metadata.js +0 -77
  22. package/dist/lib/geo/geoparquet-schema.js +0 -69
  23. package/dist/lib/parsers/parse-parquet-to-columns.js +0 -46
  24. package/dist/lib/parsers/parse-parquet-to-rows.js +0 -37
  25. package/dist/lib/wasm/encode-parquet-wasm.js +0 -30
  26. package/dist/lib/wasm/load-wasm/index.js +0 -5
  27. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +0 -38
  28. package/dist/lib/wasm/load-wasm/load-wasm-node.js +0 -31
  29. package/dist/lib/wasm/parse-parquet-wasm.js +0 -27
  30. package/dist/parquet-loader.js +0 -41
  31. package/dist/parquet-wasm-loader.js +0 -26
  32. package/dist/parquet-wasm-writer.js +0 -24
  33. package/dist/parquet-writer.js +0 -21
  34. package/dist/parquetjs/codecs/declare.js +0 -2
  35. package/dist/parquetjs/codecs/dictionary.js +0 -14
  36. package/dist/parquetjs/codecs/index.js +0 -55
  37. package/dist/parquetjs/codecs/plain.js +0 -211
  38. package/dist/parquetjs/codecs/rle.js +0 -145
  39. package/dist/parquetjs/compression.js +0 -183
  40. package/dist/parquetjs/encoder/parquet-encoder.js +0 -484
  41. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
  42. package/dist/parquetjs/parquet-thrift/BsonType.js +0 -62
  43. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +0 -211
  44. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +0 -217
  45. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +0 -402
  46. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +0 -108
  47. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
  48. package/dist/parquetjs/parquet-thrift/ConvertedType.js +0 -34
  49. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +0 -170
  50. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -230
  51. package/dist/parquetjs/parquet-thrift/DateType.js +0 -62
  52. package/dist/parquetjs/parquet-thrift/DecimalType.js +0 -109
  53. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -126
  54. package/dist/parquetjs/parquet-thrift/Encoding.js +0 -20
  55. package/dist/parquetjs/parquet-thrift/EnumType.js +0 -62
  56. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
  57. package/dist/parquetjs/parquet-thrift/FileMetaData.js +0 -260
  58. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +0 -62
  59. package/dist/parquetjs/parquet-thrift/IntType.js +0 -109
  60. package/dist/parquetjs/parquet-thrift/JsonType.js +0 -62
  61. package/dist/parquetjs/parquet-thrift/KeyValue.js +0 -106
  62. package/dist/parquetjs/parquet-thrift/ListType.js +0 -62
  63. package/dist/parquetjs/parquet-thrift/LogicalType.js +0 -384
  64. package/dist/parquetjs/parquet-thrift/MapType.js +0 -62
  65. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +0 -62
  66. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +0 -62
  67. package/dist/parquetjs/parquet-thrift/NullType.js +0 -62
  68. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +0 -101
  69. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +0 -131
  70. package/dist/parquetjs/parquet-thrift/PageHeader.js +0 -220
  71. package/dist/parquetjs/parquet-thrift/PageLocation.js +0 -145
  72. package/dist/parquetjs/parquet-thrift/PageType.js +0 -16
  73. package/dist/parquetjs/parquet-thrift/RowGroup.js +0 -186
  74. package/dist/parquetjs/parquet-thrift/SchemaElement.js +0 -243
  75. package/dist/parquetjs/parquet-thrift/SortingColumn.js +0 -131
  76. package/dist/parquetjs/parquet-thrift/Statistics.js +0 -180
  77. package/dist/parquetjs/parquet-thrift/StringType.js +0 -62
  78. package/dist/parquetjs/parquet-thrift/TimeType.js +0 -110
  79. package/dist/parquetjs/parquet-thrift/TimeUnit.js +0 -131
  80. package/dist/parquetjs/parquet-thrift/TimestampType.js +0 -110
  81. package/dist/parquetjs/parquet-thrift/Type.js +0 -20
  82. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -62
  83. package/dist/parquetjs/parquet-thrift/UUIDType.js +0 -62
  84. package/dist/parquetjs/parquet-thrift/index.js +0 -65
  85. package/dist/parquetjs/parser/decoders.js +0 -318
  86. package/dist/parquetjs/parser/parquet-reader.js +0 -200
  87. package/dist/parquetjs/schema/declare.js +0 -12
  88. package/dist/parquetjs/schema/schema.js +0 -162
  89. package/dist/parquetjs/schema/shred.js +0 -355
  90. package/dist/parquetjs/schema/types.js +0 -416
  91. package/dist/parquetjs/utils/file-utils.js +0 -43
  92. package/dist/parquetjs/utils/read-utils.js +0 -109
  93. package/dist/workers/parquet-worker.js +0 -5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "4.0.0-alpha.23",
3
+ "version": "4.0.0-alpha.24",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -38,13 +38,15 @@
38
38
  "tls": false,
39
39
  "stream": false,
40
40
  "fs": false,
41
- "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
41
+ "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts",
42
+ "./dist/es5/lib/wasm/load-wasm/load-wasm-node.js": "./dist/es5/lib/wasm/load-wasm/load-wasm-browser.js",
43
+ "./dist/esm/lib/wasm/load-wasm/load-wasm-node.js": "./dist/esm/lib/wasm/load-wasm/load-wasm-browser.js"
42
44
  },
43
45
  "dependencies": {
44
- "@loaders.gl/bson": "4.0.0-alpha.23",
45
- "@loaders.gl/compression": "4.0.0-alpha.23",
46
- "@loaders.gl/loader-utils": "4.0.0-alpha.23",
47
- "@loaders.gl/schema": "4.0.0-alpha.23",
46
+ "@loaders.gl/bson": "4.0.0-alpha.24",
47
+ "@loaders.gl/compression": "4.0.0-alpha.24",
48
+ "@loaders.gl/loader-utils": "4.0.0-alpha.24",
49
+ "@loaders.gl/schema": "4.0.0-alpha.24",
48
50
  "async-mutex": "^0.2.2",
49
51
  "brotli": "^1.3.2",
50
52
  "int53": "^0.2.4",
@@ -58,14 +60,14 @@
58
60
  "zstd-codec": "^0.1"
59
61
  },
60
62
  "peerDependencies": {
61
- "apache-arrow": "^9.0.0"
63
+ "apache-arrow": "^13.0.0"
62
64
  },
63
65
  "devDependencies": {
64
66
  "@types/node": "^10.14.15",
65
67
  "@types/node-int64": "^0.4.29",
66
68
  "@types/thrift": "^0.10.8",
67
69
  "@types/varint": "^5.0.0",
68
- "apache-arrow": "^9.0.0"
70
+ "apache-arrow": "^13.0.0"
69
71
  },
70
- "gitHead": "e212f2a0c0e342f7cb65ce84fa2ff39f64b7d94b"
72
+ "gitHead": "97a8990595c132fb14e3445a8768d9f4cb98ff05"
71
73
  }
package/dist/bundle.js DELETED
@@ -1,5 +0,0 @@
1
- "use strict";
2
- // @ts-nocheck
3
- const moduleExports = require('./index');
4
- globalThis.loaders = globalThis.loaders || {};
5
- module.exports = Object.assign(globalThis.loaders, moduleExports);
package/dist/constants.js DELETED
@@ -1,18 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PARQUET_RDLVL_ENCODING = exports.PARQUET_RDLVL_TYPE = exports.PARQUET_VERSION = exports.PARQUET_MAGIC_ENCRYPTED = exports.PARQUET_MAGIC = void 0;
4
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
5
- /**
6
- * Parquet File Magic String
7
- */
8
- exports.PARQUET_MAGIC = 'PAR1';
9
- exports.PARQUET_MAGIC_ENCRYPTED = 'PARE';
10
- /**
11
- * Parquet File Format Version
12
- */
13
- exports.PARQUET_VERSION = 1;
14
- /**
15
- * Internal type used for repetition/definition levels
16
- */
17
- exports.PARQUET_RDLVL_TYPE = 'INT32';
18
- exports.PARQUET_RDLVL_ENCODING = 'RLE';
package/dist/index.js DELETED
@@ -1,56 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- var __importDefault = (this && this.__importDefault) || function (mod) {
4
- return (mod && mod.__esModule) ? mod : { "default": mod };
5
- };
6
- Object.defineProperty(exports, "__esModule", { value: true });
7
- exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = exports.geoJSONSchema = exports.convertParquetToArrowSchema = exports.convertParquetSchema = exports.ParquetEncoder = exports.ParquetReader = exports.ParquetSchema = exports.preloadCompressions = exports.ParquetWasmWriter = exports._ParquetWriter = exports.ParquetWasmLoader = exports.ParquetColumnarLoader = exports.ParquetLoader = exports.ParquetWasmWorkerLoader = exports.ParquetWorkerLoader = void 0;
8
- // ParquetLoader
9
- const parquet_loader_1 = require("./parquet-loader");
10
- Object.defineProperty(exports, "ParquetWorkerLoader", { enumerable: true, get: function () { return parquet_loader_1.ParquetLoader; } });
11
- const parse_parquet_to_rows_1 = require("./lib/parsers/parse-parquet-to-rows");
12
- const parse_parquet_to_columns_1 = require("./lib/parsers/parse-parquet-to-columns");
13
- const parse_parquet_wasm_1 = require("./lib/wasm/parse-parquet-wasm");
14
- const parquet_wasm_loader_1 = require("./parquet-wasm-loader");
15
- Object.defineProperty(exports, "ParquetWasmWorkerLoader", { enumerable: true, get: function () { return parquet_wasm_loader_1.ParquetWasmLoader; } });
16
- /** ParquetJS table loader */
17
- exports.ParquetLoader = {
18
- ...parquet_loader_1.ParquetLoader,
19
- parse: parse_parquet_to_rows_1.parseParquet,
20
- parseFileInBatches: parse_parquet_to_rows_1.parseParquetFileInBatches
21
- };
22
- /** ParquetJS table loader */
23
- // @ts-expect-error
24
- exports.ParquetColumnarLoader = {
25
- ...parquet_loader_1.ParquetLoader,
26
- parse: parse_parquet_to_columns_1.parseParquetInColumns,
27
- parseFileInBatches: parse_parquet_to_columns_1.parseParquetFileInColumnarBatches
28
- };
29
- exports.ParquetWasmLoader = {
30
- ...parquet_wasm_loader_1.ParquetWasmLoader,
31
- parse: parse_parquet_wasm_1.parseParquetWasm
32
- };
33
- // ParquetWriter
34
- var parquet_writer_1 = require("./parquet-writer");
35
- Object.defineProperty(exports, "_ParquetWriter", { enumerable: true, get: function () { return parquet_writer_1.ParquetWriter; } });
36
- var parquet_wasm_writer_1 = require("./parquet-wasm-writer");
37
- Object.defineProperty(exports, "ParquetWasmWriter", { enumerable: true, get: function () { return parquet_wasm_writer_1.ParquetWasmWriter; } });
38
- // EXPERIMENTAL - expose the internal parquetjs API
39
- var compression_1 = require("./parquetjs/compression");
40
- Object.defineProperty(exports, "preloadCompressions", { enumerable: true, get: function () { return compression_1.preloadCompressions; } });
41
- var schema_1 = require("./parquetjs/schema/schema");
42
- Object.defineProperty(exports, "ParquetSchema", { enumerable: true, get: function () { return schema_1.ParquetSchema; } });
43
- var parquet_reader_1 = require("./parquetjs/parser/parquet-reader");
44
- Object.defineProperty(exports, "ParquetReader", { enumerable: true, get: function () { return parquet_reader_1.ParquetReader; } });
45
- var parquet_encoder_1 = require("./parquetjs/encoder/parquet-encoder");
46
- Object.defineProperty(exports, "ParquetEncoder", { enumerable: true, get: function () { return parquet_encoder_1.ParquetEncoder; } });
47
- var convert_schema_from_parquet_1 = require("./lib/arrow/convert-schema-from-parquet");
48
- Object.defineProperty(exports, "convertParquetSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
49
- Object.defineProperty(exports, "convertParquetToArrowSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
50
- // Geo Metadata
51
- var geoparquet_schema_1 = require("./lib/geo/geoparquet-schema");
52
- Object.defineProperty(exports, "geoJSONSchema", { enumerable: true, get: function () { return __importDefault(geoparquet_schema_1).default; } });
53
- var decode_geo_metadata_1 = require("./lib/geo/decode-geo-metadata");
54
- Object.defineProperty(exports, "getGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.getGeoMetadata; } });
55
- Object.defineProperty(exports, "setGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.setGeoMetadata; } });
56
- Object.defineProperty(exports, "unpackGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.unpackGeoMetadata; } });
package/dist/lib/arrow/convert-columns-to-row-group.js DELETED
@@ -1 +0,0 @@
1
- "use strict";
package/dist/lib/arrow/convert-row-group-to-columns.js DELETED
@@ -1,12 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertParquetRowGroupToColumns = void 0;
5
- function convertParquetRowGroupToColumns(schema, rowGroup) {
6
- const columns = {};
7
- for (const [columnName, data] of Object.entries(rowGroup.columnData)) {
8
- columns[columnName] = columns[columnName] || data.values;
9
- }
10
- return columns;
11
- }
12
- exports.convertParquetRowGroupToColumns = convertParquetRowGroupToColumns;
package/dist/lib/arrow/convert-schema-from-parquet.js DELETED
@@ -1,86 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
- exports.PARQUET_TYPE_MAPPING = {
6
- BOOLEAN: 'bool',
7
- INT32: 'int32',
8
- INT64: 'float64',
9
- INT96: 'float64',
10
- FLOAT: 'float32',
11
- DOUBLE: 'float64',
12
- BYTE_ARRAY: 'binary',
13
- FIXED_LEN_BYTE_ARRAY: 'binary',
14
- UTF8: 'utf8',
15
- DATE: 'int32',
16
- TIME_MILLIS: 'int64',
17
- TIME_MICROS: 'int64',
18
- TIMESTAMP_MILLIS: 'int64',
19
- TIMESTAMP_MICROS: 'int64',
20
- UINT_8: 'int32',
21
- UINT_16: 'uint16',
22
- UINT_32: 'uint32',
23
- UINT_64: 'uint64',
24
- INT_8: 'int8',
25
- INT_16: 'int16',
26
- INT_32: 'int32',
27
- INT_64: 'int64',
28
- JSON: 'binary',
29
- BSON: 'binary',
30
- // TODO check interal type
31
- INTERVAL: 'binary',
32
- DECIMAL_INT32: 'float32',
33
- DECIMAL_INT64: 'float64',
34
- DECIMAL_BYTE_ARRAY: 'float64',
35
- DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
36
- };
37
- function convertParquetSchema(parquetSchema, parquetMetadata) {
38
- const fields = getFields(parquetSchema.schema);
39
- const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
40
- const schema = {
41
- fields,
42
- metadata: metadata || {}
43
- };
44
- return schema;
45
- }
46
- exports.convertParquetSchema = convertParquetSchema;
47
- function getFields(schema) {
48
- const fields = [];
49
- for (const name in schema) {
50
- const field = schema[name];
51
- if (field.fields) {
52
- const children = getFields(field.fields);
53
- fields.push({ name, type: { type: 'struct', children }, nullable: field.optional });
54
- }
55
- else {
56
- const type = exports.PARQUET_TYPE_MAPPING[field.type];
57
- const metadata = getFieldMetadata(field);
58
- const arrowField = { name, type, nullable: field.optional, metadata };
59
- fields.push(arrowField);
60
- }
61
- }
62
- return fields;
63
- }
64
- function getFieldMetadata(field) {
65
- let metadata;
66
- for (const key in field) {
67
- if (key !== 'name') {
68
- let value = field[key] || '';
69
- value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
70
- metadata = metadata || {};
71
- metadata[key] = value;
72
- }
73
- }
74
- return metadata;
75
- }
76
- function getSchemaMetadata(parquetMetadata) {
77
- let metadata;
78
- const keyValueList = parquetMetadata.key_value_metadata || [];
79
- for (const { key, value } of keyValueList) {
80
- if (typeof value === 'string') {
81
- metadata = metadata || {};
82
- metadata[key] = value;
83
- }
84
- }
85
- return metadata;
86
- }
package/dist/lib/arrow/convert-schema-to-parquet.js DELETED
@@ -1,71 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertToParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
- exports.PARQUET_TYPE_MAPPING = {
6
- BOOLEAN: 'bool',
7
- INT32: 'int32',
8
- INT64: 'float64',
9
- INT96: 'float64',
10
- FLOAT: 'float32',
11
- DOUBLE: 'float64',
12
- BYTE_ARRAY: 'binary',
13
- FIXED_LEN_BYTE_ARRAY: 'binary',
14
- UTF8: 'utf8',
15
- DATE: 'int32',
16
- TIME_MILLIS: 'int64',
17
- TIME_MICROS: 'int64',
18
- TIMESTAMP_MILLIS: 'int64',
19
- TIMESTAMP_MICROS: 'int64',
20
- UINT_8: 'int32',
21
- UINT_16: 'uint16',
22
- UINT_32: 'uint32',
23
- UINT_64: 'uint64',
24
- INT_8: 'int8',
25
- INT_16: 'int16',
26
- INT_32: 'int32',
27
- INT_64: 'int64',
28
- JSON: 'binary',
29
- BSON: 'binary',
30
- // TODO check interval type
31
- INTERVAL: 'binary',
32
- DECIMAL_INT32: 'float32',
33
- DECIMAL_INT64: 'float64',
34
- DECIMAL_BYTE_ARRAY: 'float64',
35
- DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
36
- };
37
- function convertToParquetSchema(schema) {
38
- const fields = []; // getFields(schema.fields);
39
- // TODO add metadata if needed.
40
- return { fields, metadata: {} };
41
- }
42
- exports.convertToParquetSchema = convertToParquetSchema;
43
- // function getFields(schema: Field[]): Definition[] {
44
- // const fields: Field[] = [];
45
- // for (const name in schema) {
46
- // const field = schema[name];
47
- // // @ts-ignore
48
- // const children = field.children as DataType[];
49
- // if (children) {
50
- // const childField = getFields(field.fields);
51
- // const nestedField = new Field(name, new Struct(childField), field.optional);
52
- // fields.push(nestedField);
53
- // } else {
54
- // const FieldType = PARQUET_TYPE_MAPPING[field.type];
55
- // const metadata = getFieldMetadata(field);
56
- // const arrowField = new Field(name, new FieldType(), field.optional, metadata);
57
- // fields.push(arrowField);
58
- // }
59
- // }
60
- // return fields;
61
- // }
62
- // function getFieldMetadata(field: ParquetField): Map<string, string> {
63
- // const metadata = new Map();
64
- // for (const key in field) {
65
- // if (key !== 'name') {
66
- // const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
67
- // metadata.set(key, value);
68
- // }
69
- // }
70
- // return metadata;
71
- // }
package/dist/lib/geo/decode-geo-metadata.js DELETED
@@ -1,77 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = void 0;
4
- /**
5
- * Reads the GeoMetadata object from the metadata
6
- * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
7
- function getGeoMetadata(schema) {
8
- const stringifiedGeoMetadata = schema.metadata.geo;
9
- if (!stringifiedGeoMetadata) {
10
- return null;
11
- }
12
- try {
13
- const geoMetadata = JSON.parse(stringifiedGeoMetadata);
14
- return geoMetadata;
15
- }
16
- catch {
17
- return null;
18
- }
19
- }
20
- exports.getGeoMetadata = getGeoMetadata;
21
- /**
22
- * Stores a geoarrow / geoparquet geo metadata object in the schema
23
- * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
24
- */
25
- function setGeoMetadata(schema, geoMetadata) {
26
- const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
27
- schema.metadata.geo = stringifiedGeoMetadata;
28
- }
29
- exports.setGeoMetadata = setGeoMetadata;
30
- /**
31
- * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
32
- * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
33
- */
34
- function unpackGeoMetadata(schema) {
35
- const geoMetadata = getGeoMetadata(schema);
36
- if (!geoMetadata) {
37
- return;
38
- }
39
- // Store Parquet Schema Level Metadata
40
- const { version, primary_column, columns } = geoMetadata;
41
- if (version) {
42
- schema.metadata['geo.version'] = version;
43
- }
44
- if (primary_column) {
45
- schema.metadata['geo.primary_column'] = primary_column;
46
- }
47
- // store column names as comma separated list
48
- schema.metadata['geo.columns'] = Object.keys(columns || {}).join('');
49
- for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
50
- const field = schema.fields.find((field) => field.name === columnName);
51
- if (field) {
52
- if (field.name === primary_column) {
53
- setFieldMetadata(field, 'geo.primary_field', 'true');
54
- }
55
- unpackGeoFieldMetadata(field, columnMetadata);
56
- }
57
- }
58
- }
59
- exports.unpackGeoMetadata = unpackGeoMetadata;
60
- function unpackGeoFieldMetadata(field, columnMetadata) {
61
- for (const [key, value] of Object.entries(columnMetadata || {})) {
62
- switch (key) {
63
- case 'geometry_type':
64
- setFieldMetadata(field, `geo.${key}`, value.join(','));
65
- break;
66
- case 'bbox':
67
- case 'crs':
68
- case 'edges':
69
- default:
70
- setFieldMetadata(field, `geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
71
- }
72
- }
73
- }
74
- function setFieldMetadata(field, key, value) {
75
- field.metadata = field.metadata || {};
76
- field.metadata[key] = value;
77
- }
package/dist/lib/geo/geoparquet-schema.js DELETED
@@ -1,69 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- /* eslint-disable camelcase */
5
- /**
6
- * Geoparquet JSON schema for geo metadata
7
- * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
8
- * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
9
- */
10
- exports.default = {
11
- $schema: 'http://json-schema.org/draft-07/schema#',
12
- title: 'GeoParquet',
13
- description: 'Parquet metadata included in the geo field.',
14
- type: 'object',
15
- required: ['version', 'primary_column', 'columns'],
16
- properties: {
17
- version: { type: 'string', const: '1.0.0-beta.1' },
18
- primary_column: { type: 'string', minLength: 1 },
19
- columns: {
20
- type: 'object',
21
- minProperties: 1,
22
- patternProperties: {
23
- '.+': {
24
- type: 'object',
25
- required: ['encoding', 'geometry_types'],
26
- properties: {
27
- encoding: { type: 'string', const: 'WKB' },
28
- geometry_types: {
29
- type: 'array',
30
- uniqueItems: true,
31
- items: {
32
- type: 'string',
33
- pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
34
- }
35
- },
36
- crs: {
37
- oneOf: [
38
- {
39
- $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
40
- },
41
- { type: 'null' }
42
- ]
43
- },
44
- edges: { type: 'string', enum: ['planar', 'spherical'] },
45
- orientation: { type: 'string', const: 'counterclockwise' },
46
- bbox: {
47
- type: 'array',
48
- items: { type: 'number' },
49
- oneOf: [
50
- {
51
- description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
52
- minItems: 4,
53
- maxItems: 4
54
- },
55
- {
56
- description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
57
- minItems: 6,
58
- maxItems: 6
59
- }
60
- ]
61
- },
62
- epoch: { type: 'number' }
63
- }
64
- }
65
- },
66
- additionalProperties: false
67
- }
68
- }
69
- };
package/dist/lib/parsers/parse-parquet-to-columns.js DELETED
@@ -1,46 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.parseParquetFileInColumnarBatches = exports.parseParquetInColumns = void 0;
5
- const loader_utils_1 = require("@loaders.gl/loader-utils");
6
- const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
7
- const convert_schema_from_parquet_1 = require("../arrow/convert-schema-from-parquet");
8
- const shred_1 = require("../../parquetjs/schema/shred");
9
- // import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
10
- const decode_geo_metadata_1 = require("../geo/decode-geo-metadata");
11
- async function parseParquetInColumns(arrayBuffer, options) {
12
- const blob = new Blob([arrayBuffer]);
13
- for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
14
- return {
15
- shape: 'columnar-table',
16
- schema: batch.schema,
17
- data: batch.data
18
- };
19
- }
20
- throw new Error('empty table');
21
- }
22
- exports.parseParquetInColumns = parseParquetInColumns;
23
- async function* parseParquetFileInColumnarBatches(blob, options) {
24
- const file = (0, loader_utils_1.makeReadableFile)(blob);
25
- const reader = new parquet_reader_1.ParquetReader(file);
26
- const parquetSchema = await reader.getSchema();
27
- const parquetMetadata = await reader.getFileMetadata();
28
- const schema = (0, convert_schema_from_parquet_1.convertParquetSchema)(parquetSchema, parquetMetadata);
29
- (0, decode_geo_metadata_1.unpackGeoMetadata)(schema);
30
- const rowGroups = reader.rowGroupIterator(options?.parquet);
31
- for await (const rowGroup of rowGroups) {
32
- yield convertRowGroupToTableBatch(parquetSchema, rowGroup, schema);
33
- }
34
- }
35
- exports.parseParquetFileInColumnarBatches = parseParquetFileInColumnarBatches;
36
- function convertRowGroupToTableBatch(parquetSchema, rowGroup, schema) {
37
- // const data = convertParquetRowGroupToColumns(schema, rowGroup);
38
- const data = (0, shred_1.materializeColumns)(parquetSchema, rowGroup);
39
- return {
40
- shape: 'columnar-table',
41
- batchType: 'data',
42
- schema,
43
- data,
44
- length: rowGroup.rowCount
45
- };
46
- }
package/dist/lib/parsers/parse-parquet-to-rows.js DELETED
@@ -1,37 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.parseParquetFileInBatches = exports.parseParquet = void 0;
4
- // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
5
- // import {ColumnarTableBatch} from '@loaders.gl/schema';
6
- const loader_utils_1 = require("@loaders.gl/loader-utils");
7
- const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
8
- async function parseParquet(arrayBuffer, options) {
9
- const blob = new Blob([arrayBuffer]);
10
- const rows = [];
11
- for await (const batch of parseParquetFileInBatches(blob, options)) {
12
- // we have only one input batch so return
13
- for (const row of batch.data) {
14
- rows.push(row);
15
- }
16
- }
17
- return {
18
- shape: 'object-row-table',
19
- // TODO - spread can fail for very large number of batches
20
- data: rows
21
- };
22
- }
23
- exports.parseParquet = parseParquet;
24
- async function* parseParquetFileInBatches(blob, options) {
25
- const file = (0, loader_utils_1.makeReadableFile)(blob);
26
- const reader = new parquet_reader_1.ParquetReader(file);
27
- const rowBatches = reader.rowBatchIterator(options?.parquet);
28
- for await (const rows of rowBatches) {
29
- yield {
30
- shape: 'object-row-table',
31
- data: rows,
32
- batchType: 'data',
33
- length: rows.length
34
- };
35
- }
36
- }
37
- exports.parseParquetFileInBatches = parseParquetFileInBatches;
package/dist/lib/wasm/encode-parquet-wasm.js DELETED
@@ -1,30 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.tableToIPC = exports.encode = void 0;
4
- const apache_arrow_1 = require("apache-arrow");
5
- const load_wasm_1 = require("./load-wasm");
6
- /**
7
- * Encode Arrow Table to Parquet buffer
8
- */
9
- async function encode(table, options) {
10
- const wasmUrl = options?.parquet?.wasmUrl;
11
- const wasm = await (0, load_wasm_1.loadWasm)(wasmUrl);
12
- const arrowIPCBytes = tableToIPC(table);
13
- // TODO: provide options for how to write table.
14
- const writerProperties = new wasm.WriterPropertiesBuilder().build();
15
- const parquetBytes = wasm.writeParquet(arrowIPCBytes, writerProperties);
16
- return parquetBytes.buffer.slice(parquetBytes.byteOffset, parquetBytes.byteLength + parquetBytes.byteOffset);
17
- }
18
- exports.encode = encode;
19
- /**
20
- * Serialize a {@link Table} to the IPC format. This function is a convenience
21
- * wrapper for {@link RecordBatchStreamWriter} and {@link RecordBatchFileWriter}.
22
- * Opposite of {@link tableFromIPC}.
23
- *
24
- * @param table The Table to serialize.
25
- * @param type Whether to serialize the Table as a file or a stream.
26
- */
27
- function tableToIPC(table) {
28
- return apache_arrow_1.RecordBatchStreamWriter.writeAll(table).toUint8Array(true);
29
- }
30
- exports.tableToIPC = tableToIPC;
package/dist/lib/wasm/load-wasm/index.js DELETED
@@ -1,5 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.loadWasm = void 0;
4
- var load_wasm_node_1 = require("./load-wasm-node");
5
- Object.defineProperty(exports, "loadWasm", { enumerable: true, get: function () { return load_wasm_node_1.loadWasm; } });
package/dist/lib/wasm/load-wasm/load-wasm-browser.js DELETED
@@ -1,38 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || function (mod) {
19
- if (mod && mod.__esModule) return mod;
20
- var result = {};
21
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
- __setModuleDefault(result, mod);
23
- return result;
24
- };
25
- Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.loadWasm = void 0;
27
- const wasmEsm = __importStar(require("parquet-wasm/esm2/arrow1"));
28
- let cached = null;
29
- async function loadWasm(wasmUrl) {
30
- if (cached !== null) {
31
- return cached;
32
- }
33
- // For ESM bundles, need to await the default export, which loads the WASM
34
- await wasmEsm.default(wasmUrl);
35
- cached = wasmEsm;
36
- return wasmEsm;
37
- }
38
- exports.loadWasm = loadWasm;
package/dist/lib/wasm/load-wasm/load-wasm-node.js DELETED
@@ -1,31 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || function (mod) {
19
- if (mod && mod.__esModule) return mod;
20
- var result = {};
21
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
- __setModuleDefault(result, mod);
23
- return result;
24
- };
25
- Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.loadWasm = void 0;
27
- const wasmNode = __importStar(require("parquet-wasm/node/arrow1"));
28
- async function loadWasm(wasmUrl) {
29
- return wasmNode;
30
- }
31
- exports.loadWasm = loadWasm;