@loaders.gl/parquet 4.0.0-alpha.22 → 4.0.0-alpha.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/dist/dist.min.js +17 -17
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +0 -3
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/parquet-loader.js +1 -1
  6. package/dist/es5/parquet-wasm-loader.js +1 -1
  7. package/dist/es5/parquet-wasm-writer.js +1 -1
  8. package/dist/es5/parquet-writer.js +2 -3
  9. package/dist/es5/parquet-writer.js.map +1 -1
  10. package/dist/esm/index.js +0 -1
  11. package/dist/esm/index.js.map +1 -1
  12. package/dist/esm/parquet-loader.js +1 -1
  13. package/dist/esm/parquet-wasm-loader.js +1 -1
  14. package/dist/esm/parquet-wasm-writer.js +1 -1
  15. package/dist/esm/parquet-writer.js +2 -3
  16. package/dist/esm/parquet-writer.js.map +1 -1
  17. package/dist/index.d.ts +0 -1
  18. package/dist/index.d.ts.map +1 -1
  19. package/dist/parquet-worker.js +17 -17
  20. package/dist/parquet-worker.js.map +3 -3
  21. package/dist/parquet-writer.d.ts.map +1 -1
  22. package/package.json +11 -9
  23. package/src/index.ts +0 -3
  24. package/src/parquet-writer.ts +1 -3
  25. package/dist/bundle.js +0 -5
  26. package/dist/constants.js +0 -18
  27. package/dist/index.js +0 -58
  28. package/dist/lib/arrow/convert-columns-to-row-group.js +0 -1
  29. package/dist/lib/arrow/convert-row-group-to-columns.js +0 -12
  30. package/dist/lib/arrow/convert-schema-from-parquet.js +0 -86
  31. package/dist/lib/arrow/convert-schema-to-parquet.js +0 -71
  32. package/dist/lib/geo/decode-geo-metadata.js +0 -77
  33. package/dist/lib/geo/geoparquet-schema.js +0 -69
  34. package/dist/lib/parsers/parse-parquet-to-columns.js +0 -46
  35. package/dist/lib/parsers/parse-parquet-to-rows.js +0 -37
  36. package/dist/lib/wasm/encode-parquet-wasm.js +0 -30
  37. package/dist/lib/wasm/load-wasm/index.js +0 -5
  38. package/dist/lib/wasm/load-wasm/load-wasm-browser.js +0 -38
  39. package/dist/lib/wasm/load-wasm/load-wasm-node.js +0 -31
  40. package/dist/lib/wasm/parse-parquet-wasm.js +0 -27
  41. package/dist/parquet-loader.js +0 -41
  42. package/dist/parquet-wasm-loader.js +0 -26
  43. package/dist/parquet-wasm-writer.js +0 -24
  44. package/dist/parquet-writer.js +0 -22
  45. package/dist/parquetjs/codecs/declare.js +0 -2
  46. package/dist/parquetjs/codecs/dictionary.js +0 -14
  47. package/dist/parquetjs/codecs/index.js +0 -55
  48. package/dist/parquetjs/codecs/plain.js +0 -211
  49. package/dist/parquetjs/codecs/rle.js +0 -145
  50. package/dist/parquetjs/compression.js +0 -183
  51. package/dist/parquetjs/encoder/parquet-encoder.js +0 -484
  52. package/dist/parquetjs/parquet-thrift/BoundaryOrder.js +0 -15
  53. package/dist/parquetjs/parquet-thrift/BsonType.js +0 -62
  54. package/dist/parquetjs/parquet-thrift/ColumnChunk.js +0 -211
  55. package/dist/parquetjs/parquet-thrift/ColumnIndex.js +0 -217
  56. package/dist/parquetjs/parquet-thrift/ColumnMetaData.js +0 -402
  57. package/dist/parquetjs/parquet-thrift/ColumnOrder.js +0 -108
  58. package/dist/parquetjs/parquet-thrift/CompressionCodec.js +0 -20
  59. package/dist/parquetjs/parquet-thrift/ConvertedType.js +0 -34
  60. package/dist/parquetjs/parquet-thrift/DataPageHeader.js +0 -170
  61. package/dist/parquetjs/parquet-thrift/DataPageHeaderV2.js +0 -230
  62. package/dist/parquetjs/parquet-thrift/DateType.js +0 -62
  63. package/dist/parquetjs/parquet-thrift/DecimalType.js +0 -109
  64. package/dist/parquetjs/parquet-thrift/DictionaryPageHeader.js +0 -126
  65. package/dist/parquetjs/parquet-thrift/Encoding.js +0 -20
  66. package/dist/parquetjs/parquet-thrift/EnumType.js +0 -62
  67. package/dist/parquetjs/parquet-thrift/FieldRepetitionType.js +0 -15
  68. package/dist/parquetjs/parquet-thrift/FileMetaData.js +0 -260
  69. package/dist/parquetjs/parquet-thrift/IndexPageHeader.js +0 -62
  70. package/dist/parquetjs/parquet-thrift/IntType.js +0 -109
  71. package/dist/parquetjs/parquet-thrift/JsonType.js +0 -62
  72. package/dist/parquetjs/parquet-thrift/KeyValue.js +0 -106
  73. package/dist/parquetjs/parquet-thrift/ListType.js +0 -62
  74. package/dist/parquetjs/parquet-thrift/LogicalType.js +0 -384
  75. package/dist/parquetjs/parquet-thrift/MapType.js +0 -62
  76. package/dist/parquetjs/parquet-thrift/MicroSeconds.js +0 -62
  77. package/dist/parquetjs/parquet-thrift/MilliSeconds.js +0 -62
  78. package/dist/parquetjs/parquet-thrift/NullType.js +0 -62
  79. package/dist/parquetjs/parquet-thrift/OffsetIndex.js +0 -101
  80. package/dist/parquetjs/parquet-thrift/PageEncodingStats.js +0 -131
  81. package/dist/parquetjs/parquet-thrift/PageHeader.js +0 -220
  82. package/dist/parquetjs/parquet-thrift/PageLocation.js +0 -145
  83. package/dist/parquetjs/parquet-thrift/PageType.js +0 -16
  84. package/dist/parquetjs/parquet-thrift/RowGroup.js +0 -186
  85. package/dist/parquetjs/parquet-thrift/SchemaElement.js +0 -243
  86. package/dist/parquetjs/parquet-thrift/SortingColumn.js +0 -131
  87. package/dist/parquetjs/parquet-thrift/Statistics.js +0 -180
  88. package/dist/parquetjs/parquet-thrift/StringType.js +0 -62
  89. package/dist/parquetjs/parquet-thrift/TimeType.js +0 -110
  90. package/dist/parquetjs/parquet-thrift/TimeUnit.js +0 -131
  91. package/dist/parquetjs/parquet-thrift/TimestampType.js +0 -110
  92. package/dist/parquetjs/parquet-thrift/Type.js +0 -20
  93. package/dist/parquetjs/parquet-thrift/TypeDefinedOrder.js +0 -62
  94. package/dist/parquetjs/parquet-thrift/UUIDType.js +0 -62
  95. package/dist/parquetjs/parquet-thrift/index.js +0 -65
  96. package/dist/parquetjs/parser/decoders.js +0 -318
  97. package/dist/parquetjs/parser/parquet-reader.js +0 -200
  98. package/dist/parquetjs/schema/declare.js +0 -12
  99. package/dist/parquetjs/schema/schema.js +0 -162
  100. package/dist/parquetjs/schema/shred.js +0 -355
  101. package/dist/parquetjs/schema/types.js +0 -416
  102. package/dist/parquetjs/utils/file-utils.js +0 -43
  103. package/dist/parquetjs/utils/read-utils.js +0 -109
  104. package/dist/workers/parquet-worker.js +0 -5
package/dist/parquet-writer.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"parquet-writer.d.ts","sourceRoot":"","sources":["../src/parquet-writer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AACrD,OAAO,EAAC,KAAK,EAAE,UAAU,EAAC,MAAM,oBAAoB,CAAC;AAMrD,MAAM,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAItC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,oBAAoB,CAUzE,CAAC"}
1
+ {"version":3,"file":"parquet-writer.d.ts","sourceRoot":"","sources":["../src/parquet-writer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAC,MAAM,EAAC,MAAM,0BAA0B,CAAC;AACrD,OAAO,EAAC,KAAK,EAAE,UAAU,EAAC,MAAM,oBAAoB,CAAC;AAMrD,MAAM,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEtC,eAAO,MAAM,aAAa,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,oBAAoB,CAUzE,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "4.0.0-alpha.22",
3
+ "version": "4.0.0-alpha.24",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -38,13 +38,15 @@
38
38
  "tls": false,
39
39
  "stream": false,
40
40
  "fs": false,
41
- "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
41
+ "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts",
42
+ "./dist/es5/lib/wasm/load-wasm/load-wasm-node.js": "./dist/es5/lib/wasm/load-wasm/load-wasm-browser.js",
43
+ "./dist/esm/lib/wasm/load-wasm/load-wasm-node.js": "./dist/esm/lib/wasm/load-wasm/load-wasm-browser.js"
42
44
  },
43
45
  "dependencies": {
44
- "@loaders.gl/bson": "4.0.0-alpha.22",
45
- "@loaders.gl/compression": "4.0.0-alpha.22",
46
- "@loaders.gl/loader-utils": "4.0.0-alpha.22",
47
- "@loaders.gl/schema": "4.0.0-alpha.22",
46
+ "@loaders.gl/bson": "4.0.0-alpha.24",
47
+ "@loaders.gl/compression": "4.0.0-alpha.24",
48
+ "@loaders.gl/loader-utils": "4.0.0-alpha.24",
49
+ "@loaders.gl/schema": "4.0.0-alpha.24",
48
50
  "async-mutex": "^0.2.2",
49
51
  "brotli": "^1.3.2",
50
52
  "int53": "^0.2.4",
@@ -58,14 +60,14 @@
58
60
  "zstd-codec": "^0.1"
59
61
  },
60
62
  "peerDependencies": {
61
- "apache-arrow": "^9.0.0"
63
+ "apache-arrow": "^13.0.0"
62
64
  },
63
65
  "devDependencies": {
64
66
  "@types/node": "^10.14.15",
65
67
  "@types/node-int64": "^0.4.29",
66
68
  "@types/thrift": "^0.10.8",
67
69
  "@types/varint": "^5.0.0",
68
- "apache-arrow": "^9.0.0"
70
+ "apache-arrow": "^13.0.0"
69
71
  },
70
- "gitHead": "0da838c506d1275383f2fd3d244d9c72b25397d2"
72
+ "gitHead": "97a8990595c132fb14e3445a8768d9f4cb98ff05"
71
73
  }
package/src/index.ts CHANGED
@@ -73,9 +73,6 @@ export {
73
73
  convertParquetSchema as convertParquetToArrowSchema
74
74
  } from './lib/arrow/convert-schema-from-parquet';
75
75
 
76
- // TESTS
77
- export const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;
78
-
79
76
  // Geo Metadata
80
77
  export {default as geoJSONSchema} from './lib/geo/geoparquet-schema';
81
78
 
package/src/parquet-writer.ts CHANGED
@@ -9,8 +9,6 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
9
9
 
10
10
  export type ParquetWriterOptions = {};
11
11
 
12
- const DEFAULT_PARQUET_LOADER_OPTIONS = {};
13
-
14
12
  export const ParquetWriter: Writer<Table, TableBatch, ParquetWriterOptions> = {
15
13
  name: 'Apache Parquet',
16
14
  id: 'parquet',
@@ -20,7 +18,7 @@ export const ParquetWriter: Writer<Table, TableBatch, ParquetWriterOptions> = {
20
18
  mimeTypes: ['application/octet-stream'],
21
19
  encodeSync,
22
20
  binary: true,
23
- options: DEFAULT_PARQUET_LOADER_OPTIONS
21
+ options: {}
24
22
  };
25
23
 
26
24
  function encodeSync(data, options?: ParquetWriterOptions) {
package/dist/bundle.js DELETED
@@ -1,5 +0,0 @@
1
- "use strict";
2
- // @ts-nocheck
3
- const moduleExports = require('./index');
4
- globalThis.loaders = globalThis.loaders || {};
5
- module.exports = Object.assign(globalThis.loaders, moduleExports);
package/dist/constants.js DELETED
@@ -1,18 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.PARQUET_RDLVL_ENCODING = exports.PARQUET_RDLVL_TYPE = exports.PARQUET_VERSION = exports.PARQUET_MAGIC_ENCRYPTED = exports.PARQUET_MAGIC = void 0;
4
- // Forked from https://github.com/kbajalc/parquets under MIT license (Copyright (c) 2017 ironSource Ltd.)
5
- /**
6
- * Parquet File Magic String
7
- */
8
- exports.PARQUET_MAGIC = 'PAR1';
9
- exports.PARQUET_MAGIC_ENCRYPTED = 'PARE';
10
- /**
11
- * Parquet File Format Version
12
- */
13
- exports.PARQUET_VERSION = 1;
14
- /**
15
- * Internal type used for repetition/definition levels
16
- */
17
- exports.PARQUET_RDLVL_TYPE = 'INT32';
18
- exports.PARQUET_RDLVL_ENCODING = 'RLE';
package/dist/index.js DELETED
@@ -1,58 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- var __importDefault = (this && this.__importDefault) || function (mod) {
4
- return (mod && mod.__esModule) ? mod : { "default": mod };
5
- };
6
- Object.defineProperty(exports, "__esModule", { value: true });
7
- exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = exports.geoJSONSchema = exports._typecheckParquetLoader = exports.convertParquetToArrowSchema = exports.convertParquetSchema = exports.ParquetEncoder = exports.ParquetReader = exports.ParquetSchema = exports.preloadCompressions = exports.ParquetWasmWriter = exports._ParquetWriter = exports.ParquetWasmLoader = exports.ParquetColumnarLoader = exports.ParquetLoader = exports.ParquetWasmWorkerLoader = exports.ParquetWorkerLoader = void 0;
8
- // ParquetLoader
9
- const parquet_loader_1 = require("./parquet-loader");
10
- Object.defineProperty(exports, "ParquetWorkerLoader", { enumerable: true, get: function () { return parquet_loader_1.ParquetLoader; } });
11
- const parse_parquet_to_rows_1 = require("./lib/parsers/parse-parquet-to-rows");
12
- const parse_parquet_to_columns_1 = require("./lib/parsers/parse-parquet-to-columns");
13
- const parse_parquet_wasm_1 = require("./lib/wasm/parse-parquet-wasm");
14
- const parquet_wasm_loader_1 = require("./parquet-wasm-loader");
15
- Object.defineProperty(exports, "ParquetWasmWorkerLoader", { enumerable: true, get: function () { return parquet_wasm_loader_1.ParquetWasmLoader; } });
16
- /** ParquetJS table loader */
17
- exports.ParquetLoader = {
18
- ...parquet_loader_1.ParquetLoader,
19
- parse: parse_parquet_to_rows_1.parseParquet,
20
- parseFileInBatches: parse_parquet_to_rows_1.parseParquetFileInBatches
21
- };
22
- /** ParquetJS table loader */
23
- // @ts-expect-error
24
- exports.ParquetColumnarLoader = {
25
- ...parquet_loader_1.ParquetLoader,
26
- parse: parse_parquet_to_columns_1.parseParquetInColumns,
27
- parseFileInBatches: parse_parquet_to_columns_1.parseParquetFileInColumnarBatches
28
- };
29
- exports.ParquetWasmLoader = {
30
- ...parquet_wasm_loader_1.ParquetWasmLoader,
31
- parse: parse_parquet_wasm_1.parseParquetWasm
32
- };
33
- // ParquetWriter
34
- var parquet_writer_1 = require("./parquet-writer");
35
- Object.defineProperty(exports, "_ParquetWriter", { enumerable: true, get: function () { return parquet_writer_1.ParquetWriter; } });
36
- var parquet_wasm_writer_1 = require("./parquet-wasm-writer");
37
- Object.defineProperty(exports, "ParquetWasmWriter", { enumerable: true, get: function () { return parquet_wasm_writer_1.ParquetWasmWriter; } });
38
- // EXPERIMENTAL - expose the internal parquetjs API
39
- var compression_1 = require("./parquetjs/compression");
40
- Object.defineProperty(exports, "preloadCompressions", { enumerable: true, get: function () { return compression_1.preloadCompressions; } });
41
- var schema_1 = require("./parquetjs/schema/schema");
42
- Object.defineProperty(exports, "ParquetSchema", { enumerable: true, get: function () { return schema_1.ParquetSchema; } });
43
- var parquet_reader_1 = require("./parquetjs/parser/parquet-reader");
44
- Object.defineProperty(exports, "ParquetReader", { enumerable: true, get: function () { return parquet_reader_1.ParquetReader; } });
45
- var parquet_encoder_1 = require("./parquetjs/encoder/parquet-encoder");
46
- Object.defineProperty(exports, "ParquetEncoder", { enumerable: true, get: function () { return parquet_encoder_1.ParquetEncoder; } });
47
- var convert_schema_from_parquet_1 = require("./lib/arrow/convert-schema-from-parquet");
48
- Object.defineProperty(exports, "convertParquetSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
49
- Object.defineProperty(exports, "convertParquetToArrowSchema", { enumerable: true, get: function () { return convert_schema_from_parquet_1.convertParquetSchema; } });
50
- // TESTS
51
- exports._typecheckParquetLoader = exports.ParquetLoader;
52
- // Geo Metadata
53
- var geoparquet_schema_1 = require("./lib/geo/geoparquet-schema");
54
- Object.defineProperty(exports, "geoJSONSchema", { enumerable: true, get: function () { return __importDefault(geoparquet_schema_1).default; } });
55
- var decode_geo_metadata_1 = require("./lib/geo/decode-geo-metadata");
56
- Object.defineProperty(exports, "getGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.getGeoMetadata; } });
57
- Object.defineProperty(exports, "setGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.setGeoMetadata; } });
58
- Object.defineProperty(exports, "unpackGeoMetadata", { enumerable: true, get: function () { return decode_geo_metadata_1.unpackGeoMetadata; } });
package/dist/lib/arrow/convert-columns-to-row-group.js DELETED
@@ -1 +0,0 @@
1
- "use strict";
package/dist/lib/arrow/convert-row-group-to-columns.js DELETED
@@ -1,12 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertParquetRowGroupToColumns = void 0;
5
- function convertParquetRowGroupToColumns(schema, rowGroup) {
6
- const columns = {};
7
- for (const [columnName, data] of Object.entries(rowGroup.columnData)) {
8
- columns[columnName] = columns[columnName] || data.values;
9
- }
10
- return columns;
11
- }
12
- exports.convertParquetRowGroupToColumns = convertParquetRowGroupToColumns;
package/dist/lib/arrow/convert-schema-from-parquet.js DELETED
@@ -1,86 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
- exports.PARQUET_TYPE_MAPPING = {
6
- BOOLEAN: 'bool',
7
- INT32: 'int32',
8
- INT64: 'float64',
9
- INT96: 'float64',
10
- FLOAT: 'float32',
11
- DOUBLE: 'float64',
12
- BYTE_ARRAY: 'binary',
13
- FIXED_LEN_BYTE_ARRAY: 'binary',
14
- UTF8: 'utf8',
15
- DATE: 'int32',
16
- TIME_MILLIS: 'int64',
17
- TIME_MICROS: 'int64',
18
- TIMESTAMP_MILLIS: 'int64',
19
- TIMESTAMP_MICROS: 'int64',
20
- UINT_8: 'int32',
21
- UINT_16: 'uint16',
22
- UINT_32: 'uint32',
23
- UINT_64: 'uint64',
24
- INT_8: 'int8',
25
- INT_16: 'int16',
26
- INT_32: 'int32',
27
- INT_64: 'int64',
28
- JSON: 'binary',
29
- BSON: 'binary',
30
- // TODO check interval type
31
- INTERVAL: 'binary',
32
- DECIMAL_INT32: 'float32',
33
- DECIMAL_INT64: 'float64',
34
- DECIMAL_BYTE_ARRAY: 'float64',
35
- DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
36
- };
37
- function convertParquetSchema(parquetSchema, parquetMetadata) {
38
- const fields = getFields(parquetSchema.schema);
39
- const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
40
- const schema = {
41
- fields,
42
- metadata: metadata || {}
43
- };
44
- return schema;
45
- }
46
- exports.convertParquetSchema = convertParquetSchema;
47
- function getFields(schema) {
48
- const fields = [];
49
- for (const name in schema) {
50
- const field = schema[name];
51
- if (field.fields) {
52
- const children = getFields(field.fields);
53
- fields.push({ name, type: { type: 'struct', children }, nullable: field.optional });
54
- }
55
- else {
56
- const type = exports.PARQUET_TYPE_MAPPING[field.type];
57
- const metadata = getFieldMetadata(field);
58
- const arrowField = { name, type, nullable: field.optional, metadata };
59
- fields.push(arrowField);
60
- }
61
- }
62
- return fields;
63
- }
64
- function getFieldMetadata(field) {
65
- let metadata;
66
- for (const key in field) {
67
- if (key !== 'name') {
68
- let value = field[key] || '';
69
- value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
70
- metadata = metadata || {};
71
- metadata[key] = value;
72
- }
73
- }
74
- return metadata;
75
- }
76
- function getSchemaMetadata(parquetMetadata) {
77
- let metadata;
78
- const keyValueList = parquetMetadata.key_value_metadata || [];
79
- for (const { key, value } of keyValueList) {
80
- if (typeof value === 'string') {
81
- metadata = metadata || {};
82
- metadata[key] = value;
83
- }
84
- }
85
- return metadata;
86
- }
package/dist/lib/arrow/convert-schema-to-parquet.js DELETED
@@ -1,71 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.convertToParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
- exports.PARQUET_TYPE_MAPPING = {
6
- BOOLEAN: 'bool',
7
- INT32: 'int32',
8
- INT64: 'float64',
9
- INT96: 'float64',
10
- FLOAT: 'float32',
11
- DOUBLE: 'float64',
12
- BYTE_ARRAY: 'binary',
13
- FIXED_LEN_BYTE_ARRAY: 'binary',
14
- UTF8: 'utf8',
15
- DATE: 'int32',
16
- TIME_MILLIS: 'int64',
17
- TIME_MICROS: 'int64',
18
- TIMESTAMP_MILLIS: 'int64',
19
- TIMESTAMP_MICROS: 'int64',
20
- UINT_8: 'int32',
21
- UINT_16: 'uint16',
22
- UINT_32: 'uint32',
23
- UINT_64: 'uint64',
24
- INT_8: 'int8',
25
- INT_16: 'int16',
26
- INT_32: 'int32',
27
- INT_64: 'int64',
28
- JSON: 'binary',
29
- BSON: 'binary',
30
- // TODO check interval type
31
- INTERVAL: 'binary',
32
- DECIMAL_INT32: 'float32',
33
- DECIMAL_INT64: 'float64',
34
- DECIMAL_BYTE_ARRAY: 'float64',
35
- DECIMAL_FIXED_LEN_BYTE_ARRAY: 'float64'
36
- };
37
- function convertToParquetSchema(schema) {
38
- const fields = []; // getFields(schema.fields);
39
- // TODO add metadata if needed.
40
- return { fields, metadata: {} };
41
- }
42
- exports.convertToParquetSchema = convertToParquetSchema;
43
- // function getFields(schema: Field[]): Definition[] {
44
- // const fields: Field[] = [];
45
- // for (const name in schema) {
46
- // const field = schema[name];
47
- // // @ts-ignore
48
- // const children = field.children as DataType[];
49
- // if (children) {
50
- // const childField = getFields(field.fields);
51
- // const nestedField = new Field(name, new Struct(childField), field.optional);
52
- // fields.push(nestedField);
53
- // } else {
54
- // const FieldType = PARQUET_TYPE_MAPPING[field.type];
55
- // const metadata = getFieldMetadata(field);
56
- // const arrowField = new Field(name, new FieldType(), field.optional, metadata);
57
- // fields.push(arrowField);
58
- // }
59
- // }
60
- // return fields;
61
- // }
62
- // function getFieldMetadata(field: ParquetField): Map<string, string> {
63
- // const metadata = new Map();
64
- // for (const key in field) {
65
- // if (key !== 'name') {
66
- // const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
67
- // metadata.set(key, value);
68
- // }
69
- // }
70
- // return metadata;
71
- // }
package/dist/lib/geo/decode-geo-metadata.js DELETED
@@ -1,77 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = void 0;
4
- /**
5
- * Reads the GeoMetadata object from the metadata
6
- * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
7
- function getGeoMetadata(schema) {
8
- const stringifiedGeoMetadata = schema.metadata.geo;
9
- if (!stringifiedGeoMetadata) {
10
- return null;
11
- }
12
- try {
13
- const geoMetadata = JSON.parse(stringifiedGeoMetadata);
14
- return geoMetadata;
15
- }
16
- catch {
17
- return null;
18
- }
19
- }
20
- exports.getGeoMetadata = getGeoMetadata;
21
- /**
22
- * Stores a geoarrow / geoparquet geo metadata object in the schema
23
- * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
24
- */
25
- function setGeoMetadata(schema, geoMetadata) {
26
- const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
27
- schema.metadata.geo = stringifiedGeoMetadata;
28
- }
29
- exports.setGeoMetadata = setGeoMetadata;
30
- /**
31
- * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
32
- * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
33
- */
34
- function unpackGeoMetadata(schema) {
35
- const geoMetadata = getGeoMetadata(schema);
36
- if (!geoMetadata) {
37
- return;
38
- }
39
- // Store Parquet Schema Level Metadata
40
- const { version, primary_column, columns } = geoMetadata;
41
- if (version) {
42
- schema.metadata['geo.version'] = version;
43
- }
44
- if (primary_column) {
45
- schema.metadata['geo.primary_column'] = primary_column;
46
- }
47
- // store column names as comma separated list
48
- schema.metadata['geo.columns'] = Object.keys(columns || {}).join('');
49
- for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
50
- const field = schema.fields.find((field) => field.name === columnName);
51
- if (field) {
52
- if (field.name === primary_column) {
53
- setFieldMetadata(field, 'geo.primary_field', 'true');
54
- }
55
- unpackGeoFieldMetadata(field, columnMetadata);
56
- }
57
- }
58
- }
59
- exports.unpackGeoMetadata = unpackGeoMetadata;
60
- function unpackGeoFieldMetadata(field, columnMetadata) {
61
- for (const [key, value] of Object.entries(columnMetadata || {})) {
62
- switch (key) {
63
- case 'geometry_type':
64
- setFieldMetadata(field, `geo.${key}`, value.join(','));
65
- break;
66
- case 'bbox':
67
- case 'crs':
68
- case 'edges':
69
- default:
70
- setFieldMetadata(field, `geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
71
- }
72
- }
73
- }
74
- function setFieldMetadata(field, key, value) {
75
- field.metadata = field.metadata || {};
76
- field.metadata[key] = value;
77
- }
package/dist/lib/geo/geoparquet-schema.js DELETED
@@ -1,69 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- /* eslint-disable camelcase */
5
- /**
6
- * Geoparquet JSON schema for geo metadata
7
- * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
8
- * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
9
- */
10
- exports.default = {
11
- $schema: 'http://json-schema.org/draft-07/schema#',
12
- title: 'GeoParquet',
13
- description: 'Parquet metadata included in the geo field.',
14
- type: 'object',
15
- required: ['version', 'primary_column', 'columns'],
16
- properties: {
17
- version: { type: 'string', const: '1.0.0-beta.1' },
18
- primary_column: { type: 'string', minLength: 1 },
19
- columns: {
20
- type: 'object',
21
- minProperties: 1,
22
- patternProperties: {
23
- '.+': {
24
- type: 'object',
25
- required: ['encoding', 'geometry_types'],
26
- properties: {
27
- encoding: { type: 'string', const: 'WKB' },
28
- geometry_types: {
29
- type: 'array',
30
- uniqueItems: true,
31
- items: {
32
- type: 'string',
33
- pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
34
- }
35
- },
36
- crs: {
37
- oneOf: [
38
- {
39
- $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
40
- },
41
- { type: 'null' }
42
- ]
43
- },
44
- edges: { type: 'string', enum: ['planar', 'spherical'] },
45
- orientation: { type: 'string', const: 'counterclockwise' },
46
- bbox: {
47
- type: 'array',
48
- items: { type: 'number' },
49
- oneOf: [
50
- {
51
- description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
52
- minItems: 4,
53
- maxItems: 4
54
- },
55
- {
56
- description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
57
- minItems: 6,
58
- maxItems: 6
59
- }
60
- ]
61
- },
62
- epoch: { type: 'number' }
63
- }
64
- }
65
- },
66
- additionalProperties: false
67
- }
68
- }
69
- };
package/dist/lib/parsers/parse-parquet-to-columns.js DELETED
@@ -1,46 +0,0 @@
1
- "use strict";
2
- // loaders.gl, MIT license
3
- Object.defineProperty(exports, "__esModule", { value: true });
4
- exports.parseParquetFileInColumnarBatches = exports.parseParquetInColumns = void 0;
5
- const loader_utils_1 = require("@loaders.gl/loader-utils");
6
- const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
7
- const convert_schema_from_parquet_1 = require("../arrow/convert-schema-from-parquet");
8
- const shred_1 = require("../../parquetjs/schema/shred");
9
- // import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
10
- const decode_geo_metadata_1 = require("../geo/decode-geo-metadata");
11
- async function parseParquetInColumns(arrayBuffer, options) {
12
- const blob = new Blob([arrayBuffer]);
13
- for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
14
- return {
15
- shape: 'columnar-table',
16
- schema: batch.schema,
17
- data: batch.data
18
- };
19
- }
20
- throw new Error('empty table');
21
- }
22
- exports.parseParquetInColumns = parseParquetInColumns;
23
- async function* parseParquetFileInColumnarBatches(blob, options) {
24
- const file = (0, loader_utils_1.makeReadableFile)(blob);
25
- const reader = new parquet_reader_1.ParquetReader(file);
26
- const parquetSchema = await reader.getSchema();
27
- const parquetMetadata = await reader.getFileMetadata();
28
- const schema = (0, convert_schema_from_parquet_1.convertParquetSchema)(parquetSchema, parquetMetadata);
29
- (0, decode_geo_metadata_1.unpackGeoMetadata)(schema);
30
- const rowGroups = reader.rowGroupIterator(options?.parquet);
31
- for await (const rowGroup of rowGroups) {
32
- yield convertRowGroupToTableBatch(parquetSchema, rowGroup, schema);
33
- }
34
- }
35
- exports.parseParquetFileInColumnarBatches = parseParquetFileInColumnarBatches;
36
- function convertRowGroupToTableBatch(parquetSchema, rowGroup, schema) {
37
- // const data = convertParquetRowGroupToColumns(schema, rowGroup);
38
- const data = (0, shred_1.materializeColumns)(parquetSchema, rowGroup);
39
- return {
40
- shape: 'columnar-table',
41
- batchType: 'data',
42
- schema,
43
- data,
44
- length: rowGroup.rowCount
45
- };
46
- }
package/dist/lib/parsers/parse-parquet-to-rows.js DELETED
@@ -1,37 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.parseParquetFileInBatches = exports.parseParquet = void 0;
4
- // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
5
- // import {ColumnarTableBatch} from '@loaders.gl/schema';
6
- const loader_utils_1 = require("@loaders.gl/loader-utils");
7
- const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
8
- async function parseParquet(arrayBuffer, options) {
9
- const blob = new Blob([arrayBuffer]);
10
- const rows = [];
11
- for await (const batch of parseParquetFileInBatches(blob, options)) {
12
- // gather the rows of every batch into a single array
13
- for (const row of batch.data) {
14
- rows.push(row);
15
- }
16
- }
17
- return {
18
- shape: 'object-row-table',
19
- // TODO - spread can fail for very large number of batches
20
- data: rows
21
- };
22
- }
23
- exports.parseParquet = parseParquet;
24
- async function* parseParquetFileInBatches(blob, options) {
25
- const file = (0, loader_utils_1.makeReadableFile)(blob);
26
- const reader = new parquet_reader_1.ParquetReader(file);
27
- const rowBatches = reader.rowBatchIterator(options?.parquet);
28
- for await (const rows of rowBatches) {
29
- yield {
30
- shape: 'object-row-table',
31
- data: rows,
32
- batchType: 'data',
33
- length: rows.length
34
- };
35
- }
36
- }
37
- exports.parseParquetFileInBatches = parseParquetFileInBatches;
package/dist/lib/wasm/encode-parquet-wasm.js DELETED
@@ -1,30 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.tableToIPC = exports.encode = void 0;
4
- const apache_arrow_1 = require("apache-arrow");
5
- const load_wasm_1 = require("./load-wasm");
6
- /**
7
- * Encode Arrow Table to Parquet buffer
8
- */
9
- async function encode(table, options) {
10
- const wasmUrl = options?.parquet?.wasmUrl;
11
- const wasm = await (0, load_wasm_1.loadWasm)(wasmUrl);
12
- const arrowIPCBytes = tableToIPC(table);
13
- // TODO: provide options for how to write table.
14
- const writerProperties = new wasm.WriterPropertiesBuilder().build();
15
- const parquetBytes = wasm.writeParquet(arrowIPCBytes, writerProperties);
16
- return parquetBytes.buffer.slice(parquetBytes.byteOffset, parquetBytes.byteLength + parquetBytes.byteOffset);
17
- }
18
- exports.encode = encode;
19
- /**
20
- * Serialize a {@link Table} to the IPC format. This function is a convenience
21
- * wrapper for {@link RecordBatchStreamWriter} and {@link RecordBatchFileWriter}.
22
- * Opposite of {@link tableFromIPC}.
23
- *
24
- * @param table The Table to serialize.
25
- * @param type Whether to serialize the Table as a file or a stream.
26
- */
27
- function tableToIPC(table) {
28
- return apache_arrow_1.RecordBatchStreamWriter.writeAll(table).toUint8Array(true);
29
- }
30
- exports.tableToIPC = tableToIPC;
package/dist/lib/wasm/load-wasm/index.js DELETED
@@ -1,5 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.loadWasm = void 0;
4
- var load_wasm_node_1 = require("./load-wasm-node");
5
- Object.defineProperty(exports, "loadWasm", { enumerable: true, get: function () { return load_wasm_node_1.loadWasm; } });
package/dist/lib/wasm/load-wasm/load-wasm-browser.js DELETED
@@ -1,38 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || function (mod) {
19
- if (mod && mod.__esModule) return mod;
20
- var result = {};
21
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
22
- __setModuleDefault(result, mod);
23
- return result;
24
- };
25
- Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.loadWasm = void 0;
27
- const wasmEsm = __importStar(require("parquet-wasm/esm2/arrow1"));
28
- let cached = null;
29
- async function loadWasm(wasmUrl) {
30
- if (cached !== null) {
31
- return cached;
32
- }
33
- // For ESM bundles, need to await the default export, which loads the WASM
34
- await wasmEsm.default(wasmUrl);
35
- cached = wasmEsm;
36
- return wasmEsm;
37
- }
38
- exports.loadWasm = loadWasm;