@loaders.gl/parquet 3.3.2 → 3.4.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/dist/dist.min.js +16 -16
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +46 -5
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  6. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  7. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
  8. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  10. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  11. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
  12. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  13. package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
  14. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  15. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  16. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  17. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  18. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  19. package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +2 -2
  20. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  21. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  22. package/dist/es5/parquet-loader.js +3 -2
  23. package/dist/es5/parquet-loader.js.map +1 -1
  24. package/dist/es5/parquet-wasm-loader.js +1 -1
  25. package/dist/es5/parquet-wasm-loader.js.map +1 -1
  26. package/dist/es5/parquet-wasm-writer.js +1 -1
  27. package/dist/es5/parquet-wasm-writer.js.map +1 -1
  28. package/dist/es5/parquet-writer.js +1 -1
  29. package/dist/es5/parquet-writer.js.map +1 -1
  30. package/dist/esm/index.js +12 -2
  31. package/dist/esm/index.js.map +1 -1
  32. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  33. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  34. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
  35. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  36. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
  37. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  38. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
  39. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  40. package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
  41. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  42. package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
  43. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  44. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
  45. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  46. package/dist/esm/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +2 -2
  47. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  48. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  49. package/dist/esm/parquet-loader.js +3 -2
  50. package/dist/esm/parquet-loader.js.map +1 -1
  51. package/dist/esm/parquet-wasm-loader.js +1 -1
  52. package/dist/esm/parquet-wasm-loader.js.map +1 -1
  53. package/dist/esm/parquet-wasm-writer.js +1 -1
  54. package/dist/esm/parquet-wasm-writer.js.map +1 -1
  55. package/dist/esm/parquet-writer.js +1 -1
  56. package/dist/esm/parquet-writer.js.map +1 -1
  57. package/dist/index.d.ts +23 -3
  58. package/dist/index.d.ts.map +1 -1
  59. package/dist/index.js +24 -6
  60. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  61. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  62. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  63. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  64. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  65. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  66. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  67. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  68. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  69. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  70. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  71. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  72. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  73. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  74. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  75. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  76. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  77. package/dist/lib/geo/geoparquet-schema.js +69 -0
  78. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  79. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  80. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  81. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  82. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  83. package/dist/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +1 -1
  84. package/dist/parquet-loader.d.ts +1 -0
  85. package/dist/parquet-loader.d.ts.map +1 -1
  86. package/dist/parquet-loader.js +2 -1
  87. package/dist/parquet-worker.js +19 -19
  88. package/dist/parquet-worker.js.map +3 -3
  89. package/package.json +5 -5
  90. package/src/index.ts +22 -2
  91. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  92. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  93. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  94. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  95. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  96. package/src/lib/geo/geoparquet-schema.ts +69 -0
  97. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  98. package/src/lib/{parse-parquet.ts → parsers/parse-parquet-to-rows.ts} +2 -2
  99. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  100. package/src/parquet-loader.ts +3 -1
  101. package/dist/es5/lib/convert-schema.js.map +0 -1
  102. package/dist/es5/lib/parse-parquet.js.map +0 -1
  103. package/dist/es5/lib/read-array-buffer.js +0 -43
  104. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  105. package/dist/esm/lib/convert-schema.js.map +0 -1
  106. package/dist/esm/lib/parse-parquet.js.map +0 -1
  107. package/dist/esm/lib/read-array-buffer.js +0 -10
  108. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  109. package/dist/lib/convert-schema.d.ts +0 -8
  110. package/dist/lib/convert-schema.d.ts.map +0 -1
  111. package/dist/lib/parse-parquet.d.ts +0 -4
  112. package/dist/lib/parse-parquet.d.ts.map +0 -1
  113. package/dist/lib/read-array-buffer.d.ts +0 -19
  114. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  115. package/dist/lib/read-array-buffer.js +0 -29
  116. package/src/lib/read-array-buffer.ts +0 -31
  117. /package/dist/es5/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.java.disabled} +0 -0
  118. /package/dist/esm/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.java.disabled} +0 -0
  119. /package/src/lib/{convert-schema-deep.ts.disabled → wip/convert-schema-deep.java.disabled} +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@loaders.gl/parquet",
3
- "version": "3.3.2",
3
+ "version": "3.4.0-alpha.2",
4
4
  "description": "Framework-independent loader for Apache Parquet files",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -42,9 +42,9 @@
42
42
  "./src/lib/wasm/load-wasm/load-wasm-node.ts": "./src/lib/wasm/load-wasm/load-wasm-browser.ts"
43
43
  },
44
44
  "dependencies": {
45
- "@loaders.gl/compression": "3.3.2",
46
- "@loaders.gl/loader-utils": "3.3.2",
47
- "@loaders.gl/schema": "3.3.2",
45
+ "@loaders.gl/compression": "3.4.0-alpha.2",
46
+ "@loaders.gl/loader-utils": "3.4.0-alpha.2",
47
+ "@loaders.gl/schema": "3.4.0-alpha.2",
48
48
  "async-mutex": "^0.2.2",
49
49
  "brotli": "^1.3.2",
50
50
  "bson": "^1.0.4",
@@ -70,5 +70,5 @@
70
70
  "@types/varint": "^5.0.0",
71
71
  "apache-arrow": "^4.0.0"
72
72
  },
73
- "gitHead": "a37b0af509ca05468aec6e5667d6a427b9c15d29"
73
+ "gitHead": "f1c00c124d8d0c41a138ff40afb0d1a00711bf2e"
74
74
  }
package/src/index.ts CHANGED
@@ -4,7 +4,11 @@ import type {LoaderWithParser} from '@loaders.gl/loader-utils';
4
4
 
5
5
  import {ParquetWasmLoader as ParquetWasmWorkerLoader} from './parquet-wasm-loader';
6
6
  import {ParquetLoader as ParquetWorkerLoader} from './parquet-loader';
7
- import {parseParquet, parseParquetFileInBatches} from './lib/parse-parquet';
7
+ import {parseParquet, parseParquetFileInBatches} from './lib/parsers/parse-parquet-to-rows';
8
+ import {
9
+ parseParquetInColumns,
10
+ parseParquetFileInColumnarBatches
11
+ } from './lib/parsers/parse-parquet-to-columns';
8
12
  import {parseParquet as parseParquetWasm} from './lib/wasm/parse-parquet-wasm';
9
13
 
10
14
  export {ParquetWorkerLoader, ParquetWasmWorkerLoader};
@@ -16,6 +20,13 @@ export const ParquetLoader = {
16
20
  parseFileInBatches: parseParquetFileInBatches
17
21
  };
18
22
 
23
+ /** ParquetJS table loader */
24
+ export const ParquetColumnarLoader = {
25
+ ...ParquetWorkerLoader,
26
+ parse: parseParquetInColumns,
27
+ parseFileInBatches: parseParquetFileInColumnarBatches
28
+ };
29
+
19
30
  export const ParquetWasmLoader = {
20
31
  ...ParquetWasmWorkerLoader,
21
32
  parse: parseParquetWasm
@@ -34,7 +45,16 @@ export {ParquetSchema} from './parquetjs/schema/schema';
34
45
  export {ParquetReader} from './parquetjs/parser/parquet-reader';
35
46
  export {ParquetEncoder} from './parquetjs/encoder/parquet-encoder';
36
47
 
37
- export {convertParquetToArrowSchema} from './lib/convert-schema';
48
+ export {
49
+ convertSchemaFromParquet,
50
+ convertSchemaFromParquet as convertParquetToArrowSchema
51
+ } from './lib/arrow/convert-schema-from-parquet';
38
52
 
39
53
  // TESTS
40
54
  export const _typecheckParquetLoader: LoaderWithParser = ParquetLoader;
55
+
56
+ // Geo Metadata
57
+ export {default as geoJSONSchema} from './lib/geo/geoparquet-schema';
58
+
59
+ export type {GeoMetadata} from './lib/geo/decode-geo-metadata';
60
+ export {getGeoMetadata, setGeoMetadata, unpackGeoMetadata} from './lib/geo/decode-geo-metadata';
File without changes
@@ -0,0 +1,15 @@
1
+ // loaders.gl, MIT license
2
+
3
+ import {Schema} from '@loaders.gl/schema';
4
+ import {ParquetBuffer} from '@loaders.gl/parquet/parquetjs/schema/declare';
5
+
6
+ export function convertParquetRowGroupToColumns(
7
+ schema: Schema,
8
+ rowGroup: ParquetBuffer
9
+ ): Record<string, any[]> {
10
+ const columns: Record<string, any[]> = {};
11
+ for (const [columnName, data] of Object.entries(rowGroup.columnData)) {
12
+ columns[columnName] = columns[columnName] || data.values;
13
+ }
14
+ return columns;
15
+ }
@@ -1,5 +1,8 @@
1
- import type {ParquetSchema} from '../parquetjs/schema/schema';
2
- import type {FieldDefinition, ParquetField, ParquetType} from '../parquetjs/schema/declare';
1
+ // loaders.gl, MIT license
2
+
3
+ import type {ParquetSchema} from '../../parquetjs/schema/schema';
4
+ import type {FieldDefinition, ParquetField, ParquetType} from '../../parquetjs/schema/declare';
5
+ import {FileMetaData} from '@loaders.gl/parquet/parquetjs/parquet-thrift';
3
6
 
4
7
  import {
5
8
  Schema,
@@ -45,7 +48,7 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
45
48
  INT_64: Int64,
46
49
  JSON: Binary,
47
50
  BSON: Binary,
48
- // TODO check interal type
51
+ // TODO check interval type
49
52
  INTERVAL: Binary,
50
53
  DECIMAL_INT32: Float32,
51
54
  DECIMAL_INT64: Float64,
@@ -53,24 +56,13 @@ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
53
56
  DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
54
57
  };
55
58
 
56
- export function convertParquetToArrowSchema(parquetSchema: ParquetSchema): Schema {
59
+ export function convertSchemaFromParquet(
60
+ parquetSchema: ParquetSchema,
61
+ parquetMetadata?: FileMetaData
62
+ ): Schema {
57
63
  const fields = getFields(parquetSchema.schema);
58
-
59
- // TODO add metadata if needed.
60
- return new Schema(fields);
61
- }
62
-
63
- function getFieldMetadata(field: ParquetField): Map<string, string> {
64
- const metadata = new Map();
65
-
66
- for (const key in field) {
67
- if (key !== 'name') {
68
- const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
69
- metadata.set(key, value);
70
- }
71
- }
72
-
73
- return metadata;
64
+ const metadata = parquetMetadata && getSchemaMetadata(parquetMetadata);
65
+ return new Schema(fields, metadata);
74
66
  }
75
67
 
76
68
  function getFields(schema: FieldDefinition): Field[] {
@@ -80,8 +72,8 @@ function getFields(schema: FieldDefinition): Field[] {
80
72
  const field = schema[name];
81
73
 
82
74
  if (field.fields) {
83
- const childField = getFields(field.fields);
84
- const nestedField = new Field(name, new Struct(childField), field.optional);
75
+ const childFields = getFields(field.fields);
76
+ const nestedField = new Field(name, new Struct(childFields), field.optional);
85
77
  fields.push(nestedField);
86
78
  } else {
87
79
  const FieldType = PARQUET_TYPE_MAPPING[field.type];
@@ -93,3 +85,30 @@ function getFields(schema: FieldDefinition): Field[] {
93
85
 
94
86
  return fields;
95
87
  }
88
+
89
+ function getFieldMetadata(field: ParquetField): Map<string, string> {
90
+ const metadata = new Map();
91
+
92
+ for (const key in field) {
93
+ if (key !== 'name') {
94
+ let value = field[key] || '';
95
+ value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
96
+ metadata.set(key, value);
97
+ }
98
+ }
99
+
100
+ return metadata;
101
+ }
102
+
103
+ function getSchemaMetadata(parquetMetadata: FileMetaData): Map<string, string> {
104
+ const metadata = new Map();
105
+
106
+ const keyValueList = parquetMetadata.key_value_metadata || [];
107
+ for (const {key, value} of keyValueList) {
108
+ if (typeof value === 'string') {
109
+ metadata.set(key, value);
110
+ }
111
+ }
112
+
113
+ return metadata;
114
+ }
@@ -0,0 +1,102 @@
1
+ // loaders.gl, MIT license
2
+
3
+ // import type {ParquetSchema} from '../../parquetjs/schema/schema';
4
+ import type {
5
+ // FieldDefinition, ParquetField,
6
+ ParquetType
7
+ } from '../../parquetjs/schema/declare';
8
+
9
+ import {
10
+ Schema,
11
+ // Struct,
12
+ // Field,
13
+ DataType,
14
+ Bool,
15
+ Float64,
16
+ Int32,
17
+ Float32,
18
+ Binary,
19
+ Utf8,
20
+ Int64,
21
+ Uint16,
22
+ Uint32,
23
+ Uint64,
24
+ Int8,
25
+ Int16
26
+ } from '@loaders.gl/schema';
27
+
28
+ export const PARQUET_TYPE_MAPPING: {[type in ParquetType]: typeof DataType} = {
29
+ BOOLEAN: Bool,
30
+ INT32: Int32,
31
+ INT64: Float64,
32
+ INT96: Float64,
33
+ FLOAT: Float32,
34
+ DOUBLE: Float64,
35
+ BYTE_ARRAY: Binary,
36
+ FIXED_LEN_BYTE_ARRAY: Binary,
37
+ UTF8: Utf8,
38
+ DATE: Int32,
39
+ TIME_MILLIS: Int64,
40
+ TIME_MICROS: Int64,
41
+ TIMESTAMP_MILLIS: Int64,
42
+ TIMESTAMP_MICROS: Int64,
43
+ UINT_8: Int32,
44
+ UINT_16: Uint16,
45
+ UINT_32: Uint32,
46
+ UINT_64: Uint64,
47
+ INT_8: Int8,
48
+ INT_16: Int16,
49
+ INT_32: Int32,
50
+ INT_64: Int64,
51
+ JSON: Binary,
52
+ BSON: Binary,
53
+ // TODO check interval type
54
+ INTERVAL: Binary,
55
+ DECIMAL_INT32: Float32,
56
+ DECIMAL_INT64: Float64,
57
+ DECIMAL_BYTE_ARRAY: Float64,
58
+ DECIMAL_FIXED_LEN_BYTE_ARRAY: Float64
59
+ };
60
+
61
/**
 * Converts an arrow-style schema into a parquet schema.
 *
 * NOTE(review): WIP stub - field conversion is not yet implemented (see the
 * commented-out `getFields` helper below); this currently returns an empty
 * schema regardless of input.
 */
export function convertToParquetSchema(schema: Schema): Schema {
  const fields = []; // getFields(schema.fields);

  // TODO add metadata if needed.
  return new Schema(fields);
}
67
+
68
+ // function getFields(schema: Field[]): Definition[] {
69
+ // const fields: Field[] = [];
70
+
71
+ // for (const name in schema) {
72
+ // const field = schema[name];
73
+
74
+ // // @ts-ignore
75
+ // const children = field.children as DataType[];
76
+ // if (children) {
77
+ // const childField = getFields(field.fields);
78
+ // const nestedField = new Field(name, new Struct(childField), field.optional);
79
+ // fields.push(nestedField);
80
+ // } else {
81
+ // const FieldType = PARQUET_TYPE_MAPPING[field.type];
82
+ // const metadata = getFieldMetadata(field);
83
+ // const arrowField = new Field(name, new FieldType(), field.optional, metadata);
84
+ // fields.push(arrowField);
85
+ // }
86
+ // }
87
+
88
+ // return fields;
89
+ // }
90
+
91
+ // function getFieldMetadata(field: ParquetField): Map<string, string> {
92
+ // const metadata = new Map();
93
+
94
+ // for (const key in field) {
95
+ // if (key !== 'name') {
96
+ // const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
97
+ // metadata.set(key, value);
98
+ // }
99
+ // }
100
+
101
+ // return metadata;
102
+ // }
@@ -0,0 +1,99 @@
1
+ // loaders.gl, MIT license
2
+ import {Schema, Field} from '@loaders.gl/schema';
3
+
4
+ /* eslint-disable camelcase */
5
+
6
+ /** A geoarrow / geoparquet geo metadata object (stored in stringified form in the top level metadata 'geo' key) */
7
+ export type GeoMetadata = {
8
+ version?: string;
9
+ primary_column?: string;
10
+ columns: Record<string, GeoColumnMetadata>;
11
+ [key: string]: unknown;
12
+ };
13
+
14
+ /** A geoarrow / geoparquet geo metadata for one geometry column */
15
+ export type GeoColumnMetadata = {
16
+ bounding_box?:
17
+ | [number, number, number, number]
18
+ | [number, number, number, number, number, number];
19
+ crs?: string;
20
+ geometry_type?: string[];
21
+ edges?: string;
22
+ [key: string]: unknown;
23
+ };
24
+
25
+ /**
26
+ * Reads the GeoMetadata object from the metadata
27
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
28
+ export function getGeoMetadata(schema: Schema): GeoMetadata | null {
29
+ const stringifiedGeoMetadata = schema.metadata.get('geo');
30
+ if (!stringifiedGeoMetadata) {
31
+ return null;
32
+ }
33
+
34
+ try {
35
+ const geoMetadata = JSON.parse(stringifiedGeoMetadata) as GeoMetadata;
36
+ return geoMetadata;
37
+ } catch {
38
+ return null;
39
+ }
40
+ }
41
+
42
+ /**
43
+ * Stores a geoarrow / geoparquet geo metadata object in the schema
44
+ * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
45
+ */
46
+ export function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void {
47
+ const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
48
+ schema.metadata.set('geo', stringifiedGeoMetadata);
49
+ }
50
+
51
+ /**
52
+ * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
53
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
54
+ */
55
+ export function unpackGeoMetadata(schema: Schema): void {
56
+ const geoMetadata = getGeoMetadata(schema);
57
+ if (!geoMetadata) {
58
+ return;
59
+ }
60
+
61
+ // Store Parquet Schema Level Metadata
62
+
63
+ const {version, primary_column, columns} = geoMetadata;
64
+ if (version) {
65
+ schema.metadata.set('geo.version', version);
66
+ }
67
+
68
+ if (primary_column) {
69
+ schema.metadata.set('geo.primary_column', primary_column);
70
+ }
71
+
72
+ // store column names as comma separated list
73
+ schema.metadata.set('geo.columns', Object.keys(columns || {}).join(''));
74
+
75
+ for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
76
+ const field = schema.fields.find((field) => field.name === columnName);
77
+ if (field) {
78
+ if (field.name === primary_column) {
79
+ field.metadata.set('geo.primary_field', 'true');
80
+ }
81
+ unpackGeoFieldMetadata(field, columnMetadata);
82
+ }
83
+ }
84
+ }
85
+
86
+ function unpackGeoFieldMetadata(field: Field, columnMetadata): void {
87
+ for (const [key, value] of Object.entries(columnMetadata || {})) {
88
+ switch (key) {
89
+ case 'geometry_type':
90
+ field.metadata.set(`geo.${key}`, (value as string[]).join(','));
91
+ break;
92
+ case 'bbox':
93
+ case 'crs':
94
+ case 'edges':
95
+ default:
96
+ field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
97
+ }
98
+ }
99
+ }
@@ -0,0 +1,69 @@
1
+ // loaders.gl, MIT license
2
+
3
+ /* eslint-disable camelcase */
4
+
5
+ /**
6
+ * Geoparquet JSON schema for geo metadata
7
+ * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
8
+ * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
9
+ */
10
+ export default {
11
+ $schema: 'http://json-schema.org/draft-07/schema#',
12
+ title: 'GeoParquet',
13
+ description: 'Parquet metadata included in the geo field.',
14
+ type: 'object',
15
+ required: ['version', 'primary_column', 'columns'],
16
+ properties: {
17
+ version: {type: 'string', const: '1.0.0-beta.1'},
18
+ primary_column: {type: 'string', minLength: 1},
19
+ columns: {
20
+ type: 'object',
21
+ minProperties: 1,
22
+ patternProperties: {
23
+ '.+': {
24
+ type: 'object',
25
+ required: ['encoding', 'geometry_types'],
26
+ properties: {
27
+ encoding: {type: 'string', const: 'WKB'},
28
+ geometry_types: {
29
+ type: 'array',
30
+ uniqueItems: true,
31
+ items: {
32
+ type: 'string',
33
+ pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
34
+ }
35
+ },
36
+ crs: {
37
+ oneOf: [
38
+ {
39
+ $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
40
+ },
41
+ {type: 'null'}
42
+ ]
43
+ },
44
+ edges: {type: 'string', enum: ['planar', 'spherical']},
45
+ orientation: {type: 'string', const: 'counterclockwise'},
46
+ bbox: {
47
+ type: 'array',
48
+ items: {type: 'number'},
49
+ oneOf: [
50
+ {
51
+ description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
52
+ minItems: 4,
53
+ maxItems: 4
54
+ },
55
+ {
56
+ description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
57
+ minItems: 6,
58
+ maxItems: 6
59
+ }
60
+ ]
61
+ },
62
+ epoch: {type: 'number'}
63
+ }
64
+ }
65
+ },
66
+ additionalProperties: false
67
+ }
68
+ }
69
+ };
@@ -0,0 +1,49 @@
1
+ // loaders.gl, MIT license
2
+
3
+ // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
4
+ import {ColumnarTableBatch, Schema} from '@loaders.gl/schema';
5
+ import {makeReadableFile} from '@loaders.gl/loader-utils';
6
+ import type {ParquetLoaderOptions} from '../../parquet-loader';
7
+ import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
8
+ import {ParquetBuffer} from '../../parquetjs/schema/declare';
9
+ import {convertSchemaFromParquet} from '../arrow/convert-schema-from-parquet';
10
+ import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
11
+ import {unpackGeoMetadata} from '../geo/decode-geo-metadata';
12
+
13
+ export async function parseParquetInColumns(
14
+ arrayBuffer: ArrayBuffer,
15
+ options?: ParquetLoaderOptions
16
+ ) {
17
+ const blob = new Blob([arrayBuffer]);
18
+ for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
19
+ return batch;
20
+ }
21
+ return null;
22
+ }
23
+
24
+ export async function* parseParquetFileInColumnarBatches(
25
+ blob: Blob,
26
+ options?: ParquetLoaderOptions
27
+ ): AsyncIterable<ColumnarTableBatch> {
28
+ const file = makeReadableFile(blob);
29
+ const reader = new ParquetReader(file);
30
+ const parquetSchema = await reader.getSchema();
31
+ const parquetMetadata = await reader.getFileMetadata();
32
+ const schema = convertSchemaFromParquet(parquetSchema, parquetMetadata);
33
+ unpackGeoMetadata(schema);
34
+ const rowGroups = reader.rowGroupIterator(options?.parquet);
35
+ for await (const rowGroup of rowGroups) {
36
+ yield convertRowGroupToTableBatch(schema, rowGroup);
37
+ }
38
+ }
39
+
40
+ function convertRowGroupToTableBatch(schema: Schema, rowGroup: ParquetBuffer): ColumnarTableBatch {
41
+ const data = convertParquetRowGroupToColumns(schema, rowGroup);
42
+ return {
43
+ shape: 'columnar-table',
44
+ batchType: 'data',
45
+ schema,
46
+ data,
47
+ length: rowGroup.rowCount
48
+ };
49
+ }
@@ -1,8 +1,8 @@
1
1
  // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
2
2
  // import {ColumnarTableBatch} from '@loaders.gl/schema';
3
3
  import {makeReadableFile} from '@loaders.gl/loader-utils';
4
- import type {ParquetLoaderOptions} from '../parquet-loader';
5
- import {ParquetReader} from '../parquetjs/parser/parquet-reader';
4
+ import type {ParquetLoaderOptions} from '../../parquet-loader';
5
+ import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
6
6
 
7
7
  export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
8
8
  const blob = new Blob([arrayBuffer]);