@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/dist.min.js +19 -19
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +49 -8
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  6. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  7. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
  8. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  10. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  11. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
  12. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  13. package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
  14. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  15. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  16. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  17. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  18. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  19. package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
  20. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  21. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  22. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  23. package/dist/es5/parquet-loader.js +4 -2
  24. package/dist/es5/parquet-loader.js.map +1 -1
  25. package/dist/es5/parquet-wasm-loader.js +1 -1
  26. package/dist/es5/parquet-wasm-writer.js +1 -1
  27. package/dist/es5/parquet-writer.js +1 -1
  28. package/dist/es5/parquetjs/compression.js +15 -5
  29. package/dist/es5/parquetjs/compression.js.map +1 -1
  30. package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
  31. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  32. package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
  33. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  34. package/dist/es5/parquetjs/schema/declare.js +3 -1
  35. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  36. package/dist/es5/parquetjs/schema/shred.js +39 -33
  37. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  38. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  39. package/dist/es5/parquetjs/utils/file-utils.js +2 -3
  40. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  41. package/dist/esm/index.js +13 -3
  42. package/dist/esm/index.js.map +1 -1
  43. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  44. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  45. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
  46. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  47. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
  48. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  49. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
  50. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  51. package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
  52. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  53. package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
  54. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  55. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
  56. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  57. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
  58. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  59. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  60. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  61. package/dist/esm/parquet-loader.js +4 -2
  62. package/dist/esm/parquet-loader.js.map +1 -1
  63. package/dist/esm/parquet-wasm-loader.js +1 -1
  64. package/dist/esm/parquet-wasm-writer.js +1 -1
  65. package/dist/esm/parquet-writer.js +1 -1
  66. package/dist/esm/parquetjs/compression.js +10 -1
  67. package/dist/esm/parquetjs/compression.js.map +1 -1
  68. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
  69. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  70. package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
  71. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  72. package/dist/esm/parquetjs/schema/declare.js +1 -0
  73. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  74. package/dist/esm/parquetjs/schema/shred.js +42 -34
  75. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  76. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  77. package/dist/esm/parquetjs/utils/file-utils.js +1 -1
  78. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  79. package/dist/index.d.ts +24 -4
  80. package/dist/index.d.ts.map +1 -1
  81. package/dist/index.js +26 -9
  82. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  83. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  84. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  85. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  86. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  87. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  88. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  89. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  90. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  91. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  92. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  93. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  94. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  95. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  96. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  97. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  98. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  99. package/dist/lib/geo/geoparquet-schema.js +69 -0
  100. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  101. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  102. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  103. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  104. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  105. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  106. package/dist/parquet-loader.d.ts +2 -0
  107. package/dist/parquet-loader.d.ts.map +1 -1
  108. package/dist/parquet-loader.js +3 -1
  109. package/dist/parquet-worker.js +20 -20
  110. package/dist/parquet-worker.js.map +3 -3
  111. package/dist/parquetjs/compression.d.ts.map +1 -1
  112. package/dist/parquetjs/compression.js +16 -5
  113. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  114. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  115. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  116. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  117. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  118. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  119. package/dist/parquetjs/schema/declare.d.ts +14 -7
  120. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  121. package/dist/parquetjs/schema/declare.js +2 -0
  122. package/dist/parquetjs/schema/shred.d.ts +115 -0
  123. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  124. package/dist/parquetjs/schema/shred.js +161 -43
  125. package/dist/parquetjs/schema/types.d.ts +2 -2
  126. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  127. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  128. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  129. package/dist/parquetjs/utils/file-utils.js +2 -5
  130. package/package.json +7 -5
  131. package/src/index.ts +24 -4
  132. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  133. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  134. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  135. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  136. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  137. package/src/lib/geo/geoparquet-schema.ts +69 -0
  138. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  139. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  140. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  141. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  142. package/src/parquet-loader.ts +5 -1
  143. package/src/parquetjs/compression.ts +14 -1
  144. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  145. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  146. package/src/parquetjs/schema/declare.ts +17 -9
  147. package/src/parquetjs/schema/shred.ts +157 -28
  148. package/src/parquetjs/schema/types.ts +21 -27
  149. package/src/parquetjs/utils/file-utils.ts +3 -4
  150. package/dist/es5/lib/convert-schema.js.map +0 -1
  151. package/dist/es5/lib/parse-parquet.js.map +0 -1
  152. package/dist/es5/lib/read-array-buffer.js +0 -43
  153. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  154. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  155. package/dist/es5/parquetjs/file.js +0 -94
  156. package/dist/es5/parquetjs/file.js.map +0 -1
  157. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  158. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  159. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  160. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  161. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  162. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  163. package/dist/esm/lib/convert-schema.js.map +0 -1
  164. package/dist/esm/lib/parse-parquet.js +0 -25
  165. package/dist/esm/lib/parse-parquet.js.map +0 -1
  166. package/dist/esm/lib/read-array-buffer.js +0 -10
  167. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  168. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  169. package/dist/esm/parquetjs/file.js +0 -81
  170. package/dist/esm/parquetjs/file.js.map +0 -1
  171. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  172. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  173. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  174. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  175. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  176. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  177. package/dist/lib/convert-schema.d.ts +0 -8
  178. package/dist/lib/convert-schema.d.ts.map +0 -1
  179. package/dist/lib/parse-parquet.d.ts +0 -4
  180. package/dist/lib/parse-parquet.d.ts.map +0 -1
  181. package/dist/lib/parse-parquet.js +0 -28
  182. package/dist/lib/read-array-buffer.d.ts +0 -19
  183. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  184. package/dist/lib/read-array-buffer.js +0 -29
  185. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  186. package/dist/parquetjs/file.d.ts +0 -10
  187. package/dist/parquetjs/file.d.ts.map +0 -1
  188. package/dist/parquetjs/file.js +0 -99
  189. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  190. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  191. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  192. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  193. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  194. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  195. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  196. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  197. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  198. package/src/lib/parse-parquet.ts +0 -27
  199. package/src/lib/read-array-buffer.ts +0 -31
  200. package/src/parquetjs/file.ts +0 -90
  201. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  202. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  203. package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -0,0 +1,7 @@
1
+ import type { ParquetType } from '../../parquetjs/schema/declare';
2
+ import { Schema, DataType } from '@loaders.gl/schema';
3
+ export declare const PARQUET_TYPE_MAPPING: {
4
+ [type in ParquetType]: typeof DataType;
5
+ };
6
+ export declare function convertToParquetSchema(schema: Schema): Schema;
7
+ //# sourceMappingURL=convert-schema-to-parquet.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"convert-schema-to-parquet.d.ts","sourceRoot":"","sources":["../../../src/lib/arrow/convert-schema-to-parquet.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAEV,WAAW,EACZ,MAAM,gCAAgC,CAAC;AAExC,OAAO,EACL,MAAM,EAGN,QAAQ,EAaT,MAAM,oBAAoB,CAAC;AAE5B,eAAO,MAAM,oBAAoB,EAAE;KAAE,IAAI,IAAI,WAAW,GAAG,OAAO,QAAQ;CA+BzE,CAAC;AAEF,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAK7D"}
@@ -0,0 +1,72 @@
1
+ "use strict";
2
+ // loaders.gl, MIT license
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.convertToParquetSchema = exports.PARQUET_TYPE_MAPPING = void 0;
5
+ const schema_1 = require("@loaders.gl/schema");
6
// Lookup table keyed by Parquet type name; each value is the type class from
// `@loaders.gl/schema` (imported above as `schema_1`) chosen to represent that Parquet type.
// INT64, INT96, DOUBLE and three of the DECIMAL_* variants all map to Float64 in this table.
+ exports.PARQUET_TYPE_MAPPING = {
7
+ BOOLEAN: schema_1.Bool,
8
+ INT32: schema_1.Int32,
9
+ INT64: schema_1.Float64,
10
+ INT96: schema_1.Float64,
11
+ FLOAT: schema_1.Float32,
12
+ DOUBLE: schema_1.Float64,
13
+ BYTE_ARRAY: schema_1.Binary,
14
+ FIXED_LEN_BYTE_ARRAY: schema_1.Binary,
15
+ UTF8: schema_1.Utf8,
16
+ DATE: schema_1.Int32,
17
+ TIME_MILLIS: schema_1.Int64,
18
+ TIME_MICROS: schema_1.Int64,
19
+ TIMESTAMP_MILLIS: schema_1.Int64,
20
+ TIMESTAMP_MICROS: schema_1.Int64,
21
// NOTE(review): UINT_8 maps to the signed Int32 while UINT_16/UINT_32/UINT_64 map to
// unsigned classes - confirm whether this asymmetry is intentional.
+ UINT_8: schema_1.Int32,
22
+ UINT_16: schema_1.Uint16,
23
+ UINT_32: schema_1.Uint32,
24
+ UINT_64: schema_1.Uint64,
25
+ INT_8: schema_1.Int8,
26
+ INT_16: schema_1.Int16,
27
+ INT_32: schema_1.Int32,
28
+ INT_64: schema_1.Int64,
29
+ JSON: schema_1.Binary,
30
+ BSON: schema_1.Binary,
31
+ // TODO check interval type
32
+ INTERVAL: schema_1.Binary,
33
+ DECIMAL_INT32: schema_1.Float32,
34
+ DECIMAL_INT64: schema_1.Float64,
35
+ DECIMAL_BYTE_ARRAY: schema_1.Float64,
36
+ DECIMAL_FIXED_LEN_BYTE_ARRAY: schema_1.Float64
37
+ };
38
// Placeholder conversion: the `schema` argument is not read yet. An empty field list is
// wrapped in a new `schema_1.Schema` and returned, so the result never carries any fields.
// The real field conversion (`getFields`) exists only as commented-out code in this module.
+ function convertToParquetSchema(schema) {
39
+ const fields = []; // getFields(schema.fields);
40
+ // TODO add metadata if needed.
41
+ return new schema_1.Schema(fields);
42
+ }
43
+ exports.convertToParquetSchema = convertToParquetSchema;
44
+ // function getFields(schema: Field[]): Definition[] {
45
+ // const fields: Field[] = [];
46
+ // for (const name in schema) {
47
+ // const field = schema[name];
48
+ // // @ts-ignore
49
+ // const children = field.children as DataType[];
50
+ // if (children) {
51
+ // const childField = getFields(field.fields);
52
+ // const nestedField = new Field(name, new Struct(childField), field.optional);
53
+ // fields.push(nestedField);
54
+ // } else {
55
+ // const FieldType = PARQUET_TYPE_MAPPING[field.type];
56
+ // const metadata = getFieldMetadata(field);
57
+ // const arrowField = new Field(name, new FieldType(), field.optional, metadata);
58
+ // fields.push(arrowField);
59
+ // }
60
+ // }
61
+ // return fields;
62
+ // }
63
+ // function getFieldMetadata(field: ParquetField): Map<string, string> {
64
+ // const metadata = new Map();
65
+ // for (const key in field) {
66
+ // if (key !== 'name') {
67
+ // const value = typeof field[key] !== 'string' ? JSON.stringify(field[key]) : field[key];
68
+ // metadata.set(key, value);
69
+ // }
70
+ // }
71
+ // return metadata;
72
+ // }
@@ -0,0 +1,31 @@
1
+ import { Schema } from '@loaders.gl/schema';
2
+ /** A geoarrow / geoparquet geo metadata object (stored in stringified form in the top level metadata 'geo' key) */
3
+ export type GeoMetadata = {
4
+ version?: string;
5
+ primary_column?: string;
6
+ columns: Record<string, GeoColumnMetadata>;
7
+ [key: string]: unknown;
8
+ };
9
+ /** A geoarrow / geoparquet geo metadata for one geometry column */
10
+ export type GeoColumnMetadata = {
11
+ bounding_box?: [number, number, number, number] | [number, number, number, number, number, number];
12
+ crs?: string;
13
+ geometry_type?: string[];
14
+ edges?: string;
15
+ [key: string]: unknown;
16
+ };
17
+ /**
18
+ * Reads the GeoMetadata object from the metadata
19
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
20
+ export declare function getGeoMetadata(schema: Schema): GeoMetadata | null;
21
+ /**
22
+ * Stores a geoarrow / geoparquet geo metadata object in the schema
23
+ * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
24
+ */
25
+ export declare function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void;
26
+ /**
27
+ * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
28
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
29
+ */
30
+ export declare function unpackGeoMetadata(schema: Schema): void;
31
+ //# sourceMappingURL=decode-geo-metadata.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"decode-geo-metadata.d.ts","sourceRoot":"","sources":["../../../src/lib/geo/decode-geo-metadata.ts"],"names":[],"mappings":"AACA,OAAO,EAAC,MAAM,EAAQ,MAAM,oBAAoB,CAAC;AAIjD,mHAAmH;AACnH,MAAM,MAAM,WAAW,GAAG;IACxB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC;IAC3C,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB,CAAC;AAEF,oEAAoE;AACpE,MAAM,MAAM,iBAAiB,GAAG;IAC9B,YAAY,CAAC,EACT,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,GAChC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IACrD,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;CACxB,CAAC;AAEF;;yGAEyG;AACzG,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,GAAG,WAAW,GAAG,IAAI,CAYjE;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,GAAG,IAAI,CAG7E;AAED;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CA6BtD"}
@@ -0,0 +1,73 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.unpackGeoMetadata = exports.setGeoMetadata = exports.getGeoMetadata = void 0;
4
+ /**
5
+ * Reads the GeoMetadata object from the metadata
6
+ * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata */
7
+ function getGeoMetadata(schema) {
8
+ const stringifiedGeoMetadata = schema.metadata.get('geo');
9
+ if (!stringifiedGeoMetadata) {
10
+ return null;
11
+ }
12
+ try {
13
+ const geoMetadata = JSON.parse(stringifiedGeoMetadata);
14
+ return geoMetadata;
15
+ }
16
+ catch {
17
+ return null;
18
+ }
19
+ }
20
+ exports.getGeoMetadata = getGeoMetadata;
21
+ /**
22
+ * Stores a geoarrow / geoparquet geo metadata object in the schema
23
+ * @note geoarrow / geoparquet geo metadata is a single stringified JSON field
24
+ */
25
// @param schema Object whose `metadata` collection is updated in place via `metadata.set`
// @param geoMetadata Geo metadata object; it is serialized with JSON.stringify and the
//   resulting string is stored under the single 'geo' key
+ function setGeoMetadata(schema, geoMetadata) {
26
+ const stringifiedGeoMetadata = JSON.stringify(geoMetadata);
27
+ schema.metadata.set('geo', stringifiedGeoMetadata);
28
+ }
29
+ exports.setGeoMetadata = setGeoMetadata;
30
/**
 * Unpacks geo metadata into separate metadata fields (parses the long JSON string)
 * @note geoarrow / parquet schema is stringified into a single key-value pair in the parquet metadata
 * @param schema Object with a `metadata` map and a `fields` array; both the schema-level map
 *   and the `metadata` maps of fields named in the geo metadata are updated in place.
 *   Nothing is written when the schema has no parseable 'geo' entry.
 */
function unpackGeoMetadata(schema) {
  const geoMetadata = getGeoMetadata(schema);
  if (!geoMetadata) {
    return;
  }
  // Store Parquet Schema Level Metadata
  const { version, primary_column, columns } = geoMetadata;
  if (version) {
    schema.metadata.set('geo.version', version);
  }
  if (primary_column) {
    schema.metadata.set('geo.primary_column', primary_column);
  }
  const geoColumns = columns || {};
  // Store column names as a comma separated list.
  // The separator must be ',': joining with '' fuses the names into one unusable string.
  schema.metadata.set('geo.columns', Object.keys(geoColumns).join(','));
  for (const [columnName, columnMetadata] of Object.entries(geoColumns)) {
    // Geo columns that have no matching field in the schema are skipped
    const field = schema.fields.find((candidate) => candidate.name === columnName);
    if (field) {
      if (field.name === primary_column) {
        field.metadata.set('geo.primary_field', 'true');
      }
      unpackGeoFieldMetadata(field, columnMetadata);
    }
  }
}
59
+ exports.unpackGeoMetadata = unpackGeoMetadata;
60
/**
 * Copies every entry of one column's geo metadata onto the field's metadata map,
 * using the original key prefixed with `geo.`.
 * @param field Object whose `metadata` map receives the entries (updated in place)
 * @param columnMetadata Geo metadata object for this column; a nullish value is treated as empty
 */
function unpackGeoFieldMetadata(field, columnMetadata) {
  for (const [key, value] of Object.entries(columnMetadata || {})) {
    if (key === 'geometry_type' && Array.isArray(value)) {
      // A list of geometry types is flattened into a comma separated string
      field.metadata.set(`geo.${key}`, value.join(','));
    } else {
      // 'bbox', 'crs', 'edges' and any other key: strings are stored as-is, anything else as JSON.
      // A scalar `geometry_type` also lands here instead of throwing on a missing `.join`.
      // NOTE(review): the GeoParquet JSON schema shipped in this package names the key
      // `geometry_types`; that key takes this generic branch and is stored as a JSON array string.
      field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
    }
  }
}
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Geoparquet JSON schema for geo metadata
3
+ * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
4
+ * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
5
+ */
6
+ declare const _default: {
7
+ $schema: string;
8
+ title: string;
9
+ description: string;
10
+ type: string;
11
+ required: string[];
12
+ properties: {
13
+ version: {
14
+ type: string;
15
+ const: string;
16
+ };
17
+ primary_column: {
18
+ type: string;
19
+ minLength: number;
20
+ };
21
+ columns: {
22
+ type: string;
23
+ minProperties: number;
24
+ patternProperties: {
25
+ '.+': {
26
+ type: string;
27
+ required: string[];
28
+ properties: {
29
+ encoding: {
30
+ type: string;
31
+ const: string;
32
+ };
33
+ geometry_types: {
34
+ type: string;
35
+ uniqueItems: boolean;
36
+ items: {
37
+ type: string;
38
+ pattern: string;
39
+ };
40
+ };
41
+ crs: {
42
+ oneOf: ({
43
+ $ref: string;
44
+ type?: undefined;
45
+ } | {
46
+ type: string;
47
+ $ref?: undefined;
48
+ })[];
49
+ };
50
+ edges: {
51
+ type: string;
52
+ enum: string[];
53
+ };
54
+ orientation: {
55
+ type: string;
56
+ const: string;
57
+ };
58
+ bbox: {
59
+ type: string;
60
+ items: {
61
+ type: string;
62
+ };
63
+ oneOf: {
64
+ description: string;
65
+ minItems: number;
66
+ maxItems: number;
67
+ }[];
68
+ };
69
+ epoch: {
70
+ type: string;
71
+ };
72
+ };
73
+ };
74
+ };
75
+ additionalProperties: boolean;
76
+ };
77
+ };
78
+ };
79
+ export default _default;
80
+ //# sourceMappingURL=geoparquet-schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"geoparquet-schema.d.ts","sourceRoot":"","sources":["../../../src/lib/geo/geoparquet-schema.ts"],"names":[],"mappings":"AAIA;;;;GAIG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACH,wBA2DE"}
@@ -0,0 +1,69 @@
1
+ "use strict";
2
+ // loaders.gl, MIT license
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ /* eslint-disable camelcase */
5
+ /**
6
+ * Geoparquet JSON schema for geo metadata
7
+ * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
8
+ * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
9
+ */
10
+ exports.default = {
11
+ $schema: 'http://json-schema.org/draft-07/schema#',
12
+ title: 'GeoParquet',
13
+ description: 'Parquet metadata included in the geo field.',
14
+ type: 'object',
15
+ required: ['version', 'primary_column', 'columns'],
16
+ properties: {
17
+ version: { type: 'string', const: '1.0.0-beta.1' },
18
+ primary_column: { type: 'string', minLength: 1 },
19
+ columns: {
20
+ type: 'object',
21
+ minProperties: 1,
22
+ patternProperties: {
23
+ '.+': {
24
+ type: 'object',
25
+ required: ['encoding', 'geometry_types'],
26
+ properties: {
27
+ encoding: { type: 'string', const: 'WKB' },
28
+ geometry_types: {
29
+ type: 'array',
30
+ uniqueItems: true,
31
+ items: {
32
+ type: 'string',
33
+ pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
34
+ }
35
+ },
36
+ crs: {
37
+ oneOf: [
38
+ {
39
+ $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
40
+ },
41
+ { type: 'null' }
42
+ ]
43
+ },
44
+ edges: { type: 'string', enum: ['planar', 'spherical'] },
45
+ orientation: { type: 'string', const: 'counterclockwise' },
46
+ bbox: {
47
+ type: 'array',
48
+ items: { type: 'number' },
49
+ oneOf: [
50
+ {
51
+ description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
52
+ minItems: 4,
53
+ maxItems: 4
54
+ },
55
+ {
56
+ description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
57
+ minItems: 6,
58
+ maxItems: 6
59
+ }
60
+ ]
61
+ },
62
+ epoch: { type: 'number' }
63
+ }
64
+ }
65
+ },
66
+ additionalProperties: false
67
+ }
68
+ }
69
+ };
@@ -0,0 +1,5 @@
1
+ import { ColumnarTableBatch } from '@loaders.gl/schema';
2
+ import type { ParquetLoaderOptions } from '../../parquet-loader';
3
+ export declare function parseParquetInColumns(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<ColumnarTableBatch | null>;
4
+ export declare function parseParquetFileInColumnarBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncIterable<ColumnarTableBatch>;
5
+ //# sourceMappingURL=parse-parquet-to-columns.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse-parquet-to-columns.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-columns.ts"],"names":[],"mappings":"AAGA,OAAO,EAAC,kBAAkB,EAAS,MAAM,oBAAoB,CAAC;AAE9D,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAO/D,wBAAsB,qBAAqB,CACzC,WAAW,EAAE,WAAW,EACxB,OAAO,CAAC,EAAE,oBAAoB,sCAO/B;AAED,wBAAuB,iCAAiC,CACtD,IAAI,EAAE,IAAI,EACV,OAAO,CAAC,EAAE,oBAAoB,GAC7B,aAAa,CAAC,kBAAkB,CAAC,CAWnC"}
@@ -0,0 +1,40 @@
1
+ "use strict";
2
+ // loaders.gl, MIT license
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.parseParquetFileInColumnarBatches = exports.parseParquetInColumns = void 0;
5
+ const loader_utils_1 = require("@loaders.gl/loader-utils");
6
+ const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
7
+ const convert_schema_from_parquet_1 = require("../arrow/convert-schema-from-parquet");
8
+ const convert_row_group_to_columns_1 = require("../arrow/convert-row-group-to-columns");
9
+ const decode_geo_metadata_1 = require("../geo/decode-geo-metadata");
10
// Parses an in-memory Parquet file: wraps `arrayBuffer` in a Blob and delegates to
// parseParquetFileInColumnarBatches, passing `options` through unchanged.
// NOTE(review): the `return` inside the loop hands back the FIRST yielded batch and stops
// iterating, so any later batches are never read - confirm that single-batch output is intended.
// Resolves to null when the iterator yields nothing.
+ async function parseParquetInColumns(arrayBuffer, options) {
11
+ const blob = new Blob([arrayBuffer]);
12
+ for await (const batch of parseParquetFileInColumnarBatches(blob, options)) {
13
+ return batch;
14
+ }
15
+ return null;
16
+ }
17
+ exports.parseParquetInColumns = parseParquetInColumns;
18
// Async generator that yields one columnar table batch per row group produced by the reader.
// Steps: wrap the blob with makeReadableFile, create a ParquetReader, read the Parquet schema
// and file metadata, convert them with convertSchemaFromParquet, unpack geo metadata onto
// that converted schema, then iterate `reader.rowGroupIterator(options?.parquet)`.
// The same converted schema object is attached to every yielded batch.
// NOTE(review): unpackGeoMetadata is called unconditionally here; the loader options also
// declare a `geoparquet` flag - confirm whether that flag is meant to gate this call.
+ async function* parseParquetFileInColumnarBatches(blob, options) {
19
+ const file = (0, loader_utils_1.makeReadableFile)(blob);
20
+ const reader = new parquet_reader_1.ParquetReader(file);
21
+ const parquetSchema = await reader.getSchema();
22
+ const parquetMetadata = await reader.getFileMetadata();
23
+ const schema = (0, convert_schema_from_parquet_1.convertSchemaFromParquet)(parquetSchema, parquetMetadata);
24
+ (0, decode_geo_metadata_1.unpackGeoMetadata)(schema);
25
+ const rowGroups = reader.rowGroupIterator(options?.parquet);
26
+ for await (const rowGroup of rowGroups) {
27
+ yield convertRowGroupToTableBatch(schema, rowGroup);
28
+ }
29
+ }
30
+ exports.parseParquetFileInColumnarBatches = parseParquetFileInColumnarBatches;
31
// Builds a batch object from one row group: the column data comes from
// convertParquetRowGroupToColumns(schema, rowGroup), `length` is the row group's `rowCount`,
// and `shape` / `batchType` are the fixed tags 'columnar-table' / 'data'.
+ function convertRowGroupToTableBatch(schema, rowGroup) {
32
+ const data = (0, convert_row_group_to_columns_1.convertParquetRowGroupToColumns)(schema, rowGroup);
33
+ return {
34
+ shape: 'columnar-table',
35
+ batchType: 'data',
36
+ schema,
37
+ data,
38
+ length: rowGroup.rowCount
39
+ };
40
+ }
@@ -0,0 +1,4 @@
1
+ import type { ParquetLoaderOptions } from '../../parquet-loader';
2
+ export declare function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions): Promise<import("../../parquetjs/schema/declare").ParquetRecord[] | null>;
3
+ export declare function parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions): AsyncGenerator<import("../../parquetjs/schema/declare").ParquetRecord[], void, unknown>;
4
+ //# sourceMappingURL=parse-parquet-to-rows.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse-parquet-to-rows.d.ts","sourceRoot":"","sources":["../../../src/lib/parsers/parse-parquet-to-rows.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAC,oBAAoB,EAAC,MAAM,sBAAsB,CAAC;AAG/D,wBAAsB,YAAY,CAAC,WAAW,EAAE,WAAW,EAAE,OAAO,CAAC,EAAE,oBAAoB,4EAM1F;AAED,wBAAuB,yBAAyB,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,EAAE,oBAAoB,2FAO1F"}
@@ -0,0 +1,40 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.parseParquetFileInBatches = exports.parseParquet = void 0;
4
+ // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
5
+ // import {ColumnarTableBatch} from '@loaders.gl/schema';
6
+ const loader_utils_1 = require("@loaders.gl/loader-utils");
7
+ const parquet_reader_1 = require("../../parquetjs/parser/parquet-reader");
8
// Parses an in-memory Parquet file into rows: wraps `arrayBuffer` in a Blob and delegates to
// parseParquetFileInBatches, passing `options` through unchanged.
// NOTE(review): the `return` inside the loop hands back the FIRST yielded batch of rows and
// stops iterating, so later batches are never read - confirm that this is intended.
// Resolves to null when the iterator yields nothing.
+ async function parseParquet(arrayBuffer, options) {
9
+ const blob = new Blob([arrayBuffer]);
10
+ for await (const batch of parseParquetFileInBatches(blob, options)) {
11
+ return batch;
12
+ }
13
+ return null;
14
+ }
15
+ exports.parseParquet = parseParquet;
16
// Async generator over row batches: wraps the blob with makeReadableFile, creates a
// ParquetReader, and re-yields every value produced by
// `reader.rowBatchIterator(options?.parquet)` without modification.
+ async function* parseParquetFileInBatches(blob, options) {
17
+ const file = (0, loader_utils_1.makeReadableFile)(blob);
18
+ const reader = new parquet_reader_1.ParquetReader(file);
19
+ const rowBatches = reader.rowBatchIterator(options?.parquet);
20
+ for await (const rows of rowBatches) {
21
+ yield rows;
22
+ }
23
+ }
24
+ exports.parseParquetFileInBatches = parseParquetFileInBatches;
25
+ // export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
26
+ // const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
27
+ // try {
28
+ // for await (const rowGroup of rowGroupReader) {
29
+ // yield convertRowGroupToTableBatch(rowGroup);
30
+ // }
31
+ // } finally {
32
+ // await rowGroupReader.close();
33
+ // }
34
+ // }
35
+ // function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
36
+ // // @ts-expect-error
37
+ // return {
38
+ // data: rowGroup
39
+ // };
40
+ // }
@@ -3,6 +3,8 @@ export type ParquetLoaderOptions = LoaderOptions & {
3
3
  parquet?: {
4
4
  type?: 'object-row-table';
5
5
  url?: string;
6
+ columnList?: string[] | string[][];
7
+ geoparquet?: boolean;
6
8
  };
7
9
  };
8
10
  /** ParquetJS table loader */
@@ -1 +1 @@
1
- {"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAMpE,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;KACd,CAAC;CACH,CAAC;AASF,6BAA6B;AAC7B,eAAO,MAAM,aAAa;;;;;;;;;;;;CAYzB,CAAC;AAEF,eAAO,MAAM,uBAAuB,EAAE,MAAsB,CAAC"}
1
+ {"version":3,"file":"parquet-loader.d.ts","sourceRoot":"","sources":["../src/parquet-loader.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,MAAM,EAAE,aAAa,EAAC,MAAM,0BAA0B,CAAC;AAMpE,MAAM,MAAM,oBAAoB,GAAG,aAAa,GAAG;IACjD,OAAO,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,kBAAkB,CAAC;QAC1B,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,UAAU,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAAC;QACnC,UAAU,CAAC,EAAE,OAAO,CAAC;KACtB,CAAC;CACH,CAAC;AAWF,6BAA6B;AAC7B,eAAO,MAAM,aAAa;;;;;;;;;;;;CAYzB,CAAC;AAEF,eAAO,MAAM,uBAAuB,EAAE,MAAsB,CAAC"}
@@ -7,7 +7,9 @@ const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
7
7
  const DEFAULT_PARQUET_LOADER_OPTIONS = {
8
8
  parquet: {
9
9
  type: 'object-row-table',
10
- url: undefined
10
+ url: undefined,
11
+ columnList: [],
12
+ geoparquet: true
11
13
  }
12
14
  };
13
15
  /** ParquetJS table loader */