@loaders.gl/parquet 3.4.0-alpha.1 → 3.4.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/dist/dist.min.js +19 -19
  2. package/dist/dist.min.js.map +3 -3
  3. package/dist/es5/index.js +49 -8
  4. package/dist/es5/index.js.map +1 -1
  5. package/dist/es5/lib/arrow/convert-columns-to-row-group.js +2 -0
  6. package/dist/es5/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  7. package/dist/es5/lib/arrow/convert-row-group-to-columns.js +20 -0
  8. package/dist/es5/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  9. package/dist/es5/lib/arrow/convert-schema-from-parquet.js +98 -0
  10. package/dist/es5/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  11. package/dist/es5/lib/{convert-schema.js → arrow/convert-schema-to-parquet.js} +5 -31
  12. package/dist/es5/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  13. package/dist/es5/lib/geo/decode-geo-metadata.js +82 -0
  14. package/dist/es5/lib/geo/decode-geo-metadata.js.map +1 -0
  15. package/dist/es5/lib/geo/geoparquet-schema.js +83 -0
  16. package/dist/es5/lib/geo/geoparquet-schema.js.map +1 -0
  17. package/dist/es5/lib/parsers/parse-parquet-to-columns.js +177 -0
  18. package/dist/es5/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  19. package/dist/es5/lib/{parse-parquet.js → parsers/parse-parquet-to-rows.js} +51 -27
  20. package/dist/es5/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  21. package/dist/es5/lib/wip/convert-schema-deep.java.disabled +910 -0
  22. package/dist/es5/lib/wip/convert-schema-deep.rs.disabled +976 -0
  23. package/dist/es5/parquet-loader.js +4 -2
  24. package/dist/es5/parquet-loader.js.map +1 -1
  25. package/dist/es5/parquet-wasm-loader.js +1 -1
  26. package/dist/es5/parquet-wasm-writer.js +1 -1
  27. package/dist/es5/parquet-writer.js +1 -1
  28. package/dist/es5/parquetjs/compression.js +15 -5
  29. package/dist/es5/parquetjs/compression.js.map +1 -1
  30. package/dist/es5/parquetjs/encoder/{writer.js → parquet-encoder.js} +70 -158
  31. package/dist/es5/parquetjs/encoder/parquet-encoder.js.map +1 -0
  32. package/dist/es5/parquetjs/parser/parquet-reader.js +553 -222
  33. package/dist/es5/parquetjs/parser/parquet-reader.js.map +1 -1
  34. package/dist/es5/parquetjs/schema/declare.js +3 -1
  35. package/dist/es5/parquetjs/schema/declare.js.map +1 -1
  36. package/dist/es5/parquetjs/schema/shred.js +39 -33
  37. package/dist/es5/parquetjs/schema/shred.js.map +1 -1
  38. package/dist/es5/parquetjs/schema/types.js.map +1 -1
  39. package/dist/es5/parquetjs/utils/file-utils.js +2 -3
  40. package/dist/es5/parquetjs/utils/file-utils.js.map +1 -1
  41. package/dist/esm/index.js +13 -3
  42. package/dist/esm/index.js.map +1 -1
  43. package/dist/esm/lib/arrow/convert-columns-to-row-group.js +2 -0
  44. package/dist/esm/lib/arrow/convert-columns-to-row-group.js.map +1 -0
  45. package/dist/esm/lib/arrow/convert-row-group-to-columns.js +10 -0
  46. package/dist/esm/lib/arrow/convert-row-group-to-columns.js.map +1 -0
  47. package/dist/esm/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +32 -16
  48. package/dist/esm/lib/arrow/convert-schema-from-parquet.js.map +1 -0
  49. package/dist/esm/lib/arrow/convert-schema-to-parquet.js +40 -0
  50. package/dist/esm/lib/arrow/convert-schema-to-parquet.js.map +1 -0
  51. package/dist/esm/lib/geo/decode-geo-metadata.js +64 -0
  52. package/dist/esm/lib/geo/decode-geo-metadata.js.map +1 -0
  53. package/dist/esm/lib/geo/geoparquet-schema.js +78 -0
  54. package/dist/esm/lib/geo/geoparquet-schema.js.map +1 -0
  55. package/dist/esm/lib/parsers/parse-parquet-to-columns.js +37 -0
  56. package/dist/esm/lib/parsers/parse-parquet-to-columns.js.map +1 -0
  57. package/dist/esm/lib/parsers/parse-parquet-to-rows.js +19 -0
  58. package/dist/esm/lib/parsers/parse-parquet-to-rows.js.map +1 -0
  59. package/dist/esm/lib/wip/convert-schema-deep.java.disabled +910 -0
  60. package/dist/esm/lib/wip/convert-schema-deep.rs.disabled +976 -0
  61. package/dist/esm/parquet-loader.js +4 -2
  62. package/dist/esm/parquet-loader.js.map +1 -1
  63. package/dist/esm/parquet-wasm-loader.js +1 -1
  64. package/dist/esm/parquet-wasm-writer.js +1 -1
  65. package/dist/esm/parquet-writer.js +1 -1
  66. package/dist/esm/parquetjs/compression.js +10 -1
  67. package/dist/esm/parquetjs/compression.js.map +1 -1
  68. package/dist/esm/parquetjs/encoder/{writer.js → parquet-encoder.js} +7 -37
  69. package/dist/esm/parquetjs/encoder/parquet-encoder.js.map +1 -0
  70. package/dist/esm/parquetjs/parser/parquet-reader.js +158 -72
  71. package/dist/esm/parquetjs/parser/parquet-reader.js.map +1 -1
  72. package/dist/esm/parquetjs/schema/declare.js +1 -0
  73. package/dist/esm/parquetjs/schema/declare.js.map +1 -1
  74. package/dist/esm/parquetjs/schema/shred.js +42 -34
  75. package/dist/esm/parquetjs/schema/shred.js.map +1 -1
  76. package/dist/esm/parquetjs/schema/types.js.map +1 -1
  77. package/dist/esm/parquetjs/utils/file-utils.js +1 -1
  78. package/dist/esm/parquetjs/utils/file-utils.js.map +1 -1
  79. package/dist/index.d.ts +24 -4
  80. package/dist/index.d.ts.map +1 -1
  81. package/dist/index.js +26 -9
  82. package/dist/lib/arrow/convert-columns-to-row-group.d.ts +1 -0
  83. package/dist/lib/arrow/convert-columns-to-row-group.d.ts.map +1 -0
  84. package/dist/lib/arrow/convert-columns-to-row-group.js +1 -0
  85. package/dist/lib/arrow/convert-row-group-to-columns.d.ts +4 -0
  86. package/dist/lib/arrow/convert-row-group-to-columns.d.ts.map +1 -0
  87. package/dist/lib/arrow/convert-row-group-to-columns.js +12 -0
  88. package/dist/lib/arrow/convert-schema-from-parquet.d.ts +9 -0
  89. package/dist/lib/arrow/convert-schema-from-parquet.d.ts.map +1 -0
  90. package/dist/lib/{convert-schema.js → arrow/convert-schema-from-parquet.js} +30 -18
  91. package/dist/lib/arrow/convert-schema-to-parquet.d.ts +7 -0
  92. package/dist/lib/arrow/convert-schema-to-parquet.d.ts.map +1 -0
  93. package/dist/lib/arrow/convert-schema-to-parquet.js +72 -0
  94. package/dist/lib/geo/decode-geo-metadata.d.ts +31 -0
  95. package/dist/lib/geo/decode-geo-metadata.d.ts.map +1 -0
  96. package/dist/lib/geo/decode-geo-metadata.js +73 -0
  97. package/dist/lib/geo/geoparquet-schema.d.ts +80 -0
  98. package/dist/lib/geo/geoparquet-schema.d.ts.map +1 -0
  99. package/dist/lib/geo/geoparquet-schema.js +69 -0
  100. package/dist/lib/parsers/parse-parquet-to-columns.d.ts +5 -0
  101. package/dist/lib/parsers/parse-parquet-to-columns.d.ts.map +1 -0
  102. package/dist/lib/parsers/parse-parquet-to-columns.js +40 -0
  103. package/dist/lib/parsers/parse-parquet-to-rows.d.ts +4 -0
  104. package/dist/lib/parsers/parse-parquet-to-rows.d.ts.map +1 -0
  105. package/dist/lib/parsers/parse-parquet-to-rows.js +40 -0
  106. package/dist/parquet-loader.d.ts +2 -0
  107. package/dist/parquet-loader.d.ts.map +1 -1
  108. package/dist/parquet-loader.js +3 -1
  109. package/dist/parquet-worker.js +20 -20
  110. package/dist/parquet-worker.js.map +3 -3
  111. package/dist/parquetjs/compression.d.ts.map +1 -1
  112. package/dist/parquetjs/compression.js +16 -5
  113. package/dist/parquetjs/encoder/{writer.d.ts → parquet-encoder.d.ts} +10 -19
  114. package/dist/parquetjs/encoder/parquet-encoder.d.ts.map +1 -0
  115. package/dist/parquetjs/encoder/{writer.js → parquet-encoder.js} +39 -37
  116. package/dist/parquetjs/parser/parquet-reader.d.ts +47 -57
  117. package/dist/parquetjs/parser/parquet-reader.d.ts.map +1 -1
  118. package/dist/parquetjs/parser/parquet-reader.js +168 -102
  119. package/dist/parquetjs/schema/declare.d.ts +14 -7
  120. package/dist/parquetjs/schema/declare.d.ts.map +1 -1
  121. package/dist/parquetjs/schema/declare.js +2 -0
  122. package/dist/parquetjs/schema/shred.d.ts +115 -0
  123. package/dist/parquetjs/schema/shred.d.ts.map +1 -1
  124. package/dist/parquetjs/schema/shred.js +161 -43
  125. package/dist/parquetjs/schema/types.d.ts +2 -2
  126. package/dist/parquetjs/schema/types.d.ts.map +1 -1
  127. package/dist/parquetjs/utils/file-utils.d.ts +3 -4
  128. package/dist/parquetjs/utils/file-utils.d.ts.map +1 -1
  129. package/dist/parquetjs/utils/file-utils.js +2 -5
  130. package/package.json +7 -5
  131. package/src/index.ts +24 -4
  132. package/src/lib/arrow/convert-columns-to-row-group.ts +0 -0
  133. package/src/lib/arrow/convert-row-group-to-columns.ts +15 -0
  134. package/src/lib/{convert-schema.ts → arrow/convert-schema-from-parquet.ts} +41 -22
  135. package/src/lib/arrow/convert-schema-to-parquet.ts +102 -0
  136. package/src/lib/geo/decode-geo-metadata.ts +99 -0
  137. package/src/lib/geo/geoparquet-schema.ts +69 -0
  138. package/src/lib/parsers/parse-parquet-to-columns.ts +49 -0
  139. package/src/lib/parsers/parse-parquet-to-rows.ts +40 -0
  140. package/src/lib/wip/convert-schema-deep.java.disabled +910 -0
  141. package/src/lib/wip/convert-schema-deep.rs.disabled +976 -0
  142. package/src/parquet-loader.ts +5 -1
  143. package/src/parquetjs/compression.ts +14 -1
  144. package/src/parquetjs/encoder/{writer.ts → parquet-encoder.ts} +22 -28
  145. package/src/parquetjs/parser/parquet-reader.ts +239 -122
  146. package/src/parquetjs/schema/declare.ts +17 -9
  147. package/src/parquetjs/schema/shred.ts +157 -28
  148. package/src/parquetjs/schema/types.ts +21 -27
  149. package/src/parquetjs/utils/file-utils.ts +3 -4
  150. package/dist/es5/lib/convert-schema.js.map +0 -1
  151. package/dist/es5/lib/parse-parquet.js.map +0 -1
  152. package/dist/es5/lib/read-array-buffer.js +0 -43
  153. package/dist/es5/lib/read-array-buffer.js.map +0 -1
  154. package/dist/es5/parquetjs/encoder/writer.js.map +0 -1
  155. package/dist/es5/parquetjs/file.js +0 -94
  156. package/dist/es5/parquetjs/file.js.map +0 -1
  157. package/dist/es5/parquetjs/parser/parquet-cursor.js +0 -183
  158. package/dist/es5/parquetjs/parser/parquet-cursor.js.map +0 -1
  159. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js +0 -327
  160. package/dist/es5/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  161. package/dist/es5/parquetjs/utils/buffer-utils.js +0 -19
  162. package/dist/es5/parquetjs/utils/buffer-utils.js.map +0 -1
  163. package/dist/esm/lib/convert-schema.js.map +0 -1
  164. package/dist/esm/lib/parse-parquet.js +0 -25
  165. package/dist/esm/lib/parse-parquet.js.map +0 -1
  166. package/dist/esm/lib/read-array-buffer.js +0 -10
  167. package/dist/esm/lib/read-array-buffer.js.map +0 -1
  168. package/dist/esm/parquetjs/encoder/writer.js.map +0 -1
  169. package/dist/esm/parquetjs/file.js +0 -81
  170. package/dist/esm/parquetjs/file.js.map +0 -1
  171. package/dist/esm/parquetjs/parser/parquet-cursor.js +0 -78
  172. package/dist/esm/parquetjs/parser/parquet-cursor.js.map +0 -1
  173. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js +0 -129
  174. package/dist/esm/parquetjs/parser/parquet-envelope-reader.js.map +0 -1
  175. package/dist/esm/parquetjs/utils/buffer-utils.js +0 -13
  176. package/dist/esm/parquetjs/utils/buffer-utils.js.map +0 -1
  177. package/dist/lib/convert-schema.d.ts +0 -8
  178. package/dist/lib/convert-schema.d.ts.map +0 -1
  179. package/dist/lib/parse-parquet.d.ts +0 -4
  180. package/dist/lib/parse-parquet.d.ts.map +0 -1
  181. package/dist/lib/parse-parquet.js +0 -28
  182. package/dist/lib/read-array-buffer.d.ts +0 -19
  183. package/dist/lib/read-array-buffer.d.ts.map +0 -1
  184. package/dist/lib/read-array-buffer.js +0 -29
  185. package/dist/parquetjs/encoder/writer.d.ts.map +0 -1
  186. package/dist/parquetjs/file.d.ts +0 -10
  187. package/dist/parquetjs/file.d.ts.map +0 -1
  188. package/dist/parquetjs/file.js +0 -99
  189. package/dist/parquetjs/parser/parquet-cursor.d.ts +0 -36
  190. package/dist/parquetjs/parser/parquet-cursor.d.ts.map +0 -1
  191. package/dist/parquetjs/parser/parquet-cursor.js +0 -74
  192. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts +0 -40
  193. package/dist/parquetjs/parser/parquet-envelope-reader.d.ts.map +0 -1
  194. package/dist/parquetjs/parser/parquet-envelope-reader.js +0 -136
  195. package/dist/parquetjs/utils/buffer-utils.d.ts +0 -10
  196. package/dist/parquetjs/utils/buffer-utils.d.ts.map +0 -1
  197. package/dist/parquetjs/utils/buffer-utils.js +0 -22
  198. package/src/lib/parse-parquet.ts +0 -27
  199. package/src/lib/read-array-buffer.ts +0 -31
  200. package/src/parquetjs/file.ts +0 -90
  201. package/src/parquetjs/parser/parquet-cursor.ts +0 -94
  202. package/src/parquetjs/parser/parquet-envelope-reader.ts +0 -199
  203. package/src/parquetjs/utils/buffer-utils.ts +0 -18
@@ -0,0 +1,99 @@
1
+ // loaders.gl, MIT license
2
+ import {Schema, Field} from '@loaders.gl/schema';
3
+
4
+ /* eslint-disable camelcase */
5
+
6
/**
 * A geoarrow / geoparquet geo metadata object.
 * It is stored in stringified (JSON) form under the top level metadata key 'geo'.
 */
export type GeoMetadata = {
  /** Version string of the geo metadata */
  version?: string;
  /** Name of the primary geometry column */
  primary_column?: string;
  /** Per-column geo metadata, keyed by column name */
  columns: Record<string, GeoColumnMetadata>;
  /** Any additional keys are preserved but not interpreted */
  [key: string]: unknown;
};
13
+
14
/**
 * A geoarrow / geoparquet geo metadata for one geometry column.
 *
 * All fields are optional. `bbox`, `geometry_types`, `encoding`, `orientation` and `epoch`
 * are the key names used by the GeoParquet JSON schema; `bounding_box` and `geometry_type`
 * are kept so existing code keeps compiling.
 * NOTE(review): `bounding_box` does not appear in the GeoParquet JSON schema - confirm where it is produced.
 */
export type GeoColumnMetadata = {
  /** Bounding box: [xmin, ymin, xmax, ymax] or [xmin, ymin, zmin, xmax, ymax, zmax] */
  bounding_box?:
    | [number, number, number, number]
    | [number, number, number, number, number, number];
  /** Bounding box under the GeoParquet key name, same layout as `bounding_box` */
  bbox?: [number, number, number, number] | [number, number, number, number, number, number];
  /** Coordinate reference system */
  crs?: string;
  /** Geometry type names (singular key name) */
  geometry_type?: string[];
  /** Geometry type names under the GeoParquet key name */
  geometry_types?: string[];
  /** Geometry encoding, e.g. 'WKB' */
  encoding?: string;
  /** Edge interpretation, e.g. 'planar' or 'spherical' */
  edges?: string;
  /** Polygon winding order, e.g. 'counterclockwise' */
  orientation?: string;
  /** Coordinate epoch */
  epoch?: number;
  /** Any additional keys are preserved but not interpreted */
  [key: string]: unknown;
};
24
+
25
/**
 * Reads the GeoMetadata object from the schema metadata.
 * @note geoarrow / geoparquet geo metadata is stringified into a single key-value pair ('geo')
 * @param schema Schema whose `metadata` map may contain a 'geo' entry
 * @returns The parsed geo metadata, or `null` when the 'geo' entry is missing, empty,
 *   not valid JSON, or valid JSON that is not a plain object
 */
export function getGeoMetadata(schema: Schema): GeoMetadata | null {
  const stringifiedGeoMetadata = schema.metadata.get('geo');
  if (!stringifiedGeoMetadata) {
    return null;
  }

  try {
    const geoMetadata: unknown = JSON.parse(stringifiedGeoMetadata);
    // JSON such as `123`, `"text"`, `null` or `[1, 2]` parses fine but is not a metadata object
    if (!geoMetadata || typeof geoMetadata !== 'object' || Array.isArray(geoMetadata)) {
      return null;
    }
    return geoMetadata as GeoMetadata;
  } catch {
    // Malformed JSON: treated the same as missing metadata (best effort)
    return null;
  }
}
41
+
42
/**
 * Writes a geoarrow / geoparquet geo metadata object into the schema metadata.
 * @note The object is serialized with JSON.stringify and stored under the single key 'geo',
 *   replacing any previous value of that key.
 * @param schema Schema whose `metadata` map is updated in place
 * @param geoMetadata Geo metadata object to store
 */
export function setGeoMetadata(schema: Schema, geoMetadata: GeoMetadata): void {
  schema.metadata.set('geo', JSON.stringify(geoMetadata));
}
50
+
51
/**
 * Unpacks geo metadata into separate metadata fields (parses the long JSON string).
 *
 * Schema level entries written: 'geo.version' and 'geo.primary_column' (when present) and
 * 'geo.columns' (comma separated geometry column names). For every geometry column that
 * matches a schema field by name, the column metadata is unpacked onto that field, and the
 * primary column's field is additionally tagged with 'geo.primary_field' = 'true'.
 *
 * @note geoarrow / geoparquet geo metadata is stringified into a single key-value pair in the parquet metadata
 * @param schema Schema to update in place; left untouched when it has no readable geo metadata
 */
export function unpackGeoMetadata(schema: Schema): void {
  const geoMetadata = getGeoMetadata(schema);
  if (!geoMetadata) {
    return;
  }

  // Store Parquet Schema Level Metadata

  const {version, primary_column, columns} = geoMetadata;
  if (version) {
    schema.metadata.set('geo.version', version);
  }

  if (primary_column) {
    schema.metadata.set('geo.primary_column', primary_column);
  }

  // Store column names as a comma separated list.
  // (Joining with '' would fuse the names together, e.g. 'geomcentroid'.)
  schema.metadata.set('geo.columns', Object.keys(columns || {}).join(','));

  for (const [columnName, columnMetadata] of Object.entries(columns || {})) {
    // Columns listed in the geo metadata but absent from the schema are skipped
    const field = schema.fields.find((candidate) => candidate.name === columnName);
    if (field) {
      if (field.name === primary_column) {
        field.metadata.set('geo.primary_field', 'true');
      }
      unpackGeoFieldMetadata(field, columnMetadata);
    }
  }
}
85
+
86
/**
 * Copies the geo metadata of one geometry column onto its field as flat 'geo.<key>' entries.
 *
 * - 'geometry_types' (and the singular 'geometry_type') is stored as a comma separated list
 *   when the value is an array; a single string value is stored unchanged.
 * - Every other key stores string values unchanged and JSON-stringifies anything else.
 *
 * @param field Field whose `metadata` map receives the entries
 * @param columnMetadata Geo metadata of the column; null / undefined is treated as empty
 */
function unpackGeoFieldMetadata(
  field: Field,
  columnMetadata: Record<string, unknown> | null | undefined
): void {
  for (const [key, value] of Object.entries(columnMetadata || {})) {
    switch (key) {
      case 'geometry_type':
      case 'geometry_types':
        // Only join real arrays: calling `.join` on a single string value would throw
        field.metadata.set(
          `geo.${key}`,
          Array.isArray(value)
            ? value.join(',')
            : typeof value === 'string'
            ? value
            : JSON.stringify(value)
        );
        break;
      case 'bbox':
      case 'crs':
      case 'edges':
      default:
        field.metadata.set(`geo.${key}`, typeof value === 'string' ? value : JSON.stringify(value));
    }
  }
}
@@ -0,0 +1,69 @@
1
+ // loaders.gl, MIT license
2
+
3
+ /* eslint-disable camelcase */
4
+
5
/** JSON schema fragment: list of geometry type names of a column */
const GEOMETRY_TYPES_SCHEMA = {
  type: 'array',
  uniqueItems: true,
  items: {
    type: 'string',
    pattern: '^(GeometryCollection|(Multi)?(Point|LineString|Polygon))( Z)?$'
  }
};

/** JSON schema fragment: coordinate reference system, either PROJJSON or null */
const CRS_SCHEMA = {
  oneOf: [
    {
      $ref: 'https://proj.org/schemas/v0.5/projjson.schema.json'
    },
    {type: 'null'}
  ]
};

/** JSON schema fragment: bounding box of a column, with either 4 (2D) or 6 (3D) numbers */
const BBOX_SCHEMA = {
  type: 'array',
  items: {type: 'number'},
  oneOf: [
    {
      description: '2D bbox consisting of (xmin, ymin, xmax, ymax)',
      minItems: 4,
      maxItems: 4
    },
    {
      description: '3D bbox consisting of (xmin, ymin, zmin, xmax, ymax, zmax)',
      minItems: 6,
      maxItems: 6
    }
  ]
};

/** JSON schema fragment: metadata of a single geometry column */
const COLUMN_SCHEMA = {
  type: 'object',
  required: ['encoding', 'geometry_types'],
  properties: {
    encoding: {type: 'string', const: 'WKB'},
    geometry_types: GEOMETRY_TYPES_SCHEMA,
    crs: CRS_SCHEMA,
    edges: {type: 'string', enum: ['planar', 'spherical']},
    orientation: {type: 'string', const: 'counterclockwise'},
    bbox: BBOX_SCHEMA,
    epoch: {type: 'number'}
  }
};

/**
 * Geoparquet JSON schema (draft-07) for the geo metadata object.
 * Requires `version`, `primary_column` and at least one entry in `columns`;
 * every column entry must declare `encoding` and `geometry_types`.
 * @see https://github.com/geoarrow/geoarrow/blob/main/metadata.md
 * @see https://github.com/opengeospatial/geoparquet/blob/main/format-specs/geoparquet.md
 */
export default {
  $schema: 'http://json-schema.org/draft-07/schema#',
  title: 'GeoParquet',
  description: 'Parquet metadata included in the geo field.',
  type: 'object',
  required: ['version', 'primary_column', 'columns'],
  properties: {
    version: {type: 'string', const: '1.0.0-beta.1'},
    primary_column: {type: 'string', minLength: 1},
    columns: {
      type: 'object',
      minProperties: 1,
      // Any non-empty column name is accepted
      patternProperties: {
        '.+': COLUMN_SCHEMA
      },
      additionalProperties: false
    }
  }
};
@@ -0,0 +1,49 @@
1
+ // loaders.gl, MIT license
2
+
3
+ // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
4
+ import {ColumnarTableBatch, Schema} from '@loaders.gl/schema';
5
+ import {makeReadableFile} from '@loaders.gl/loader-utils';
6
+ import type {ParquetLoaderOptions} from '../../parquet-loader';
7
+ import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
8
+ import {ParquetBuffer} from '../../parquetjs/schema/declare';
9
+ import {convertSchemaFromParquet} from '../arrow/convert-schema-from-parquet';
10
+ import {convertParquetRowGroupToColumns} from '../arrow/convert-row-group-to-columns';
11
+ import {unpackGeoMetadata} from '../geo/decode-geo-metadata';
12
+
13
/**
 * Parses a parquet file held in memory into a columnar table batch.
 *
 * NOTE(review): only the first batch produced by `parseParquetFileInColumnarBatches` is
 * returned; iteration stops there and later batches are never read - confirm this is intended.
 *
 * @param arrayBuffer Binary contents of the parquet file
 * @param options Loader options, forwarded to `parseParquetFileInColumnarBatches`
 * @returns The first columnar batch, or `null` when no batch is produced
 */
export async function parseParquetInColumns(
  arrayBuffer: ArrayBuffer,
  options?: ParquetLoaderOptions
) {
  const batches = parseParquetFileInColumnarBatches(new Blob([arrayBuffer]), options);
  for await (const firstBatch of batches) {
    return firstBatch;
  }
  return null;
}
23
+
24
/**
 * Streams a parquet file as columnar table batches, yielding one batch per row group
 * delivered by the reader's row group iterator.
 *
 * The table schema is converted from the parquet schema and file metadata once, has its
 * geo metadata unpacked, and is then shared by every yielded batch.
 *
 * @param blob Parquet file contents
 * @param options Loader options; `options.parquet` is passed to the row group iterator
 */
export async function* parseParquetFileInColumnarBatches(
  blob: Blob,
  options?: ParquetLoaderOptions
): AsyncIterable<ColumnarTableBatch> {
  const reader = new ParquetReader(makeReadableFile(blob));

  // Arguments are awaited left to right: parquet schema first, then file metadata
  const schema = convertSchemaFromParquet(await reader.getSchema(), await reader.getFileMetadata());
  unpackGeoMetadata(schema);

  for await (const rowGroup of reader.rowGroupIterator(options?.parquet)) {
    yield convertRowGroupToTableBatch(schema, rowGroup);
  }
}
39
+
40
/**
 * Wraps one parquet row group as a 'columnar-table' data batch.
 * @param schema Table schema attached to the batch and used for the column conversion
 * @param rowGroup Row group to convert; its `rowCount` becomes the batch length
 * @returns The batch holding the converted columns
 */
function convertRowGroupToTableBatch(schema: Schema, rowGroup: ParquetBuffer): ColumnarTableBatch {
  return {
    shape: 'columnar-table',
    batchType: 'data',
    schema,
    data: convertParquetRowGroupToColumns(schema, rowGroup),
    length: rowGroup.rowCount
  };
}
@@ -0,0 +1,40 @@
1
+ // import type {LoaderWithParser, Loader, LoaderOptions} from '@loaders.gl/loader-utils';
2
+ // import {ColumnarTableBatch} from '@loaders.gl/schema';
3
+ import {makeReadableFile} from '@loaders.gl/loader-utils';
4
+ import type {ParquetLoaderOptions} from '../../parquet-loader';
5
+ import {ParquetReader} from '../../parquetjs/parser/parquet-reader';
6
+
7
/**
 * Parses a parquet file held in memory into rows.
 *
 * NOTE(review): only the first batch produced by `parseParquetFileInBatches` is returned;
 * iteration stops there and later batches are never read - confirm this is intended.
 *
 * @param arrayBuffer Binary contents of the parquet file
 * @param options Loader options, forwarded to `parseParquetFileInBatches`
 * @returns The first row batch, or `null` when no batch is produced
 */
export async function parseParquet(arrayBuffer: ArrayBuffer, options?: ParquetLoaderOptions) {
  const batches = parseParquetFileInBatches(new Blob([arrayBuffer]), options);
  for await (const firstBatch of batches) {
    return firstBatch;
  }
  return null;
}
14
+
15
/**
 * Streams a parquet file as row batches, re-yielding every batch delivered by the
 * reader's row batch iterator unchanged.
 * @param blob Parquet file contents
 * @param options Loader options; `options.parquet` is passed to the row batch iterator
 */
export async function* parseParquetFileInBatches(blob: Blob, options?: ParquetLoaderOptions) {
  const reader = new ParquetReader(makeReadableFile(blob));
  for await (const rowBatch of reader.rowBatchIterator(options?.parquet)) {
    yield rowBatch;
  }
}
23
+
24
+ // export async function* parseParquetFileInColumnarBatches(blob: Blob, options?: {columnList?: string[][]}): AsyncIterable<ColumnarTableBatch> {
25
+ // const rowGroupReader = new ParquetRowGroupReader({data: blob, columnList: options?.columnList});
26
+ // try {
27
+ // for await (const rowGroup of rowGroupReader) {
28
+ // yield convertRowGroupToTableBatch(rowGroup);
29
+ // }
30
+ // } finally {
31
+ // await rowGroupReader.close();
32
+ // }
33
+ // }
34
+
35
+ // function convertRowGroupToTableBatch(rowGroup): ColumnarTableBatch {
36
+ // // @ts-expect-error
37
+ // return {
38
+ // data: rowGroup
39
+ // };
40
+ // }