@loaders.gl/arrow 4.0.4 → 4.1.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/arrow-worker.js +237 -2
  2. package/dist/dist.dev.js +1465 -630
  3. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts +8 -4
  4. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts.map +1 -1
  5. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js +75 -54
  6. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js.map +1 -1
  7. package/dist/geoarrow/convert-geoarrow-to-geojson-geometry.d.ts +13 -0
  8. package/dist/geoarrow/convert-geoarrow-to-geojson-geometry.d.ts.map +1 -0
  9. package/dist/geoarrow/{convert-geoarrow-to-geojson.js → convert-geoarrow-to-geojson-geometry.js} +34 -27
  10. package/dist/geoarrow/convert-geoarrow-to-geojson-geometry.js.map +1 -0
  11. package/dist/geoarrow-loader.d.ts.map +1 -1
  12. package/dist/geoarrow-loader.js +0 -1
  13. package/dist/geoarrow-loader.js.map +1 -1
  14. package/dist/index.cjs +429 -347
  15. package/dist/index.d.ts +5 -3
  16. package/dist/index.d.ts.map +1 -1
  17. package/dist/index.js +4 -3
  18. package/dist/index.js.map +1 -1
  19. package/dist/parsers/parse-arrow-sync.d.ts.map +1 -1
  20. package/dist/parsers/parse-arrow-sync.js +2 -0
  21. package/dist/parsers/parse-arrow-sync.js.map +1 -1
  22. package/dist/tables/convert-arrow-to-columnar-table.d.ts.map +1 -1
  23. package/dist/tables/convert-arrow-to-columnar-table.js +1 -0
  24. package/dist/tables/convert-arrow-to-columnar-table.js.map +1 -1
  25. package/dist/tables/convert-arrow-to-geojson-table.d.ts +1 -1
  26. package/dist/tables/convert-arrow-to-geojson-table.d.ts.map +1 -1
  27. package/dist/tables/convert-arrow-to-geojson-table.js +14 -8
  28. package/dist/tables/convert-arrow-to-geojson-table.js.map +1 -1
  29. package/dist/tables/convert-columnar-to-row-table.d.ts.map +1 -1
  30. package/dist/tables/convert-columnar-to-row-table.js +1 -0
  31. package/dist/tables/convert-columnar-to-row-table.js.map +1 -1
  32. package/dist/triangulate-on-worker.d.ts +40 -6
  33. package/dist/triangulate-on-worker.d.ts.map +1 -1
  34. package/dist/triangulate-on-worker.js +11 -1
  35. package/dist/triangulate-on-worker.js.map +1 -1
  36. package/dist/triangulation-worker.js +11703 -34
  37. package/dist/workers/hard-clone.d.ts +23 -0
  38. package/dist/workers/hard-clone.d.ts.map +1 -0
  39. package/dist/workers/hard-clone.js +57 -0
  40. package/dist/workers/hard-clone.js.map +1 -0
  41. package/dist/workers/triangulation-worker.js +37 -2
  42. package/dist/workers/triangulation-worker.js.map +1 -1
  43. package/package.json +16 -10
  44. package/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +103 -61
  45. package/src/geoarrow/{convert-geoarrow-to-geojson.ts → convert-geoarrow-to-geojson-geometry.ts} +56 -46
  46. package/src/geoarrow-loader.ts +0 -4
  47. package/src/index.ts +9 -3
  48. package/src/parsers/parse-arrow-sync.ts +6 -1
  49. package/src/tables/convert-arrow-to-columnar-table.ts +1 -0
  50. package/src/tables/convert-arrow-to-geojson-table.ts +18 -7
  51. package/src/tables/convert-columnar-to-row-table.ts +1 -0
  52. package/src/triangulate-on-worker.ts +51 -8
  53. package/src/workers/hard-clone.ts +162 -0
  54. package/src/workers/triangulation-worker.ts +57 -3
  55. package/dist/geoarrow/convert-geoarrow-to-geojson.d.ts +0 -20
  56. package/dist/geoarrow/convert-geoarrow-to-geojson.d.ts.map +0 -1
  57. package/dist/geoarrow/convert-geoarrow-to-geojson.js.map +0 -1
@@ -1,81 +1,95 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
- import * as arrow from 'apache-arrow';
4
+ // import * as arrow from 'apache-arrow';
5
5
  import {
6
- Feature,
7
6
  MultiPolygon,
8
7
  Position,
9
8
  Polygon,
10
9
  MultiPoint,
11
10
  Point,
12
11
  MultiLineString,
13
- LineString
12
+ LineString,
13
+ Geometry,
14
+ BinaryGeometry
14
15
  } from '@loaders.gl/schema';
15
- import type {GeoArrowEncoding} from '@loaders.gl/gis';
16
-
17
- type RawArrowFeature = {
18
- data: arrow.Vector;
19
- encoding?: GeoArrowEncoding;
20
- };
16
+ import {binaryToGeometry, type GeoArrowEncoding} from '@loaders.gl/gis';
17
+ import {WKBLoader, WKTLoader} from '@loaders.gl/wkt';
21
18
 
22
19
  /**
23
20
  * parse geometry from arrow data that is returned from processArrowData()
24
- * NOTE: this function could be duplicated with the binaryToFeature() in deck.gl,
25
- * it is currently only used for picking because currently deck.gl returns only the index of the feature
26
- * So the following functions could be deprecated once deck.gl returns the feature directly for binary geojson layer
21
+ * NOTE: this function could be deduplicated with the binaryToFeature() in deck.gl,
22
+ * it is currently used for deck.gl picking because currently deck.gl returns only the index of the feature
27
23
  *
28
- * @param rawData the raw geometry data returned from processArrowData, which is an object with two properties: encoding and data
29
- * @see processArrowData
24
+ * @param arrowCellValue data extracted from arrow vector representing a geometry
25
+ * @param encoding the geoarrow encoding of the geometry column
30
26
  * @returns Feature or null
31
27
  */
32
- export function parseGeometryFromArrow(rawData: RawArrowFeature): Feature | null {
33
- const encoding = rawData.encoding?.toLowerCase() as typeof rawData.encoding;
34
- const data = rawData.data;
35
- if (!encoding || !data) {
28
+ export function parseGeometryFromArrow(
29
+ arrowCellValue: any,
30
+ encoding?: GeoArrowEncoding
31
+ ): Geometry | null {
32
+ // sanity
33
+ encoding = encoding?.toLowerCase() as GeoArrowEncoding;
34
+ if (!encoding || !arrowCellValue) {
36
35
  return null;
37
36
  }
38
37
 
39
- let geometry;
38
+ let geometry: Geometry;
40
39
 
41
40
  switch (encoding) {
42
41
  case 'geoarrow.multipolygon':
43
- geometry = arrowMultiPolygonToFeature(data);
42
+ geometry = arrowMultiPolygonToFeature(arrowCellValue);
44
43
  break;
45
44
  case 'geoarrow.polygon':
46
- geometry = arrowPolygonToFeature(data);
45
+ geometry = arrowPolygonToFeature(arrowCellValue);
47
46
  break;
48
47
  case 'geoarrow.multipoint':
49
- geometry = arrowMultiPointToFeature(data);
48
+ geometry = arrowMultiPointToFeature(arrowCellValue);
50
49
  break;
51
50
  case 'geoarrow.point':
52
- geometry = arrowPointToFeature(data);
51
+ geometry = arrowPointToFeature(arrowCellValue);
53
52
  break;
54
53
  case 'geoarrow.multilinestring':
55
- geometry = arrowMultiLineStringToFeature(data);
54
+ geometry = arrowMultiLineStringToFeature(arrowCellValue);
56
55
  break;
57
56
  case 'geoarrow.linestring':
58
- geometry = arrowLineStringToFeature(data);
57
+ geometry = arrowLineStringToFeature(arrowCellValue);
59
58
  break;
60
59
  case 'geoarrow.wkb':
61
- throw Error(`GeoArrow encoding not supported ${encoding}`);
60
+ geometry = arrowWKBToFeature(arrowCellValue);
61
+ break;
62
62
  case 'geoarrow.wkt':
63
- throw Error(`GeoArrow encoding not supported ${encoding}`);
63
+ geometry = arrowWKTToFeature(arrowCellValue);
64
+ break;
64
65
  default: {
65
66
  throw Error(`GeoArrow encoding not supported ${encoding}`);
66
67
  }
67
68
  }
68
- return {
69
- type: 'Feature',
70
- geometry,
71
- properties: {}
72
- };
69
+
70
+ return geometry;
71
+ }
72
+
73
+ function arrowWKBToFeature(arrowCellValue: any) {
74
+ // The actual WKB array buffer starts from byteOffset and ends at byteOffset + byteLength
75
+ const arrayBuffer: ArrayBuffer = arrowCellValue.buffer.slice(
76
+ arrowCellValue.byteOffset,
77
+ arrowCellValue.byteOffset + arrowCellValue.byteLength
78
+ );
79
+ const binaryGeometry = WKBLoader.parseSync?.(arrayBuffer)! as BinaryGeometry;
80
+ const geometry = binaryToGeometry(binaryGeometry);
81
+ return geometry;
82
+ }
83
+
84
+ function arrowWKTToFeature(arrowCellValue: any) {
85
+ const string: string = arrowCellValue;
86
+ return WKTLoader.parseTextSync?.(string)!;
73
87
  }
74
88
 
75
89
  /**
76
90
  * convert Arrow MultiPolygon to geojson Feature
77
91
  */
78
- function arrowMultiPolygonToFeature(arrowMultiPolygon: arrow.Vector): MultiPolygon {
92
+ function arrowMultiPolygonToFeature(arrowMultiPolygon: any): MultiPolygon {
79
93
  const multiPolygon: Position[][][] = [];
80
94
  for (let m = 0; m < arrowMultiPolygon.length; m++) {
81
95
  const arrowPolygon = arrowMultiPolygon.get(m);
@@ -102,7 +116,7 @@ function arrowMultiPolygonToFeature(arrowMultiPolygon: arrow.Vector): MultiPolyg
102
116
  /**
103
117
  * convert Arrow Polygon to geojson Feature
104
118
  */
105
- function arrowPolygonToFeature(arrowPolygon: arrow.Vector): Polygon {
119
+ function arrowPolygonToFeature(arrowPolygon: any): Polygon {
106
120
  const polygon: Position[][] = [];
107
121
  for (let i = 0; arrowPolygon && i < arrowPolygon.length; i++) {
108
122
  const arrowRing = arrowPolygon.get(i);
@@ -124,7 +138,7 @@ function arrowPolygonToFeature(arrowPolygon: arrow.Vector): Polygon {
124
138
  /**
125
139
  * convert Arrow MultiPoint to geojson MultiPoint
126
140
  */
127
- function arrowMultiPointToFeature(arrowMultiPoint: arrow.Vector): MultiPoint {
141
+ function arrowMultiPointToFeature(arrowMultiPoint: any): MultiPoint {
128
142
  const multiPoint: Position[] = [];
129
143
  for (let i = 0; arrowMultiPoint && i < arrowMultiPoint.length; i++) {
130
144
  const arrowPoint = arrowMultiPoint.get(i);
@@ -133,29 +147,27 @@ function arrowMultiPointToFeature(arrowMultiPoint: arrow.Vector): MultiPoint {
133
147
  multiPoint.push(coord);
134
148
  }
135
149
  }
136
- const geometry: MultiPoint = {
150
+ return {
137
151
  type: 'MultiPoint',
138
152
  coordinates: multiPoint
139
153
  };
140
- return geometry;
141
154
  }
142
155
 
143
156
  /**
144
157
  * convert Arrow Point to geojson Point
145
158
  */
146
- function arrowPointToFeature(arrowPoint: arrow.Vector): Point {
159
+ function arrowPointToFeature(arrowPoint: any): Point {
147
160
  const point: Position = Array.from(arrowPoint);
148
- const geometry: Point = {
161
+ return {
149
162
  type: 'Point',
150
163
  coordinates: point
151
164
  };
152
- return geometry;
153
165
  }
154
166
 
155
167
  /**
156
168
  * convert Arrow MultiLineString to geojson MultiLineString
157
169
  */
158
- function arrowMultiLineStringToFeature(arrowMultiLineString: arrow.Vector): MultiLineString {
170
+ function arrowMultiLineStringToFeature(arrowMultiLineString: any): MultiLineString {
159
171
  const multiLineString: Position[][] = [];
160
172
  for (let i = 0; arrowMultiLineString && i < arrowMultiLineString.length; i++) {
161
173
  const arrowLineString = arrowMultiLineString.get(i);
@@ -169,17 +181,16 @@ function arrowMultiLineStringToFeature(arrowMultiLineString: arrow.Vector): Mult
169
181
  }
170
182
  multiLineString.push(lineString);
171
183
  }
172
- const geometry: MultiLineString = {
184
+ return {
173
185
  type: 'MultiLineString',
174
186
  coordinates: multiLineString
175
187
  };
176
- return geometry;
177
188
  }
178
189
 
179
190
  /**
180
191
  * convert Arrow LineString to geojson LineString
181
192
  */
182
- function arrowLineStringToFeature(arrowLineString: arrow.Vector): LineString {
193
+ function arrowLineStringToFeature(arrowLineString: any): LineString {
183
194
  const lineString: Position[] = [];
184
195
  for (let i = 0; arrowLineString && i < arrowLineString.length; i++) {
185
196
  const arrowCoord = arrowLineString.get(i);
@@ -188,9 +199,8 @@ function arrowLineStringToFeature(arrowLineString: arrow.Vector): LineString {
188
199
  lineString.push(coords);
189
200
  }
190
201
  }
191
- const geometry: LineString = {
202
+ return {
192
203
  type: 'LineString',
193
204
  coordinates: lineString
194
205
  };
195
- return geometry;
196
206
  }
@@ -8,10 +8,6 @@ import type {ArrowTable, ArrowTableBatch} from './lib/arrow-table';
8
8
  import {parseGeoArrowSync} from './parsers/parse-geoarrow-sync';
9
9
  import {parseGeoArrowInBatches} from './parsers/parse-geoarrow-in-batches';
10
10
 
11
- // __VERSION__ is injected by babel-plugin-version-inline
12
- // @ts-ignore TS2304: Cannot find name '__VERSION__'.
13
- const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
14
-
15
11
  export type GeoArrowLoaderOptions = LoaderOptions & {
16
12
  arrow?: {
17
13
  shape: 'arrow-table' | 'binary-geometry';
package/src/index.ts CHANGED
@@ -52,7 +52,7 @@ export type {
52
52
  BinaryGeometriesFromArrowOptions
53
53
  } from './geoarrow/convert-geoarrow-to-binary-geometry';
54
54
  export {
55
- BINARY_GEOMETRY_TEMPLATE,
55
+ getBinaryGeometryTemplate,
56
56
  getBinaryGeometriesFromArrow,
57
57
  getTriangleIndices,
58
58
  getMeanCentersFromBinaryGeometries
@@ -60,10 +60,16 @@ export {
60
60
 
61
61
  export {updateBoundsFromGeoArrowSamples} from './geoarrow/get-arrow-bounds';
62
62
 
63
- export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson';
63
+ export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson-geometry';
64
64
 
65
65
  export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-table';
66
66
 
67
67
  // EXPERIMENTAL WORKER
68
+ export {hardClone} from './workers/hard-clone';
68
69
 
69
- export {TriangulationWorker, triangulateOnWorker} from './triangulate-on-worker';
70
+ export type {ParseGeoArrowInput, ParseGeoArrowResult} from './triangulate-on-worker';
71
+ export {
72
+ TriangulationWorker,
73
+ triangulateOnWorker,
74
+ parseGeoArrowOnWorker
75
+ } from './triangulate-on-worker';
@@ -6,6 +6,7 @@ import type {ArrowTable} from '../lib/arrow-table';
6
6
  import {convertTable} from '@loaders.gl/schema';
7
7
  import * as arrow from 'apache-arrow';
8
8
  import {convertArrowToColumnarTable} from '../tables/convert-arrow-to-columnar-table';
9
+ import {serializeArrowSchema} from '../schema/convert-arrow-schema';
9
10
 
10
11
  // Parses arrow to a columnar table
11
12
  export function parseArrowSync(
@@ -13,7 +14,11 @@ export function parseArrowSync(
13
14
  options?: {shape?: 'arrow-table' | 'columnar-table' | 'object-row-table' | 'array-row-table'}
14
15
  ): ArrowTable | ColumnarTable | ObjectRowTable | ArrayRowTable {
15
16
  const apacheArrowTable = arrow.tableFromIPC([new Uint8Array(arrayBuffer)]);
16
- const arrowTable: ArrowTable = {shape: 'arrow-table', data: apacheArrowTable};
17
+ const arrowTable: ArrowTable = {
18
+ shape: 'arrow-table',
19
+ schema: serializeArrowSchema(apacheArrowTable.schema),
20
+ data: apacheArrowTable
21
+ };
17
22
 
18
23
  const shape = options?.shape || 'arrow-table';
19
24
  switch (shape) {
@@ -24,6 +24,7 @@ export function convertArrowToColumnarTable(table: ArrowTable): ColumnarTable {
24
24
 
25
25
  return {
26
26
  shape: 'columnar-table',
27
+ schema: table.schema,
27
28
  data: columnarTable
28
29
  };
29
30
  }
@@ -2,7 +2,7 @@
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
4
  import type {Feature, GeoJSONTable} from '@loaders.gl/schema';
5
- import type * as arrow from 'apache-arrow';
5
+ import * as arrow from 'apache-arrow';
6
6
  import type {ArrowTable} from '../lib/arrow-table';
7
7
  import {serializeArrowSchema, parseGeometryFromArrow} from '@loaders.gl/arrow';
8
8
  import {getGeometryColumnsFromSchema} from '@loaders.gl/gis';
@@ -16,6 +16,7 @@ import {getGeometryColumnsFromSchema} from '@loaders.gl/gis';
16
16
  export function convertApacheArrowToArrowTable(arrowTable: arrow.Table): ArrowTable {
17
17
  return {
18
18
  shape: 'arrow-table',
19
+ schema: serializeArrowSchema(arrowTable.schema),
19
20
  data: arrowTable
20
21
  };
21
22
  }
@@ -34,21 +35,31 @@ export function convertArrowToGeoJSONTable(table: ArrowTable): GeoJSONTable {
34
35
 
35
36
  const features: Feature[] = [];
36
37
 
37
- for (let row = 0; row < arrowTable.numRows; row++) {
38
- // get first geometry from arrow geometry column
39
- const arrowGeometry = arrowTable.getChild('geometry')?.get(row);
40
- const arrowGeometryObject = {encoding, data: arrowGeometry};
38
+ // Remove geometry columns
39
+ const propertyColumnNames = arrowTable.schema.fields
40
+ .map((field) => field.name)
41
+ // TODO - this deletes all geometry columns
42
+ .filter((name) => !(name in geometryColumns));
43
+ const propertiesTable = arrowTable.select(propertyColumnNames);
44
+
45
+ const arrowGeometryColumn = arrowTable.getChild('geometry');
41
46
 
47
+ for (let row = 0; row < arrowTable.numRows; row++) {
48
+ // get the geometry value from arrow geometry column
49
+ // Note that type can vary
50
+ const arrowGeometry = arrowGeometryColumn?.get(row);
42
51
  // parse arrow geometry to geojson feature
43
- const feature = parseGeometryFromArrow(arrowGeometryObject);
52
+ const feature = parseGeometryFromArrow(arrowGeometry, encoding);
44
53
  if (feature) {
45
- features.push(feature);
54
+ const properties = propertiesTable.get(row)?.toJSON() || {};
55
+ features.push({type: 'Feature', geometry: feature, properties});
46
56
  }
47
57
  }
48
58
 
49
59
  return {
50
60
  shape: 'geojson-table',
51
61
  type: 'FeatureCollection',
62
+ schema: table.schema,
52
63
  features
53
64
  };
54
65
  }
@@ -24,6 +24,7 @@ export function convertColumnarToRowFormatTable(columnarTable: ColumnarTable): O
24
24
 
25
25
  return {
26
26
  shape: 'object-row-table',
27
+ schema: columnarTable.schema,
27
28
  data: rowFormatTable
28
29
  };
29
30
  }
@@ -1,19 +1,52 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
+ import * as arrow from 'apache-arrow';
4
5
  import type {WorkerOptions} from '@loaders.gl/worker-utils';
5
6
  import {processOnWorker} from '@loaders.gl/worker-utils';
7
+ import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow';
6
8
 
7
9
  // __VERSION__ is injected by babel-plugin-version-inline
8
10
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
9
11
  const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
10
12
 
11
- export type TriangulationWorkerInput = TriangulateInput | {operation: 'test'; data: any};
12
- export type TriangulationWorkerOutput = TriangulateResult | {operation: 'test'; data: any};
13
+ export type TriangulationWorkerInput =
14
+ | ({operation: 'triangulate'} & TriangulateInput)
15
+ | ParseGeoArrowInput
16
+ | {operation: 'test'; data: any};
17
+
18
+ export type TriangulationWorkerOutput =
19
+ | ({operation: 'triangulate'} & TriangulateResult)
20
+ | ({operation: 'parse-geoarrow'} & ParseGeoArrowResult)
21
+ | {operation: 'test'; data: any};
22
+
23
+ type GeoArrowChunkData = {
24
+ type: arrow.DataType;
25
+ offset: number;
26
+ length: number;
27
+ nullCount: number;
28
+ buffers: any;
29
+ children: arrow.Data[];
30
+ dictionary?: arrow.Vector;
31
+ };
32
+
33
+ export type ParseGeoArrowInput = {
34
+ operation: 'parse-geoarrow';
35
+ chunkData: GeoArrowChunkData;
36
+ chunkIndex: number;
37
+ chunkOffset: number;
38
+ geometryEncoding: GeoArrowEncoding;
39
+ calculateMeanCenters: boolean;
40
+ triangle: boolean;
41
+ };
42
+
43
+ export type ParseGeoArrowResult = {
44
+ chunkIndex: number;
45
+ binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null;
46
+ };
13
47
 
14
48
  /** Input data for operation: 'triangulate' */
15
49
  export type TriangulateInput = {
16
- operation: 'triangulate';
17
50
  polygonIndices: Uint16Array;
18
51
  primitivePolygonIndices: Int32Array;
19
52
  flatCoordinateArray: Float64Array;
@@ -22,7 +55,7 @@ export type TriangulateInput = {
22
55
 
23
56
  /** Result type for operation: 'triangulate' */
24
57
  export type TriangulateResult = TriangulateInput & {
25
- triangleIndices: Uint32Array;
58
+ triangleIndices?: Uint32Array;
26
59
  };
27
60
 
28
61
  /**
@@ -37,11 +70,21 @@ export const TriangulationWorker = {
37
70
  };
38
71
 
39
72
  /**
40
- * Provide type safety
73
+ * Triangulate a set of polygons on worker, type safe API
41
74
  */
42
75
  export function triangulateOnWorker(
43
- data: TriangulationWorkerInput,
76
+ data: TriangulateInput,
77
+ options: WorkerOptions = {}
78
+ ): Promise<TriangulateResult> {
79
+ return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options);
80
+ }
81
+
82
+ /**
83
+ * Parse GeoArrow geometry column on worker, type safe API
84
+ */
85
+ export function parseGeoArrowOnWorker(
86
+ data: ParseGeoArrowInput,
44
87
  options: WorkerOptions = {}
45
- ): Promise<TriangulationWorkerOutput> {
46
- return processOnWorker(TriangulationWorker, data, options);
88
+ ): Promise<ParseGeoArrowResult> {
89
+ return processOnWorker(TriangulationWorker, {...data, operation: 'parse-geoarrow'}, options);
47
90
  }
@@ -0,0 +1,162 @@
1
+ import * as arrow from 'apache-arrow';
2
+ import type {Buffers} from 'apache-arrow/data';
3
+
4
+ type TypedArray =
5
+ | Uint8Array
6
+ | Uint8ClampedArray
7
+ | Uint16Array
8
+ | Uint32Array
9
+ | Int8Array
10
+ | Int16Array
11
+ | Int32Array
12
+ | Float32Array
13
+ | Float64Array;
14
+
15
+ /**
16
+ * Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if
17
+ * it is shared with other data.
18
+ *
19
+ * The purpose of this function is to enable transferring a `Data` instance,
20
+ * e.g. to a web worker, without neutering any other data.
21
+ *
22
+ * Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where
23
+ * the typed array's `byteOffset` is not `0` and where its `byteLength` does not
24
+ * match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`.
25
+ *
26
+ * If `force` is `true`, always clone internal buffers, even if not shared. With
27
+ * the default, `false`, any internal buffers that are **not** a slice of a
28
+ * larger `ArrayBuffer` will not be copied.
29
+ */
30
+ export function hardClone<T extends arrow.DataType>(
31
+ input: arrow.Data<T>,
32
+ force?: boolean
33
+ ): arrow.Data<T>;
34
+ export function hardClone<T extends arrow.DataType>(
35
+ input: arrow.Vector<T>,
36
+ force?: boolean
37
+ ): arrow.Vector<T>;
38
+
39
+ export function hardClone<T extends arrow.DataType>(
40
+ data: arrow.Data<T> | arrow.Vector<T>,
41
+ force: boolean = false
42
+ ): arrow.Data<T> | arrow.Vector<T> {
43
+ // Check if `data` is an arrow.Vector
44
+ if ('data' in data) {
45
+ return new arrow.Vector(data.data.map((data) => hardClone(data, force)));
46
+ }
47
+
48
+ // Clone each of the children, recursively
49
+ const clonedChildren: arrow.Data[] = [];
50
+ for (const childData of data.children) {
51
+ clonedChildren.push(hardClone(childData, force));
52
+ }
53
+
54
+ // Clone the dictionary if there is one
55
+ let clonedDictionary: arrow.Vector | undefined;
56
+ if (data.dictionary !== undefined) {
57
+ clonedDictionary = hardClone(data.dictionary, force);
58
+ }
59
+
60
+ // Buffers can have up to four entries. Each of these can be `undefined` for
61
+ // one or more array types.
62
+ //
63
+ // - OFFSET: value offsets for variable size list types
64
+ // - DATA: the underlying data
65
+ // - VALIDITY: the null buffer. This may be empty or undefined if all elements
66
+ // are non-null/valid.
67
+ // - TYPE: type ids for a union type.
68
+ const clonedBuffers: Buffers<T> = {
69
+ [arrow.BufferType.OFFSET]: cloneBuffer(data.buffers[arrow.BufferType.OFFSET], force),
70
+ [arrow.BufferType.DATA]: cloneBuffer(data.buffers[arrow.BufferType.DATA], force),
71
+ [arrow.BufferType.VALIDITY]: cloneBuffer(data.buffers[arrow.BufferType.VALIDITY], force),
72
+ [arrow.BufferType.TYPE]: cloneBuffer(data.buffers[arrow.BufferType.TYPE], force)
73
+ };
74
+
75
+ // Note: the data.offset is passed on so that a sliced Data instance will not
76
+ // be "un-sliced". However keep in mind that this means we're cloning the
77
+ // _original backing buffer_, not only the portion of the Data that was
78
+ // sliced.
79
+ return new arrow.Data(
80
+ data.type,
81
+ data.offset,
82
+ data.length,
83
+ // @ts-expect-error _nullCount is protected. We're using it here to mimic
84
+ // `Data.clone`
85
+ data._nullCount,
86
+ clonedBuffers,
87
+ clonedChildren,
88
+ clonedDictionary
89
+ );
90
+ }
91
+
92
+ /**
93
+ * Test whether an arrow.Data instance is a slice of a larger `ArrayBuffer`.
94
+ */
95
+ export function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean {
96
+ // Loop over arrow.Vector
97
+ if ('data' in data) {
98
+ return data.data.some((data) => isShared(data));
99
+ }
100
+
101
+ // Check child data
102
+ for (const childData of data.children) {
103
+ if (isShared(childData)) {
104
+ return true;
105
+ }
106
+ }
107
+
108
+ // Check dictionary
109
+ if (data.dictionary !== undefined) {
110
+ if (isShared(data.dictionary)) {
111
+ return true;
112
+ }
113
+ }
114
+
115
+ const bufferTypes = [
116
+ arrow.BufferType.OFFSET,
117
+ arrow.BufferType.DATA,
118
+ arrow.BufferType.VALIDITY,
119
+ arrow.BufferType.TYPE
120
+ ];
121
+ for (const bufferType of bufferTypes) {
122
+ if (data.buffers[bufferType] !== undefined && isTypedArraySliced(data.buffers[bufferType])) {
123
+ return true;
124
+ }
125
+ }
126
+
127
+ return false;
128
+ }
129
+
130
+ /**
131
+ * Returns true if the current typed array is a partial slice on a larger
132
+ * ArrayBuffer
133
+ */
134
+ function isTypedArraySliced(arr: TypedArray): boolean {
135
+ return !(arr.byteOffset === 0 && arr.byteLength === arr.buffer.byteLength);
136
+ }
137
+
138
+ /**
139
+ * If a slice of a larger ArrayBuffer, clone to a fresh `ArrayBuffer`.
140
+ *
141
+ * If `force` is `true`, always clone the array, even if not shared.
142
+ */
143
+ function cloneBuffer<A extends TypedArray | undefined>(arr: A, force: boolean): A {
144
+ // Not all buffer types are defined for every type of Arrow array. E.g.
145
+ // `arrow.BufferType.TYPE` is only defined for the Union type.
146
+ if (arr === undefined) {
147
+ return arr;
148
+ }
149
+
150
+ // The current array is not a part of a larger ArrayBuffer, don't clone it
151
+ if (!force && !isTypedArraySliced(arr)) {
152
+ return arr;
153
+ }
154
+
155
+ // Note: TypedArray.slice() **copies** into a new ArrayBuffer
156
+
157
+ // @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A',
158
+ // but 'A' could be instantiated with a different subtype of constraint
159
+ // 'TypedArray'
160
+ // We know from arr.slice that it will always return the same typed array type
161
+ return arr.slice();
162
+ }
@@ -1,12 +1,19 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
+ import * as arrow from 'apache-arrow';
4
5
  import {createWorker} from '@loaders.gl/worker-utils';
5
- import {getTriangleIndices} from '../geoarrow/convert-geoarrow-to-binary-geometry';
6
+ import {
7
+ getTriangleIndices,
8
+ getBinaryGeometriesFromArrow,
9
+ BinaryDataFromGeoArrow
10
+ } from '../geoarrow/convert-geoarrow-to-binary-geometry';
6
11
  import type {
7
12
  TriangulationWorkerInput,
8
13
  TriangulateInput,
9
- TriangulateResult
14
+ TriangulateResult,
15
+ ParseGeoArrowInput,
16
+ ParseGeoArrowResult
10
17
  } from '../triangulate-on-worker';
11
18
 
12
19
  createWorker(async (data, options = {}) => {
@@ -17,6 +24,8 @@ createWorker(async (data, options = {}) => {
17
24
  return input;
18
25
  case 'triangulate':
19
26
  return triangulateBatch(data);
27
+ case 'parse-geoarrow':
28
+ return parseGeoArrowBatch(data);
20
29
  default:
21
30
  throw new Error(
22
31
  `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`
@@ -35,5 +44,50 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult {
35
44
  data.flatCoordinateArray,
36
45
  data.nDim
37
46
  );
38
- return {...data, triangleIndices};
47
+ return {...data, ...(triangleIndices ? {triangleIndices} : {})};
48
+ }
49
+
50
+ /**
51
+ * Reading the arrow file into memory is very fast. Parsing the geoarrow column is slow and blocks the main thread.
52
+ * To address this issue, we can move the parsing job from main thread to parallel web workers.
53
+ * Each web worker will parse one chunk/batch of geoarrow column, and return binary geometries to main thread.
54
+ * The app on the main thread will render the binary geometries and the parsing will not block the main thread.
55
+ *
56
+ * @param data
57
+ * @returns
58
+ */
59
+ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult {
60
+ let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null;
61
+ const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} =
62
+ data;
63
+ // rebuild chunkData that is only for geoarrow column
64
+ const arrowData = new arrow.Data(
65
+ chunkData.type,
66
+ chunkData.offset,
67
+ chunkData.length,
68
+ chunkData.nullCount,
69
+ chunkData.buffers,
70
+ chunkData.children,
71
+ chunkData.dictionary
72
+ );
73
+ // rebuild geometry column with chunkData
74
+ const geometryColumn = arrow.makeVector(arrowData);
75
+ if (geometryColumn) {
76
+ // NOTE: for a rebuilt arrow.Vector, there is only one chunk, so chunkIndex is always 0
77
+ const options = {calculateMeanCenters, triangle, chunkIndex: 0, chunkOffset};
78
+ binaryDataFromGeoArrow = getBinaryGeometriesFromArrow(
79
+ geometryColumn,
80
+ geometryEncoding,
81
+ options
82
+ );
83
+ // NOTE: here binaryGeometry will be copied to main thread
84
+ return {
85
+ binaryDataFromGeoArrow,
86
+ chunkIndex
87
+ };
88
+ }
89
+ return {
90
+ binaryDataFromGeoArrow,
91
+ chunkIndex
92
+ };
39
93
  }