@loaders.gl/arrow 4.1.0-alpha.1 → 4.1.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,19 @@
1
1
  // loaders.gl, MIT license
2
2
  // Copyright (c) vis.gl contributors
3
3
 
4
+ import * as arrow from 'apache-arrow';
4
5
  import {createWorker} from '@loaders.gl/worker-utils';
5
- import {getTriangleIndices} from '../geoarrow/convert-geoarrow-to-binary-geometry';
6
+ import {
7
+ getTriangleIndices,
8
+ getBinaryGeometriesFromArrow,
9
+ BinaryDataFromGeoArrow
10
+ } from '../geoarrow/convert-geoarrow-to-binary-geometry';
6
11
  import type {
7
12
  TriangulationWorkerInput,
8
13
  TriangulateInput,
9
- TriangulateResult
14
+ TriangulateResult,
15
+ ParseGeoArrowInput,
16
+ ParseGeoArrowResult
10
17
  } from '../triangulate-on-worker';
11
18
 
12
19
  createWorker(async (data, options = {}) => {
@@ -17,6 +24,8 @@ createWorker(async (data, options = {}) => {
17
24
  return input;
18
25
  case 'triangulate':
19
26
  return triangulateBatch(data);
27
+ case 'parse-geoarrow':
28
+ return parseGeoArrowBatch(data);
20
29
  default:
21
30
  throw new Error(
22
31
  `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`
@@ -37,3 +46,48 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult {
37
46
  );
38
47
  return {...data, ...(triangleIndices ? {triangleIndices} : {})};
39
48
  }
49
+
50
/**
 * Converts one chunk (batch) of a GeoArrow geometry column into binary geometries.
 *
 * Rationale: loading an arrow file into memory is fast, but parsing the geoarrow
 * column is slow and would block the main thread. Running this function inside
 * web workers, one chunk per call, lets the main thread keep rendering while the
 * binary geometries are produced in parallel.
 *
 * @param data - worker input: the raw pieces of the chunk (`chunkData`), its
 *   `chunkIndex` and `chunkOffset`, the `geometryEncoding`, and the
 *   `calculateMeanCenters` / `triangle` flags forwarded to the converter
 * @returns the converted binary data (or `null` when no vector could be built)
 *   together with the `chunkIndex` that was passed in
 */
function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult {
  const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} =
    data;

  // Re-create an arrow.Data instance from the fields carried on chunkData;
  // it describes the geoarrow column only.
  const rebuiltData = new arrow.Data(
    chunkData.type,
    chunkData.offset,
    chunkData.length,
    chunkData.nullCount,
    chunkData.buffers,
    chunkData.children,
    chunkData.dictionary
  );

  // Wrap the rebuilt data in a vector so it can be handed to the converter.
  const geometryColumn = arrow.makeVector(rebuiltData);

  // NOTE: the rebuilt vector holds a single chunk, so the converter is always
  // given chunkIndex 0; the caller's chunkIndex is only echoed back in the result.
  const binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = geometryColumn
    ? getBinaryGeometriesFromArrow(geometryColumn, geometryEncoding, {
        calculateMeanCenters,
        triangle,
        chunkIndex: 0,
        chunkOffset
      })
    : null;

  // NOTE: the returned binary geometry will be copied to the main thread.
  return {binaryDataFromGeoArrow, chunkIndex};
}