@loaders.gl/arrow 4.1.0-alpha.1 → 4.1.0-alpha.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/dist/arrow-loader.d.ts +1 -0
  2. package/dist/arrow-loader.d.ts.map +1 -1
  3. package/dist/arrow-loader.js +1 -1
  4. package/dist/arrow-loader.js.map +1 -1
  5. package/dist/arrow-worker.js +6132 -5809
  6. package/dist/arrow-writer.js +1 -1
  7. package/dist/arrow-writer.js.map +1 -1
  8. package/dist/dist.dev.js +6193 -5806
  9. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts +4 -2
  10. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts.map +1 -1
  11. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js +25 -23
  12. package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js.map +1 -1
  13. package/dist/geoarrow/convert-geoarrow-to-geojson-geometry.d.ts.map +1 -1
  14. package/dist/geoarrow/convert-geoarrow-to-geojson-geometry.js.map +1 -1
  15. package/dist/geoarrow/get-arrow-bounds.d.ts.map +1 -1
  16. package/dist/geoarrow/get-arrow-bounds.js.map +1 -1
  17. package/dist/geoarrow-loader.d.ts.map +1 -1
  18. package/dist/geoarrow-loader.js.map +1 -1
  19. package/dist/geoarrow-writer.js +1 -1
  20. package/dist/geoarrow-writer.js.map +1 -1
  21. package/dist/index.cjs +77 -17
  22. package/dist/index.d.ts +4 -2
  23. package/dist/index.d.ts.map +1 -1
  24. package/dist/index.js +3 -2
  25. package/dist/index.js.map +1 -1
  26. package/dist/lib/arrow-table-batch.d.ts.map +1 -1
  27. package/dist/lib/arrow-table-batch.js.map +1 -1
  28. package/dist/lib/arrow-table.d.ts.map +1 -1
  29. package/dist/lib/arrow-table.js.map +1 -1
  30. package/dist/lib/encode-arrow.d.ts.map +1 -1
  31. package/dist/lib/encode-arrow.js.map +1 -1
  32. package/dist/lib/encode-geoarrow.d.ts.map +1 -1
  33. package/dist/lib/encode-geoarrow.js.map +1 -1
  34. package/dist/parsers/parse-arrow-in-batches.d.ts +2 -1
  35. package/dist/parsers/parse-arrow-in-batches.d.ts.map +1 -1
  36. package/dist/parsers/parse-arrow-in-batches.js +8 -1
  37. package/dist/parsers/parse-arrow-in-batches.js.map +1 -1
  38. package/dist/parsers/parse-arrow-sync.d.ts.map +1 -1
  39. package/dist/parsers/parse-arrow-sync.js.map +1 -1
  40. package/dist/parsers/parse-geoarrow-in-batches.d.ts.map +1 -1
  41. package/dist/parsers/parse-geoarrow-in-batches.js.map +1 -1
  42. package/dist/parsers/parse-geoarrow-sync.d.ts.map +1 -1
  43. package/dist/parsers/parse-geoarrow-sync.js.map +1 -1
  44. package/dist/schema/arrow-type-utils.d.ts.map +1 -1
  45. package/dist/schema/arrow-type-utils.js.map +1 -1
  46. package/dist/schema/convert-arrow-schema.d.ts.map +1 -1
  47. package/dist/schema/convert-arrow-schema.js.map +1 -1
  48. package/dist/tables/convert-arrow-to-columnar-table.d.ts.map +1 -1
  49. package/dist/tables/convert-arrow-to-columnar-table.js.map +1 -1
  50. package/dist/tables/convert-arrow-to-geojson-table.d.ts.map +1 -1
  51. package/dist/tables/convert-arrow-to-geojson-table.js.map +1 -1
  52. package/dist/tables/convert-columnar-to-row-table.d.ts.map +1 -1
  53. package/dist/tables/convert-columnar-to-row-table.js.map +1 -1
  54. package/dist/tables/convert-table-to-arrow.d.ts.map +1 -1
  55. package/dist/tables/convert-table-to-arrow.js.map +1 -1
  56. package/dist/triangulate-on-worker.d.ts +39 -5
  57. package/dist/triangulate-on-worker.d.ts.map +1 -1
  58. package/dist/triangulate-on-worker.js +12 -2
  59. package/dist/triangulate-on-worker.js.map +1 -1
  60. package/dist/triangulation-worker.js +11992 -11
  61. package/dist/types.d.ts.map +1 -1
  62. package/dist/types.js.map +1 -1
  63. package/dist/workers/arrow-worker.js.map +1 -1
  64. package/dist/workers/hard-clone.d.ts +23 -0
  65. package/dist/workers/hard-clone.d.ts.map +1 -0
  66. package/dist/workers/hard-clone.js +57 -0
  67. package/dist/workers/hard-clone.js.map +1 -0
  68. package/dist/workers/triangulation-worker-node.d.ts.map +1 -1
  69. package/dist/workers/triangulation-worker-node.js.map +1 -1
  70. package/dist/workers/triangulation-worker.js +34 -2
  71. package/dist/workers/triangulation-worker.js.map +1 -1
  72. package/package.json +9 -9
  73. package/src/arrow-loader.ts +3 -1
  74. package/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +28 -20
  75. package/src/geoarrow/convert-geoarrow-to-geojson-geometry.ts +2 -1
  76. package/src/geoarrow/get-arrow-bounds.ts +2 -1
  77. package/src/geoarrow-loader.ts +2 -1
  78. package/src/index.ts +10 -3
  79. package/src/lib/arrow-table-batch.ts +2 -1
  80. package/src/lib/arrow-table.ts +2 -1
  81. package/src/lib/encode-arrow.ts +2 -1
  82. package/src/lib/encode-geoarrow.ts +2 -1
  83. package/src/parsers/parse-arrow-in-batches.ts +9 -2
  84. package/src/parsers/parse-arrow-sync.ts +2 -1
  85. package/src/parsers/parse-geoarrow-in-batches.ts +2 -1
  86. package/src/parsers/parse-geoarrow-sync.ts +2 -1
  87. package/src/schema/arrow-type-utils.ts +2 -1
  88. package/src/schema/convert-arrow-schema.ts +2 -1
  89. package/src/tables/convert-arrow-to-columnar-table.ts +2 -1
  90. package/src/tables/convert-arrow-to-geojson-table.ts +2 -1
  91. package/src/tables/convert-columnar-to-row-table.ts +2 -1
  92. package/src/tables/convert-table-to-arrow.ts +2 -1
  93. package/src/triangulate-on-worker.ts +52 -8
  94. package/src/types.ts +2 -1
  95. package/src/workers/arrow-worker.ts +2 -1
  96. package/src/workers/hard-clone.ts +162 -0
  97. package/src/workers/triangulation-worker-node.ts +2 -1
  98. package/src/workers/triangulation-worker.ts +58 -4
@@ -1,19 +1,53 @@
1
- // loaders.gl, MIT license
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
2
3
  // Copyright (c) vis.gl contributors
3
4
 
5
+ import * as arrow from 'apache-arrow';
4
6
  import type {WorkerOptions} from '@loaders.gl/worker-utils';
5
7
  import {processOnWorker} from '@loaders.gl/worker-utils';
8
+ import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow';
6
9
 
7
10
  // __VERSION__ is injected by babel-plugin-version-inline
8
11
  // @ts-ignore TS2304: Cannot find name '__VERSION__'.
9
12
  const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
10
13
 
11
- export type TriangulationWorkerInput = TriangulateInput | {operation: 'test'; data: any};
12
- export type TriangulationWorkerOutput = TriangulateResult | {operation: 'test'; data: any};
14
+ export type TriangulationWorkerInput =
15
+ | ({operation: 'triangulate'} & TriangulateInput)
16
+ | ParseGeoArrowInput
17
+ | {operation: 'test'; data: any};
18
+
19
+ export type TriangulationWorkerOutput =
20
+ | ({operation: 'triangulate'} & TriangulateResult)
21
+ | ({operation: 'parse-geoarrow'} & ParseGeoArrowResult)
22
+ | {operation: 'test'; data: any};
23
+
24
+ type GeoArrowChunkData = {
25
+ type: arrow.DataType;
26
+ offset: number;
27
+ length: number;
28
+ nullCount: number;
29
+ buffers: any;
30
+ children: arrow.Data[];
31
+ dictionary?: arrow.Vector;
32
+ };
33
+
34
+ export type ParseGeoArrowInput = {
35
+ operation: 'parse-geoarrow';
36
+ chunkData: GeoArrowChunkData;
37
+ chunkIndex: number;
38
+ chunkOffset: number;
39
+ geometryEncoding: GeoArrowEncoding;
40
+ calculateMeanCenters: boolean;
41
+ triangle: boolean;
42
+ };
43
+
44
+ export type ParseGeoArrowResult = {
45
+ chunkIndex: number;
46
+ binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null;
47
+ };
13
48
 
14
49
  /** Input data for operation: 'triangulate' */
15
50
  export type TriangulateInput = {
16
- operation: 'triangulate';
17
51
  polygonIndices: Uint16Array;
18
52
  primitivePolygonIndices: Int32Array;
19
53
  flatCoordinateArray: Float64Array;
@@ -37,11 +71,21 @@ export const TriangulationWorker = {
37
71
  };
38
72
 
39
73
  /**
40
- * Provide type safety
74
+ * Triangulate a set of polygons on worker, type safe API
41
75
  */
42
76
  export function triangulateOnWorker(
43
- data: TriangulationWorkerInput,
77
+ data: TriangulateInput,
78
+ options: WorkerOptions = {}
79
+ ): Promise<TriangulateResult> {
80
+ return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options);
81
+ }
82
+
83
+ /**
84
+ * Parse GeoArrow geometry column on worker, type safe API
85
+ */
86
+ export function parseGeoArrowOnWorker(
87
+ data: ParseGeoArrowInput,
44
88
  options: WorkerOptions = {}
45
- ): Promise<TriangulationWorkerOutput> {
46
- return processOnWorker(TriangulationWorker, data, options);
89
+ ): Promise<ParseGeoArrowResult> {
90
+ return processOnWorker(TriangulationWorker, {...data, operation: 'parse-geoarrow'}, options);
47
91
  }
package/src/types.ts CHANGED
@@ -1,4 +1,5 @@
1
- // loaders.gl, MIT license
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
2
3
  // Copyright (c) vis.gl contributors
3
4
 
4
5
  type TypedIntArray = Int8Array | Uint8Array | Int16Array | Uint16Array | Int32Array | Uint32Array;
@@ -1,4 +1,5 @@
1
- // loaders.gl, MIT license
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
2
3
  // Copyright (c) vis.gl contributors
3
4
 
4
5
  import {createLoaderWorker} from '@loaders.gl/loader-utils';
@@ -0,0 +1,162 @@
1
+ import * as arrow from 'apache-arrow';
2
+ import type {Buffers} from 'apache-arrow/data';
3
+
4
+ type TypedArray =
5
+ | Uint8Array
6
+ | Uint8ClampedArray
7
+ | Uint16Array
8
+ | Uint32Array
9
+ | Int8Array
10
+ | Int16Array
11
+ | Int32Array
12
+ | Float32Array
13
+ | Float64Array;
14
+
15
+ /**
16
+ * Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if
17
+ * it is shared with other data.
18
+ *
19
+ * The purpose of this function is to enable transferring a `Data` instance,
20
+ * e.g. to a web worker, without neutering any other data.
21
+ *
22
+ * Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where
23
+ * the typed array's `byteOffset` is not `0` or where its `byteLength` does not
24
+ * match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`.
25
+ *
26
+ * If `force` is `true`, always clone internal buffers, even if not shared. If
27
+ * the default, `false`, any internal buffers that are **not** a slice of a
28
+ * larger `ArrayBuffer` will not be copied.
29
+ */
30
+ export function hardClone<T extends arrow.DataType>(
31
+ input: arrow.Data<T>,
32
+ force?: boolean
33
+ ): arrow.Data<T>;
34
+ export function hardClone<T extends arrow.DataType>(
35
+ input: arrow.Vector<T>,
36
+ force?: boolean
37
+ ): arrow.Vector<T>;
38
+
39
+ export function hardClone<T extends arrow.DataType>(
40
+ data: arrow.Data<T> | arrow.Vector<T>,
41
+ force: boolean = false
42
+ ): arrow.Data<T> | arrow.Vector<T> {
43
+ // Check if `data` is an arrow.Vector
44
+ if ('data' in data) {
45
+ return new arrow.Vector(data.data.map((data) => hardClone(data, force)));
46
+ }
47
+
48
+ // Clone each of the children, recursively
49
+ const clonedChildren: arrow.Data[] = [];
50
+ for (const childData of data.children) {
51
+ clonedChildren.push(hardClone(childData, force));
52
+ }
53
+
54
+ // Clone the dictionary if there is one
55
+ let clonedDictionary: arrow.Vector | undefined;
56
+ if (data.dictionary !== undefined) {
57
+ clonedDictionary = hardClone(data.dictionary, force);
58
+ }
59
+
60
+ // Buffers can have up to four entries. Each of these can be `undefined` for
61
+ // one or more array types.
62
+ //
63
+ // - OFFSET: value offsets for variable size list types
64
+ // - DATA: the underlying data
65
+ // - VALIDITY: the null buffer. This may be empty or undefined if all elements
66
+ // are non-null/valid.
67
+ // - TYPE: type ids for a union type.
68
+ const clonedBuffers: Buffers<T> = {
69
+ [arrow.BufferType.OFFSET]: cloneBuffer(data.buffers[arrow.BufferType.OFFSET], force),
70
+ [arrow.BufferType.DATA]: cloneBuffer(data.buffers[arrow.BufferType.DATA], force),
71
+ [arrow.BufferType.VALIDITY]: cloneBuffer(data.buffers[arrow.BufferType.VALIDITY], force),
72
+ [arrow.BufferType.TYPE]: cloneBuffer(data.buffers[arrow.BufferType.TYPE], force)
73
+ };
74
+
75
+ // Note: the data.offset is passed on so that a sliced Data instance will not
76
+ // be "un-sliced". However keep in mind that this means we're cloning the
77
+ // _original backing buffer_, not only the portion of the Data that was
78
+ // sliced.
79
+ return new arrow.Data(
80
+ data.type,
81
+ data.offset,
82
+ data.length,
83
+ // @ts-expect-error _nullCount is protected. We're using it here to mimic
84
+ // `Data.clone`
85
+ data._nullCount,
86
+ clonedBuffers,
87
+ clonedChildren,
88
+ clonedDictionary
89
+ );
90
+ }
91
+
92
+ /**
93
+ * Test whether an arrow.Data instance is a slice of a larger `ArrayBuffer`.
94
+ */
95
+ export function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean {
96
+ // Loop over arrow.Vector
97
+ if ('data' in data) {
98
+ return data.data.some((data) => isShared(data));
99
+ }
100
+
101
+ // Check child data
102
+ for (const childData of data.children) {
103
+ if (isShared(childData)) {
104
+ return true;
105
+ }
106
+ }
107
+
108
+ // Check dictionary
109
+ if (data.dictionary !== undefined) {
110
+ if (isShared(data.dictionary)) {
111
+ return true;
112
+ }
113
+ }
114
+
115
+ const bufferTypes = [
116
+ arrow.BufferType.OFFSET,
117
+ arrow.BufferType.DATA,
118
+ arrow.BufferType.VALIDITY,
119
+ arrow.BufferType.TYPE
120
+ ];
121
+ for (const bufferType of bufferTypes) {
122
+ if (data.buffers[bufferType] !== undefined && isTypedArraySliced(data.buffers[bufferType])) {
123
+ return true;
124
+ }
125
+ }
126
+
127
+ return false;
128
+ }
129
+
130
+ /**
131
+ * Returns true if the current typed array is a partial slice on a larger
132
+ * ArrayBuffer
133
+ */
134
+ function isTypedArraySliced(arr: TypedArray): boolean {
135
+ return !(arr.byteOffset === 0 && arr.byteLength === arr.buffer.byteLength);
136
+ }
137
+
138
+ /**
139
+ * If a slice of a larger ArrayBuffer, clone to a fresh `ArrayBuffer`.
140
+ *
141
+ * If `force` is `true`, always clone the array, even if not shared.
142
+ */
143
+ function cloneBuffer<A extends TypedArray | undefined>(arr: A, force: boolean): A {
144
+ // Not all buffer types are defined for every type of Arrow array. E.g.
145
+ // `arrow.BufferType.TYPE` is only defined for the Union type.
146
+ if (arr === undefined) {
147
+ return arr;
148
+ }
149
+
150
+ // The current array is not a part of a larger ArrayBuffer, don't clone it
151
+ if (!force && !isTypedArraySliced(arr)) {
152
+ return arr;
153
+ }
154
+
155
+ // Note: TypedArray.slice() **copies** into a new ArrayBuffer
156
+
157
+ // @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A',
158
+ // but 'A' could be instantiated with a different subtype of constraint
159
+ // 'TypedArray'
160
+ // We know that arr.slice() always returns the same typed array type as `arr`
161
+ return arr.slice();
162
+ }
@@ -1,4 +1,5 @@
1
- // loaders.gl, MIT license
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
2
3
  // Copyright (c) vis.gl contributors
3
4
 
4
5
  import './triangulation-worker';
@@ -1,12 +1,20 @@
1
- // loaders.gl, MIT license
1
+ // loaders.gl
2
+ // SPDX-License-Identifier: MIT
2
3
  // Copyright (c) vis.gl contributors
3
4
 
5
+ import * as arrow from 'apache-arrow';
4
6
  import {createWorker} from '@loaders.gl/worker-utils';
5
- import {getTriangleIndices} from '../geoarrow/convert-geoarrow-to-binary-geometry';
7
+ import {
8
+ getTriangleIndices,
9
+ getBinaryGeometriesFromArrow,
10
+ BinaryDataFromGeoArrow
11
+ } from '../geoarrow/convert-geoarrow-to-binary-geometry';
6
12
  import type {
7
13
  TriangulationWorkerInput,
8
14
  TriangulateInput,
9
- TriangulateResult
15
+ TriangulateResult,
16
+ ParseGeoArrowInput,
17
+ ParseGeoArrowResult
10
18
  } from '../triangulate-on-worker';
11
19
 
12
20
  createWorker(async (data, options = {}) => {
@@ -17,6 +25,8 @@ createWorker(async (data, options = {}) => {
17
25
  return input;
18
26
  case 'triangulate':
19
27
  return triangulateBatch(data);
28
+ case 'parse-geoarrow':
29
+ return parseGeoArrowBatch(data);
20
30
  default:
21
31
  throw new Error(
22
32
  `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`
@@ -28,7 +38,6 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult {
28
38
  // Parse any WKT/WKB geometries
29
39
  // Build binary geometries
30
40
  // Call earcut and triangulate
31
- console.error('TriangulationWorker: tessellating batch', data);
32
41
  const triangleIndices = getTriangleIndices(
33
42
  data.polygonIndices,
34
43
  data.primitivePolygonIndices,
@@ -37,3 +46,48 @@ function triangulateBatch(data: TriangulateInput): TriangulateResult {
37
46
  );
38
47
  return {...data, ...(triangleIndices ? {triangleIndices} : {})};
39
48
  }
49
+
50
+ /**
51
+ * Reading the arrow file into memory is very fast. Parsing the geoarrow column is slow and blocks the main thread.
52
+ * To address this issue, we can move the parsing job from main thread to parallel web workers.
53
+ * Each web worker will parse one chunk/batch of geoarrow column, and return binary geometries to main thread.
54
+ * The app on the main thread will render the binary geometries and the parsing will not block the main thread.
55
+ *
56
+ * @param data
57
+ * @returns
58
+ */
59
+ function parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult {
60
+ let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null;
61
+ const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} =
62
+ data;
63
+ // rebuild chunkData that is only for geoarrow column
64
+ const arrowData = new arrow.Data(
65
+ chunkData.type,
66
+ chunkData.offset,
67
+ chunkData.length,
68
+ chunkData.nullCount,
69
+ chunkData.buffers,
70
+ chunkData.children,
71
+ chunkData.dictionary
72
+ );
73
+ // rebuild geometry column with chunkData
74
+ const geometryColumn = arrow.makeVector(arrowData);
75
+ if (geometryColumn) {
76
+ // NOTE: for a rebuilt arrow.Vector, there is only one chunk, so chunkIndex is always 0
77
+ const options = {calculateMeanCenters, triangle, chunkIndex: 0, chunkOffset};
78
+ binaryDataFromGeoArrow = getBinaryGeometriesFromArrow(
79
+ geometryColumn,
80
+ geometryEncoding,
81
+ options
82
+ );
83
+ // NOTE: here binaryGeometry will be copied to main thread
84
+ return {
85
+ binaryDataFromGeoArrow,
86
+ chunkIndex
87
+ };
88
+ }
89
+ return {
90
+ binaryDataFromGeoArrow,
91
+ chunkIndex
92
+ };
93
+ }