@loaders.gl/arrow 4.1.0-alpha.1 → 4.1.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/arrow-worker.js +1 -1
- package/dist/dist.dev.js +72 -25
- package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts +4 -2
- package/dist/geoarrow/convert-geoarrow-to-binary-geometry.d.ts.map +1 -1
- package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js +25 -23
- package/dist/geoarrow/convert-geoarrow-to-binary-geometry.js.map +1 -1
- package/dist/index.cjs +69 -16
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/triangulate-on-worker.d.ts +39 -5
- package/dist/triangulate-on-worker.d.ts.map +1 -1
- package/dist/triangulate-on-worker.js +11 -1
- package/dist/triangulate-on-worker.js.map +1 -1
- package/dist/triangulation-worker.js +11672 -10
- package/dist/workers/hard-clone.d.ts +23 -0
- package/dist/workers/hard-clone.d.ts.map +1 -0
- package/dist/workers/hard-clone.js +57 -0
- package/dist/workers/hard-clone.js.map +1 -0
- package/dist/workers/triangulation-worker.js +34 -1
- package/dist/workers/triangulation-worker.js.map +1 -1
- package/package.json +6 -6
- package/src/geoarrow/convert-geoarrow-to-binary-geometry.ts +26 -19
- package/src/index.ts +8 -2
- package/src/triangulate-on-worker.ts +50 -7
- package/src/workers/hard-clone.ts +162 -0
- package/src/workers/triangulation-worker.ts +56 -2
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
2
|
+
/**
|
|
3
|
+
* Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if
|
|
4
|
+
* it is shared with other data.
|
|
5
|
+
*
|
|
6
|
+
* The purpose of this function is to enable transferring a `Data` instance,
|
|
7
|
+
* e.g. to a web worker, without neutering any other data.
|
|
8
|
+
*
|
|
9
|
+
* Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where
|
|
10
|
+
* the typed array's `byteOffset` is not `0` or where its `byteLength` does not
|
|
11
|
+
* match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`.
|
|
12
|
+
*
|
|
13
|
+
* If `force` is `true`, always clone internal buffers, even if not shared. If
|
|
14
|
+
* `false` (the default), any internal buffers that are **not** a slice of a
|
|
15
|
+
* larger `ArrayBuffer` will not be copied.
|
|
16
|
+
*/
|
|
17
|
+
export declare function hardClone<T extends arrow.DataType>(input: arrow.Data<T>, force?: boolean): arrow.Data<T>;
|
|
18
|
+
export declare function hardClone<T extends arrow.DataType>(input: arrow.Vector<T>, force?: boolean): arrow.Vector<T>;
|
|
19
|
+
/**
|
|
20
|
+
* Test whether any buffer of an arrow.Data or arrow.Vector is a slice of a larger `ArrayBuffer`.
|
|
21
|
+
*/
|
|
22
|
+
export declare function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean;
|
|
23
|
+
//# sourceMappingURL=hard-clone.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hard-clone.d.ts","sourceRoot":"","sources":["../../src/workers/hard-clone.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,KAAK,MAAM,cAAc,CAAC;AActC;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,SAAS,CAAC,CAAC,SAAS,KAAK,CAAC,QAAQ,EAChD,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EACpB,KAAK,CAAC,EAAE,OAAO,GACd,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,wBAAgB,SAAS,CAAC,CAAC,SAAS,KAAK,CAAC,QAAQ,EAChD,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EACtB,KAAK,CAAC,EAAE,OAAO,GACd,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AAuDnB;;GAEG;AACH,wBAAgB,QAAQ,CAAC,CAAC,SAAS,KAAK,CAAC,QAAQ,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAiCjG"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
2
|
+
/**
 * Clone an Arrow `Data` or `Vector` so that its buffers no longer share an
 * `ArrayBuffer` with anything else.
 *
 * A `Vector` (recognised by its `data` property) is rebuilt from hard-cloned
 * copies of each of its `Data` chunks. For a `Data`, the children, the optional
 * dictionary and the four buffer slots are cloned, in that order.
 *
 * @param data - the `arrow.Data` or `arrow.Vector` to clone
 * @param force - when `true`, copy every buffer; when `false` (default), copy
 *   only buffers that are a partial view of a larger `ArrayBuffer`
 * @returns a new `arrow.Vector` or `arrow.Data` mirroring the input
 */
export function hardClone(data, force = false) {
  // A Vector wraps a list of Data chunks: clone each chunk and re-wrap.
  if ('data' in data) {
    return new arrow.Vector(data.data.map((chunk) => hardClone(chunk, force)));
  }

  // Recursively clone nested child Data.
  const childCopies = Array.from(data.children, (child) => hardClone(child, force));

  // Clone the dictionary only when one is present.
  const dictionaryCopy =
    data.dictionary === undefined ? undefined : hardClone(data.dictionary, force);

  // Any of the four buffer slots may be undefined for a given Arrow type;
  // cloneBuffer passes undefined straight through.
  const bufferCopies = {};
  const bufferKinds = [
    arrow.BufferType.OFFSET,
    arrow.BufferType.DATA,
    arrow.BufferType.VALIDITY,
    arrow.BufferType.TYPE
  ];
  for (const kind of bufferKinds) {
    bufferCopies[kind] = cloneBuffer(data.buffers[kind], force);
  }

  // offset/length are carried over unchanged, so a sliced Data stays sliced.
  // `_nullCount` is read directly to carry over the stored null count.
  return new arrow.Data(
    data.type,
    data.offset,
    data.length,
    data._nullCount,
    bufferCopies,
    childCopies,
    dictionaryCopy
  );
}
|
|
23
|
+
/**
 * Report whether any buffer reachable from an Arrow `Data` or `Vector` is a
 * partial view onto a larger `ArrayBuffer`.
 *
 * The search covers every chunk of a `Vector`, and for a `Data` its children,
 * its dictionary (when defined) and finally its own four buffer slots.
 *
 * @param data - the `arrow.Data` or `arrow.Vector` to inspect
 * @returns `true` as soon as one sliced buffer is found, otherwise `false`
 */
export function isShared(data) {
  // A Vector is shared when any of its Data chunks is.
  if ('data' in data) {
    return data.data.some((chunk) => isShared(chunk));
  }

  // Children first, then the dictionary.
  const nested = [...data.children];
  if (data.dictionary !== undefined) {
    nested.push(data.dictionary);
  }
  if (nested.some((item) => isShared(item))) {
    return true;
  }

  // Finally this Data's own buffers; undefined slots are ignored.
  const bufferKinds = [
    arrow.BufferType.OFFSET,
    arrow.BufferType.DATA,
    arrow.BufferType.VALIDITY,
    arrow.BufferType.TYPE
  ];
  return bufferKinds.some((kind) => {
    const buffer = data.buffers[kind];
    return buffer !== undefined && isTypedArraySliced(buffer);
  });
}
|
|
45
|
+
/**
 * Tell whether a typed array covers only part of its backing `ArrayBuffer`.
 *
 * @param arr - the typed array to inspect
 * @returns `true` when the view starts past byte 0 or is shorter than the
 *   backing buffer; `false` when it spans the buffer exactly
 */
function isTypedArraySliced(arr) {
  const startsLate = arr.byteOffset !== 0;
  const coversLess = arr.byteLength !== arr.buffer.byteLength;
  return startsLate || coversLess;
}
|
|
48
|
+
/**
 * Copy a typed array into a fresh `ArrayBuffer` when required.
 *
 * @param arr - the typed array to consider, or `undefined` for an absent slot
 * @param force - when truthy, copy even if the array spans its whole buffer
 * @returns `arr` itself when it is `undefined`, or when it is not sliced and
 *   `force` is falsy; otherwise the result of `arr.slice()`
 */
function cloneBuffer(arr, force) {
  // A copy is needed only for a defined array that is forced or sliced.
  const mustCopy = arr !== undefined && (force || isTypedArraySliced(arr));
  return mustCopy ? arr.slice() : arr;
}
|
|
57
|
+
//# sourceMappingURL=hard-clone.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"hard-clone.js","names":["arrow","hardClone","data","force","arguments","length","undefined","Vector","map","clonedChildren","childData","children","push","clonedDictionary","dictionary","clonedBuffers","BufferType","OFFSET","cloneBuffer","buffers","DATA","VALIDITY","TYPE","Data","type","offset","_nullCount","isShared","some","bufferTypes","bufferType","isTypedArraySliced","arr","byteOffset","byteLength","buffer","slice"],"sources":["../../src/workers/hard-clone.ts"],"sourcesContent":["import * as arrow from 'apache-arrow';\nimport type {Buffers} from 'apache-arrow/data';\n\ntype TypedArray =\n | Uint8Array\n | Uint8ClampedArray\n | Uint16Array\n | Uint32Array\n | Int8Array\n | Int16Array\n | Int32Array\n | Float32Array\n | Float64Array;\n\n/**\n * Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if\n * it is shared with other.\n *\n * The purpose of this function is to enable transferring a `Data` instance,\n * e.g. to a web worker, without neutering any other data.\n *\n * Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where\n * the typed array's `byteOffset` is not `0` and where its `byteLength` does not\n * match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`.\n *\n * If `force` is `true`, always clone internal buffers, even if not shared. 
If\n * the default, `false`, any internal buffers that are **not** a slice of a\n * larger `ArrayBuffer` will not be copied.\n */\nexport function hardClone<T extends arrow.DataType>(\n input: arrow.Data<T>,\n force?: boolean\n): arrow.Data<T>;\nexport function hardClone<T extends arrow.DataType>(\n input: arrow.Vector<T>,\n force?: boolean\n): arrow.Vector<T>;\n\nexport function hardClone<T extends arrow.DataType>(\n data: arrow.Data<T> | arrow.Vector<T>,\n force: boolean = false\n): arrow.Data<T> | arrow.Vector<T> {\n // Check if `data` is an arrow.Vector\n if ('data' in data) {\n return new arrow.Vector(data.data.map((data) => hardClone(data, force)));\n }\n\n // Clone each of the children, recursively\n const clonedChildren: arrow.Data[] = [];\n for (const childData of data.children) {\n clonedChildren.push(hardClone(childData, force));\n }\n\n // Clone the dictionary if there is one\n let clonedDictionary: arrow.Vector | undefined;\n if (data.dictionary !== undefined) {\n clonedDictionary = hardClone(data.dictionary, force);\n }\n\n // Buffers can have up to four entries. Each of these can be `undefined` for\n // one or more array types.\n //\n // - OFFSET: value offsets for variable size list types\n // - DATA: the underlying data\n // - VALIDITY: the null buffer. This may be empty or undefined if all elements\n // are non-null/valid.\n // - TYPE: type ids for a union type.\n const clonedBuffers: Buffers<T> = {\n [arrow.BufferType.OFFSET]: cloneBuffer(data.buffers[arrow.BufferType.OFFSET], force),\n [arrow.BufferType.DATA]: cloneBuffer(data.buffers[arrow.BufferType.DATA], force),\n [arrow.BufferType.VALIDITY]: cloneBuffer(data.buffers[arrow.BufferType.VALIDITY], force),\n [arrow.BufferType.TYPE]: cloneBuffer(data.buffers[arrow.BufferType.TYPE], force)\n };\n\n // Note: the data.offset is passed on so that a sliced Data instance will not\n // be \"un-sliced\". 
However keep in mind that this means we're cloning the\n // _original backing buffer_, not only the portion of the Data that was\n // sliced.\n return new arrow.Data(\n data.type,\n data.offset,\n data.length,\n // @ts-expect-error _nullCount is protected. We're using it here to mimic\n // `Data.clone`\n data._nullCount,\n clonedBuffers,\n clonedChildren,\n clonedDictionary\n );\n}\n\n/**\n * Test whether an arrow.Data instance is a slice of a larger `ArrayBuffer`.\n */\nexport function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean {\n // Loop over arrow.Vector\n if ('data' in data) {\n return data.data.some((data) => isShared(data));\n }\n\n // Check child data\n for (const childData of data.children) {\n if (isShared(childData)) {\n return true;\n }\n }\n\n // Check dictionary\n if (data.dictionary !== undefined) {\n if (isShared(data.dictionary)) {\n return true;\n }\n }\n\n const bufferTypes = [\n arrow.BufferType.OFFSET,\n arrow.BufferType.DATA,\n arrow.BufferType.VALIDITY,\n arrow.BufferType.TYPE\n ];\n for (const bufferType of bufferTypes) {\n if (data.buffers[bufferType] !== undefined && isTypedArraySliced(data.buffers[bufferType])) {\n return true;\n }\n }\n\n return false;\n}\n\n/**\n * Returns true if the current typed array is a partial slice on a larger\n * ArrayBuffer\n */\nfunction isTypedArraySliced(arr: TypedArray): boolean {\n return !(arr.byteOffset === 0 && arr.byteLength === arr.buffer.byteLength);\n}\n\n/**\n * If a slice of a larger ArrayBuffer, clone to a fresh `ArrayBuffer`.\n *\n * If `force` is `true`, always clone the array, even if not shared.\n */\nfunction cloneBuffer<A extends TypedArray | undefined>(arr: A, force: boolean): A {\n // Not all buffer types are defined for every type of Arrow array. 
E.g.\n // `arrow.BufferType.TYPE` is only defined for the Union type.\n if (arr === undefined) {\n return arr;\n }\n\n // The current array is not a part of a larger ArrayBuffer, don't clone it\n if (!force && !isTypedArraySliced(arr)) {\n return arr;\n }\n\n // Note: TypedArray.slice() **copies** into a new ArrayBuffer\n\n // @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A',\n // but 'A' could be instantiated with a different subtype of constraint\n // 'TypedArray'\n // We know from arr.slice that it will always return the same\n return arr.slice();\n}\n"],"mappings":"AAAA,OAAO,KAAKA,KAAK,MAAM,cAAc;AAsCrC,OAAO,SAASC,SAASA,CACvBC,IAAqC,EAEJ;EAAA,IADjCC,KAAc,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,KAAK;EAGtB,IAAI,MAAM,IAAIF,IAAI,EAAE;IAClB,OAAO,IAAIF,KAAK,CAACO,MAAM,CAACL,IAAI,CAACA,IAAI,CAACM,GAAG,CAAEN,IAAI,IAAKD,SAAS,CAACC,IAAI,EAAEC,KAAK,CAAC,CAAC,CAAC;EAC1E;EAGA,MAAMM,cAA4B,GAAG,EAAE;EACvC,KAAK,MAAMC,SAAS,IAAIR,IAAI,CAACS,QAAQ,EAAE;IACrCF,cAAc,CAACG,IAAI,CAACX,SAAS,CAACS,SAAS,EAAEP,KAAK,CAAC,CAAC;EAClD;EAGA,IAAIU,gBAA0C;EAC9C,IAAIX,IAAI,CAACY,UAAU,KAAKR,SAAS,EAAE;IACjCO,gBAAgB,GAAGZ,SAAS,CAACC,IAAI,CAACY,UAAU,EAAEX,KAAK,CAAC;EACtD;EAUA,MAAMY,aAAyB,GAAG;IAChC,CAACf,KAAK,CAACgB,UAAU,CAACC,MAAM,GAAGC,WAAW,CAAChB,IAAI,CAACiB,OAAO,CAACnB,KAAK,CAACgB,UAAU,CAACC,MAAM,CAAC,EAAEd,KAAK,CAAC;IACpF,CAACH,KAAK,CAACgB,UAAU,CAACI,IAAI,GAAGF,WAAW,CAAChB,IAAI,CAACiB,OAAO,CAACnB,KAAK,CAACgB,UAAU,CAACI,IAAI,CAAC,EAAEjB,KAAK,CAAC;IAChF,CAACH,KAAK,CAACgB,UAAU,CAACK,QAAQ,GAAGH,WAAW,CAAChB,IAAI,CAACiB,OAAO,CAACnB,KAAK,CAACgB,UAAU,CAACK,QAAQ,CAAC,EAAElB,KAAK,CAAC;IACxF,CAACH,KAAK,CAACgB,UAAU,CAACM,IAAI,GAAGJ,WAAW,CAAChB,IAAI,CAACiB,OAAO,CAACnB,KAAK,CAACgB,UAAU,CAACM,IAAI,CAAC,EAAEnB,KAAK;EACjF,CAAC;EAMD,OAAO,IAAIH,KAAK,CAACuB,IAAI,CACnBrB,IAAI,CAACsB,IAAI,EACTtB,IAAI,CAACuB,MAAM,EACXvB,IAAI,CAACG,MAAM,EAGXH,IAAI,CAACwB,UAAU,EACfX,aAAa,EACbN,cAAc,EACdI,gBACF,CAAC;AACH;AAKA,OAAO,SAASc,QAAQA,CAA2BzB,IAAqC,EAAW;EAEjG,IAAI,MAAM,IAAIA,IAAI,EAAE;IA
ClB,OAAOA,IAAI,CAACA,IAAI,CAAC0B,IAAI,CAAE1B,IAAI,IAAKyB,QAAQ,CAACzB,IAAI,CAAC,CAAC;EACjD;EAGA,KAAK,MAAMQ,SAAS,IAAIR,IAAI,CAACS,QAAQ,EAAE;IACrC,IAAIgB,QAAQ,CAACjB,SAAS,CAAC,EAAE;MACvB,OAAO,IAAI;IACb;EACF;EAGA,IAAIR,IAAI,CAACY,UAAU,KAAKR,SAAS,EAAE;IACjC,IAAIqB,QAAQ,CAACzB,IAAI,CAACY,UAAU,CAAC,EAAE;MAC7B,OAAO,IAAI;IACb;EACF;EAEA,MAAMe,WAAW,GAAG,CAClB7B,KAAK,CAACgB,UAAU,CAACC,MAAM,EACvBjB,KAAK,CAACgB,UAAU,CAACI,IAAI,EACrBpB,KAAK,CAACgB,UAAU,CAACK,QAAQ,EACzBrB,KAAK,CAACgB,UAAU,CAACM,IAAI,CACtB;EACD,KAAK,MAAMQ,UAAU,IAAID,WAAW,EAAE;IACpC,IAAI3B,IAAI,CAACiB,OAAO,CAACW,UAAU,CAAC,KAAKxB,SAAS,IAAIyB,kBAAkB,CAAC7B,IAAI,CAACiB,OAAO,CAACW,UAAU,CAAC,CAAC,EAAE;MAC1F,OAAO,IAAI;IACb;EACF;EAEA,OAAO,KAAK;AACd;AAMA,SAASC,kBAAkBA,CAACC,GAAe,EAAW;EACpD,OAAO,EAAEA,GAAG,CAACC,UAAU,KAAK,CAAC,IAAID,GAAG,CAACE,UAAU,KAAKF,GAAG,CAACG,MAAM,CAACD,UAAU,CAAC;AAC5E;AAOA,SAAShB,WAAWA,CAAmCc,GAAM,EAAE7B,KAAc,EAAK;EAGhF,IAAI6B,GAAG,KAAK1B,SAAS,EAAE;IACrB,OAAO0B,GAAG;EACZ;EAGA,IAAI,CAAC7B,KAAK,IAAI,CAAC4B,kBAAkB,CAACC,GAAG,CAAC,EAAE;IACtC,OAAOA,GAAG;EACZ;EAQA,OAAOA,GAAG,CAACI,KAAK,CAAC,CAAC;AACpB"}
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
1
2
|
import { createWorker } from '@loaders.gl/worker-utils';
|
|
2
|
-
import { getTriangleIndices } from "../geoarrow/convert-geoarrow-to-binary-geometry.js";
|
|
3
|
+
import { getTriangleIndices, getBinaryGeometriesFromArrow } from "../geoarrow/convert-geoarrow-to-binary-geometry.js";
|
|
3
4
|
createWorker(async function (data) {
|
|
4
5
|
let options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {};
|
|
5
6
|
const input = data;
|
|
@@ -9,6 +10,8 @@ createWorker(async function (data) {
|
|
|
9
10
|
return input;
|
|
10
11
|
case 'triangulate':
|
|
11
12
|
return triangulateBatch(data);
|
|
13
|
+
case 'parse-geoarrow':
|
|
14
|
+
return parseGeoArrowBatch(data);
|
|
12
15
|
default:
|
|
13
16
|
throw new Error(`TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`);
|
|
14
17
|
}
|
|
@@ -23,4 +26,34 @@ function triangulateBatch(data) {
|
|
|
23
26
|
} : {})
|
|
24
27
|
};
|
|
25
28
|
}
|
|
29
|
+
/**
 * Rebuild one GeoArrow chunk inside the worker and convert it to binary
 * geometries.
 *
 * The incoming `chunkData` is a plain-object snapshot of an `arrow.Data`; it is
 * turned back into a real `arrow.Data`, wrapped in a single-chunk vector, and
 * handed to `getBinaryGeometriesFromArrow`.
 *
 * @param data - worker input carrying `chunkData`, `chunkIndex`, `chunkOffset`,
 *   `geometryEncoding`, `calculateMeanCenters` and `triangle`
 * @returns `{binaryDataFromGeoArrow, chunkIndex}`; `binaryDataFromGeoArrow` is
 *   `null` when no vector could be built, and `chunkIndex` is echoed back so the
 *   caller can match the result to the chunk it sent
 */
function parseGeoArrowBatch(data) {
  const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} =
    data;

  // Rebuild the arrow.Data for the geometry column from its transferred fields.
  const arrowData = new arrow.Data(
    chunkData.type,
    chunkData.offset,
    chunkData.length,
    chunkData.nullCount,
    chunkData.buffers,
    chunkData.children,
    chunkData.dictionary
  );
  const geometryColumn = arrow.makeVector(arrowData);

  let binaryDataFromGeoArrow = null;
  if (geometryColumn) {
    const options = {
      calculateMeanCenters,
      // The converter reads `options.triangulate`; forwarding the flag under the
      // key `triangle` meant the caller's request was never seen.
      triangulate: triangle,
      // The rebuilt vector holds exactly one chunk, so its index is always 0.
      chunkIndex: 0,
      chunkOffset
    };
    binaryDataFromGeoArrow = getBinaryGeometriesFromArrow(
      geometryColumn,
      geometryEncoding,
      options
    );
  }

  return {
    binaryDataFromGeoArrow,
    chunkIndex
  };
}
|
|
26
59
|
//# sourceMappingURL=triangulation-worker.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"triangulation-worker.js","names":["createWorker","getTriangleIndices","data","options","arguments","length","undefined","input","operation","triangulateBatch","Error","console","error","triangleIndices","polygonIndices","primitivePolygonIndices","flatCoordinateArray","nDim"],"sources":["../../src/workers/triangulation-worker.ts"],"sourcesContent":["// loaders.gl, MIT license\n// Copyright (c) vis.gl contributors\n\nimport {createWorker} from '@loaders.gl/worker-utils';\nimport {getTriangleIndices} from '../geoarrow/convert-geoarrow-to-binary-geometry';\nimport type {\n TriangulationWorkerInput,\n TriangulateInput,\n TriangulateResult\n} from '../triangulate-on-worker';\n\ncreateWorker(async (data, options = {}) => {\n const input = data as TriangulationWorkerInput;\n const operation = input?.operation;\n switch (operation) {\n case 'test':\n return input;\n case 'triangulate':\n return triangulateBatch(data);\n default:\n throw new Error(\n `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`\n );\n }\n});\n\nfunction triangulateBatch(data: TriangulateInput): TriangulateResult {\n // Parse any WKT/WKB geometries\n // Build binary geometries\n // Call earcut and triangulate\n console.error('TriangulationWorker: tessellating batch', data);\n const triangleIndices = getTriangleIndices(\n data.polygonIndices,\n data.primitivePolygonIndices,\n data.flatCoordinateArray,\n data.nDim\n );\n return {...data, ...(triangleIndices ? {triangleIndices} : {})};\n}\n"],"mappings":"AAGA,
|
|
1
|
+
{"version":3,"file":"triangulation-worker.js","names":["arrow","createWorker","getTriangleIndices","getBinaryGeometriesFromArrow","data","options","arguments","length","undefined","input","operation","triangulateBatch","parseGeoArrowBatch","Error","console","error","triangleIndices","polygonIndices","primitivePolygonIndices","flatCoordinateArray","nDim","binaryDataFromGeoArrow","chunkData","chunkIndex","chunkOffset","geometryEncoding","calculateMeanCenters","triangle","arrowData","Data","type","offset","nullCount","buffers","children","dictionary","geometryColumn","makeVector"],"sources":["../../src/workers/triangulation-worker.ts"],"sourcesContent":["// loaders.gl, MIT license\n// Copyright (c) vis.gl contributors\n\nimport * as arrow from 'apache-arrow';\nimport {createWorker} from '@loaders.gl/worker-utils';\nimport {\n getTriangleIndices,\n getBinaryGeometriesFromArrow,\n BinaryDataFromGeoArrow\n} from '../geoarrow/convert-geoarrow-to-binary-geometry';\nimport type {\n TriangulationWorkerInput,\n TriangulateInput,\n TriangulateResult,\n ParseGeoArrowInput,\n ParseGeoArrowResult\n} from '../triangulate-on-worker';\n\ncreateWorker(async (data, options = {}) => {\n const input = data as TriangulationWorkerInput;\n const operation = input?.operation;\n switch (operation) {\n case 'test':\n return input;\n case 'triangulate':\n return triangulateBatch(data);\n case 'parse-geoarrow':\n return parseGeoArrowBatch(data);\n default:\n throw new Error(\n `TriangulationWorker: Unsupported operation ${operation}. Expected 'triangulate'`\n );\n }\n});\n\nfunction triangulateBatch(data: TriangulateInput): TriangulateResult {\n // Parse any WKT/WKB geometries\n // Build binary geometries\n // Call earcut and triangulate\n console.error('TriangulationWorker: tessellating batch', data);\n const triangleIndices = getTriangleIndices(\n data.polygonIndices,\n data.primitivePolygonIndices,\n data.flatCoordinateArray,\n data.nDim\n );\n return {...data, ...(triangleIndices ? 
{triangleIndices} : {})};\n}\n\n/**\n * Reading the arrow file into memory is very fast. Parsing the geoarrow column is slow, and blocking the main thread.\n * To address this issue, we can move the parsing job from main thread to parallel web workers.\n * Each web worker will parse one chunk/batch of geoarrow column, and return binary geometries to main thread.\n * The app on the main thread will render the binary geometries and the parsing will not block the main thread.\n *\n * @param data\n * @returns\n */\nfunction parseGeoArrowBatch(data: ParseGeoArrowInput): ParseGeoArrowResult {\n let binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null = null;\n const {chunkData, chunkIndex, chunkOffset, geometryEncoding, calculateMeanCenters, triangle} =\n data;\n // rebuild chunkData that is only for geoarrow column\n const arrowData = new arrow.Data(\n chunkData.type,\n chunkData.offset,\n chunkData.length,\n chunkData.nullCount,\n chunkData.buffers,\n chunkData.children,\n chunkData.dictionary\n );\n // rebuild geometry column with chunkData\n const geometryColumn = arrow.makeVector(arrowData);\n if (geometryColumn) {\n // NOTE: for a rebuild arrow.Vector, there is only one chunk, so chunkIndex is always 0\n const options = {calculateMeanCenters, triangle, chunkIndex: 0, chunkOffset};\n binaryDataFromGeoArrow = getBinaryGeometriesFromArrow(\n geometryColumn,\n geometryEncoding,\n options\n );\n // NOTE: here binaryGeometry will be copied to main thread\n return {\n binaryDataFromGeoArrow,\n chunkIndex\n };\n }\n return {\n binaryDataFromGeoArrow,\n chunkIndex\n 
};\n}\n"],"mappings":"AAGA,OAAO,KAAKA,KAAK,MAAM,cAAc;AACrC,SAAQC,YAAY,QAAO,0BAA0B;AAAC,SAEpDC,kBAAkB,EAClBC,4BAA4B;AAW9BF,YAAY,CAAC,gBAAOG,IAAI,EAAmB;EAAA,IAAjBC,OAAO,GAAAC,SAAA,CAAAC,MAAA,QAAAD,SAAA,QAAAE,SAAA,GAAAF,SAAA,MAAG,CAAC,CAAC;EACpC,MAAMG,KAAK,GAAGL,IAAgC;EAC9C,MAAMM,SAAS,GAAGD,KAAK,aAALA,KAAK,uBAALA,KAAK,CAAEC,SAAS;EAClC,QAAQA,SAAS;IACf,KAAK,MAAM;MACT,OAAOD,KAAK;IACd,KAAK,aAAa;MAChB,OAAOE,gBAAgB,CAACP,IAAI,CAAC;IAC/B,KAAK,gBAAgB;MACnB,OAAOQ,kBAAkB,CAACR,IAAI,CAAC;IACjC;MACE,MAAM,IAAIS,KAAK,CACZ,8CAA6CH,SAAU,0BAC1D,CAAC;EACL;AACF,CAAC,CAAC;AAEF,SAASC,gBAAgBA,CAACP,IAAsB,EAAqB;EAInEU,OAAO,CAACC,KAAK,CAAC,yCAAyC,EAAEX,IAAI,CAAC;EAC9D,MAAMY,eAAe,GAAGd,kBAAkB,CACxCE,IAAI,CAACa,cAAc,EACnBb,IAAI,CAACc,uBAAuB,EAC5Bd,IAAI,CAACe,mBAAmB,EACxBf,IAAI,CAACgB,IACP,CAAC;EACD,OAAO;IAAC,GAAGhB,IAAI;IAAE,IAAIY,eAAe,GAAG;MAACA;IAAe,CAAC,GAAG,CAAC,CAAC;EAAC,CAAC;AACjE;AAWA,SAASJ,kBAAkBA,CAACR,IAAwB,EAAuB;EACzE,IAAIiB,sBAAqD,GAAG,IAAI;EAChE,MAAM;IAACC,SAAS;IAAEC,UAAU;IAAEC,WAAW;IAAEC,gBAAgB;IAAEC,oBAAoB;IAAEC;EAAQ,CAAC,GAC1FvB,IAAI;EAEN,MAAMwB,SAAS,GAAG,IAAI5B,KAAK,CAAC6B,IAAI,CAC9BP,SAAS,CAACQ,IAAI,EACdR,SAAS,CAACS,MAAM,EAChBT,SAAS,CAACf,MAAM,EAChBe,SAAS,CAACU,SAAS,EACnBV,SAAS,CAACW,OAAO,EACjBX,SAAS,CAACY,QAAQ,EAClBZ,SAAS,CAACa,UACZ,CAAC;EAED,MAAMC,cAAc,GAAGpC,KAAK,CAACqC,UAAU,CAACT,SAAS,CAAC;EAClD,IAAIQ,cAAc,EAAE;IAElB,MAAM/B,OAAO,GAAG;MAACqB,oBAAoB;MAAEC,QAAQ;MAAEJ,UAAU,EAAE,CAAC;MAAEC;IAAW,CAAC;IAC5EH,sBAAsB,GAAGlB,4BAA4B,CACnDiC,cAAc,EACdX,gBAAgB,EAChBpB,OACF,CAAC;IAED,OAAO;MACLgB,sBAAsB;MACtBE;IACF,CAAC;EACH;EACA,OAAO;IACLF,sBAAsB;IACtBE;EACF,CAAC;AACH"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@loaders.gl/arrow",
|
|
3
|
-
"version": "4.1.0-alpha.
|
|
3
|
+
"version": "4.1.0-alpha.2",
|
|
4
4
|
"description": "Simple columnar table loader for the Apache Arrow format",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -52,12 +52,12 @@
|
|
|
52
52
|
"build-worker2": "esbuild src/workers/arrow-worker.ts --bundle --outfile=dist/arrow-worker.js --platform=browser --external:{stream}"
|
|
53
53
|
},
|
|
54
54
|
"dependencies": {
|
|
55
|
-
"@loaders.gl/gis": "4.1.0-alpha.
|
|
56
|
-
"@loaders.gl/loader-utils": "4.1.0-alpha.
|
|
57
|
-
"@loaders.gl/schema": "4.1.0-alpha.
|
|
58
|
-
"@loaders.gl/wkt": "4.1.0-alpha.
|
|
55
|
+
"@loaders.gl/gis": "4.1.0-alpha.2",
|
|
56
|
+
"@loaders.gl/loader-utils": "4.1.0-alpha.2",
|
|
57
|
+
"@loaders.gl/schema": "4.1.0-alpha.2",
|
|
58
|
+
"@loaders.gl/wkt": "4.1.0-alpha.2",
|
|
59
59
|
"@math.gl/polygon": "4.0.0",
|
|
60
60
|
"apache-arrow": "^13.0.0"
|
|
61
61
|
},
|
|
62
|
-
"gitHead": "
|
|
62
|
+
"gitHead": "a248382edd20e846c1ccb23c15d089fb9b368dbc"
|
|
63
63
|
}
|
|
@@ -45,7 +45,7 @@ type BinaryGeometryContent = {
|
|
|
45
45
|
geomOffset: Int32Array;
|
|
46
46
|
/** Array of geometry indices: the start index of each geometry */
|
|
47
47
|
geometryIndicies: Uint16Array;
|
|
48
|
-
/** (Optional) indices of triangels returned from polygon
|
|
48
|
+
/** (Optional) indices of triangles returned from polygon triangulation (Polygon only) */
|
|
49
49
|
triangles?: Uint32Array;
|
|
50
50
|
/** (Optional) array of mean center of each geometry */
|
|
51
51
|
meanCenters?: Float64Array;
|
|
@@ -54,17 +54,21 @@ type BinaryGeometryContent = {
|
|
|
54
54
|
/**
 * Create an empty binary geometry template (see deck.gl BinaryGeometry).
 *
 * Every call builds new typed arrays and containers, so the returned object
 * can be spread into or mutated by the caller without affecting other results.
 *
 * @returns an object with empty `globalFeatureIds`, `positions` (size 2),
 *   `properties`, `numericProps` and `featureIds` entries
 */
export function getBinaryGeometryTemplate() {
  // Both id accessors are empty Uint32Array attributes with one value per vertex.
  const emptyIds = () => ({value: new Uint32Array(0), size: 1});

  const globalFeatureIds = emptyIds();
  const positions = {value: new Float32Array(0), size: 2};
  const featureIds = emptyIds();

  return {
    globalFeatureIds,
    positions,
    properties: [],
    numericProps: {},
    featureIds
  };
}
|
|
64
66
|
|
|
65
67
|
export type BinaryGeometriesFromArrowOptions = {
|
|
66
68
|
/** option to specify which chunk to get binary geometries from, for progressive rendering */
|
|
67
69
|
chunkIndex?: number;
|
|
70
|
+
/** The offset (beginning index of rows) of input chunk. Used for reconstructing globalFeatureIds in web workers */
|
|
71
|
+
chunkOffset?: number;
|
|
68
72
|
/** option to get mean centers from geometries, for polygon filtering */
|
|
69
73
|
calculateMeanCenters?: boolean;
|
|
70
74
|
/** option to compute the triangle indices by tesselating polygons */
|
|
@@ -90,9 +94,12 @@ export function getBinaryGeometriesFromArrow(
|
|
|
90
94
|
line: geoEncoding === 'geoarrow.multilinestring' || geoEncoding === 'geoarrow.linestring'
|
|
91
95
|
};
|
|
92
96
|
|
|
93
|
-
const chunks =
|
|
97
|
+
const chunks =
|
|
98
|
+
options?.chunkIndex !== undefined && options?.chunkIndex >= 0
|
|
99
|
+
? [geoColumn.data[options?.chunkIndex]]
|
|
100
|
+
: geoColumn.data;
|
|
94
101
|
let bounds: [number, number, number, number] = [Infinity, Infinity, -Infinity, -Infinity];
|
|
95
|
-
let globalFeatureIdOffset = 0;
|
|
102
|
+
let globalFeatureIdOffset = options?.chunkOffset || 0;
|
|
96
103
|
const binaryGeometries: BinaryFeatures[] = [];
|
|
97
104
|
|
|
98
105
|
chunks.forEach((chunk) => {
|
|
@@ -111,6 +118,7 @@ export function getBinaryGeometriesFromArrow(
|
|
|
111
118
|
size: nDim
|
|
112
119
|
},
|
|
113
120
|
featureIds: {value: featureIds, size: 1},
|
|
121
|
+
// eslint-disable-next-line no-loop-func
|
|
114
122
|
properties: [...Array(chunk.length).keys()].map((i) => ({
|
|
115
123
|
index: i + globalFeatureIdOffset
|
|
116
124
|
}))
|
|
@@ -123,18 +131,18 @@ export function getBinaryGeometriesFromArrow(
|
|
|
123
131
|
shape: 'binary-feature-collection',
|
|
124
132
|
points: {
|
|
125
133
|
type: 'Point',
|
|
126
|
-
...
|
|
134
|
+
...getBinaryGeometryTemplate(),
|
|
127
135
|
...(featureTypes.point ? binaryContent : {})
|
|
128
136
|
},
|
|
129
137
|
lines: {
|
|
130
138
|
type: 'LineString',
|
|
131
|
-
...
|
|
139
|
+
...getBinaryGeometryTemplate(),
|
|
132
140
|
...(featureTypes.line ? binaryContent : {}),
|
|
133
141
|
pathIndices: {value: featureTypes.line ? geomOffset : new Uint16Array(0), size: 1}
|
|
134
142
|
},
|
|
135
143
|
polygons: {
|
|
136
144
|
type: 'Polygon',
|
|
137
|
-
...
|
|
145
|
+
...getBinaryGeometryTemplate(),
|
|
138
146
|
...(featureTypes.polygon ? binaryContent : {}),
|
|
139
147
|
polygonIndices: {
|
|
140
148
|
// use geomOffset as polygonIndices (same as primitivePolygonIndices) since we are using earcut to get triangle indices
|
|
@@ -281,7 +289,7 @@ function getBinaryGeometriesFromChunk(
|
|
|
281
289
|
* @param primitivePolygonIndices Indices within positions of the start of each primitive Polygon/ring
|
|
282
290
|
* @param flatCoordinateArray Array of x, y or x, y, z positions
|
|
283
291
|
* @param nDim - number of dimensions per position
|
|
284
|
-
* @returns
|
|
292
|
+
* @returns triangle indices or null if invalid polygon and earcut fails
|
|
285
293
|
*/
|
|
286
294
|
export function getTriangleIndices(
|
|
287
295
|
polygonIndices: Uint16Array,
|
|
@@ -306,13 +314,14 @@ export function getTriangleIndices(
|
|
|
306
314
|
}
|
|
307
315
|
primitiveIndex++;
|
|
308
316
|
}
|
|
317
|
+
// TODO check if each ring is closed
|
|
309
318
|
const triangleIndices = earcut(
|
|
310
319
|
slicedFlatCoords,
|
|
311
320
|
holeIndices.length > 0 ? holeIndices : undefined,
|
|
312
321
|
nDim
|
|
313
322
|
);
|
|
314
323
|
if (triangleIndices.length === 0) {
|
|
315
|
-
throw Error('
|
|
324
|
+
throw Error('earcut failed e.g. invalid polygon');
|
|
316
325
|
}
|
|
317
326
|
for (let j = 0; j < triangleIndices.length; j++) {
|
|
318
327
|
triangles.push(triangleIndices[j] + startIdx);
|
|
@@ -325,9 +334,7 @@ export function getTriangleIndices(
|
|
|
325
334
|
}
|
|
326
335
|
return trianglesUint32;
|
|
327
336
|
} catch (error) {
|
|
328
|
-
//
|
|
329
|
-
// there is an expection when tesselating invalid polygon, e.g. polygon with self-intersection
|
|
330
|
-
// return null to skip tesselating
|
|
337
|
+
// if earcut fails, return null
|
|
331
338
|
return null;
|
|
332
339
|
}
|
|
333
340
|
}
|
|
@@ -379,8 +386,8 @@ function getBinaryPolygonsFromChunk(
|
|
|
379
386
|
|
|
380
387
|
return {
|
|
381
388
|
featureIds,
|
|
382
|
-
flatCoordinateArray,
|
|
383
389
|
nDim,
|
|
390
|
+
flatCoordinateArray,
|
|
384
391
|
geomOffset,
|
|
385
392
|
geometryIndicies,
|
|
386
393
|
...(options?.triangulate && triangles ? {triangles} : {})
|
package/src/index.ts
CHANGED
|
@@ -52,7 +52,7 @@ export type {
|
|
|
52
52
|
BinaryGeometriesFromArrowOptions
|
|
53
53
|
} from './geoarrow/convert-geoarrow-to-binary-geometry';
|
|
54
54
|
export {
|
|
55
|
-
|
|
55
|
+
getBinaryGeometryTemplate,
|
|
56
56
|
getBinaryGeometriesFromArrow,
|
|
57
57
|
getTriangleIndices,
|
|
58
58
|
getMeanCentersFromBinaryGeometries
|
|
@@ -65,5 +65,11 @@ export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson-geo
|
|
|
65
65
|
export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-table';
|
|
66
66
|
|
|
67
67
|
// EXPERIMENTAL WORKER
|
|
68
|
+
export {hardClone} from './workers/hard-clone';
|
|
68
69
|
|
|
69
|
-
export {
|
|
70
|
+
export type {ParseGeoArrowInput, ParseGeoArrowResult} from './triangulate-on-worker';
|
|
71
|
+
export {
|
|
72
|
+
TriangulationWorker,
|
|
73
|
+
triangulateOnWorker,
|
|
74
|
+
parseGeoArrowOnWorker
|
|
75
|
+
} from './triangulate-on-worker';
|
|
@@ -1,19 +1,52 @@
|
|
|
1
1
|
// loaders.gl, MIT license
|
|
2
2
|
// Copyright (c) vis.gl contributors
|
|
3
3
|
|
|
4
|
+
import * as arrow from 'apache-arrow';
|
|
4
5
|
import type {WorkerOptions} from '@loaders.gl/worker-utils';
|
|
5
6
|
import {processOnWorker} from '@loaders.gl/worker-utils';
|
|
7
|
+
import {BinaryDataFromGeoArrow, GeoArrowEncoding} from '@loaders.gl/arrow';
|
|
6
8
|
|
|
7
9
|
// __VERSION__ is injected by babel-plugin-version-inline
|
|
8
10
|
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
|
|
9
11
|
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';
|
|
10
12
|
|
|
11
|
-
export type TriangulationWorkerInput =
|
|
12
|
-
|
|
13
|
+
/**
 * Every message shape the triangulation worker accepts.
 * The `operation` field is the discriminant the worker switches on.
 */
export type TriangulationWorkerInput =
  // Polygon triangulation: the payload is tagged with the operation name
  | ({operation: 'triangulate'} & TriangulateInput)
  // GeoArrow chunk parsing: the payload type carries its own operation tag
  | ParseGeoArrowInput
  // Test message: the worker returns the input unchanged
  | {operation: 'test'; data: any};
|
|
17
|
+
|
|
18
|
+
export type TriangulationWorkerOutput =
|
|
19
|
+
| ({operation: 'triangulate'} & TriangulateResult)
|
|
20
|
+
| ({operation: 'parse-geoarrow'} & ParseGeoArrowResult)
|
|
21
|
+
| {operation: 'test'; data: any};
|
|
22
|
+
|
|
23
|
+
type GeoArrowChunkData = {
|
|
24
|
+
type: arrow.DataType;
|
|
25
|
+
offset: number;
|
|
26
|
+
length: number;
|
|
27
|
+
nullCount: number;
|
|
28
|
+
buffers: any;
|
|
29
|
+
children: arrow.Data[];
|
|
30
|
+
dictionary?: arrow.Vector;
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
export type ParseGeoArrowInput = {
|
|
34
|
+
operation: 'parse-geoarrow';
|
|
35
|
+
chunkData: GeoArrowChunkData;
|
|
36
|
+
chunkIndex: number;
|
|
37
|
+
chunkOffset: number;
|
|
38
|
+
geometryEncoding: GeoArrowEncoding;
|
|
39
|
+
calculateMeanCenters: boolean;
|
|
40
|
+
triangle: boolean;
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
export type ParseGeoArrowResult = {
|
|
44
|
+
chunkIndex: number;
|
|
45
|
+
binaryDataFromGeoArrow: BinaryDataFromGeoArrow | null;
|
|
46
|
+
};
|
|
13
47
|
|
|
14
48
|
/** Input data for operation: 'triangulate' */
|
|
15
49
|
export type TriangulateInput = {
|
|
16
|
-
operation: 'triangulate';
|
|
17
50
|
polygonIndices: Uint16Array;
|
|
18
51
|
primitivePolygonIndices: Int32Array;
|
|
19
52
|
flatCoordinateArray: Float64Array;
|
|
@@ -37,11 +70,21 @@ export const TriangulationWorker = {
|
|
|
37
70
|
};
|
|
38
71
|
|
|
39
72
|
/**
|
|
40
|
-
*
|
|
73
|
+
* Triangulate a set of polygons on worker, type safe API
|
|
41
74
|
*/
|
|
42
75
|
export function triangulateOnWorker(
|
|
43
|
-
data:
|
|
76
|
+
data: TriangulateInput,
|
|
77
|
+
options: WorkerOptions = {}
|
|
78
|
+
): Promise<TriangulateResult> {
|
|
79
|
+
return processOnWorker(TriangulationWorker, {...data, operation: 'triangulate'}, options);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* Parse GeoArrow geometry column on worker, type safe API
|
|
84
|
+
*/
|
|
85
|
+
export function parseGeoArrowOnWorker(
|
|
86
|
+
data: ParseGeoArrowInput,
|
|
44
87
|
options: WorkerOptions = {}
|
|
45
|
-
): Promise<
|
|
46
|
-
return processOnWorker(TriangulationWorker, data, options);
|
|
88
|
+
): Promise<ParseGeoArrowResult> {
|
|
89
|
+
return processOnWorker(TriangulationWorker, {...data, operation: 'parse-geoarrow'}, options);
|
|
47
90
|
}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import * as arrow from 'apache-arrow';
|
|
2
|
+
import type {Buffers} from 'apache-arrow/data';
|
|
3
|
+
|
|
4
|
+
type TypedArray =
|
|
5
|
+
| Uint8Array
|
|
6
|
+
| Uint8ClampedArray
|
|
7
|
+
| Uint16Array
|
|
8
|
+
| Uint32Array
|
|
9
|
+
| Int8Array
|
|
10
|
+
| Int16Array
|
|
11
|
+
| Int32Array
|
|
12
|
+
| Float32Array
|
|
13
|
+
| Float64Array;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Clone an Arrow JS Data or Vector, detaching from an existing ArrayBuffer if
|
|
17
|
+
* it is shared with other data.
|
|
18
|
+
*
|
|
19
|
+
* The purpose of this function is to enable transferring a `Data` instance,
|
|
20
|
+
* e.g. to a web worker, without neutering any other data.
|
|
21
|
+
*
|
|
22
|
+
* Any internal buffers that are a slice of a larger `ArrayBuffer` (i.e. where
|
|
23
|
+
* the typed array's `byteOffset` is not `0` or where its `byteLength` does not
|
|
24
|
+
* match its `array.buffer.byteLength`) are copied into new `ArrayBuffers`.
|
|
25
|
+
*
|
|
26
|
+
* If `force` is `true`, always clone internal buffers, even if not shared. If
|
|
27
|
+
* `false` (the default), any internal buffers that are **not** a slice of a
|
|
28
|
+
* larger `ArrayBuffer` will not be copied.
|
|
29
|
+
*/
|
|
30
|
+
export function hardClone<T extends arrow.DataType>(
|
|
31
|
+
input: arrow.Data<T>,
|
|
32
|
+
force?: boolean
|
|
33
|
+
): arrow.Data<T>;
|
|
34
|
+
export function hardClone<T extends arrow.DataType>(
|
|
35
|
+
input: arrow.Vector<T>,
|
|
36
|
+
force?: boolean
|
|
37
|
+
): arrow.Vector<T>;
|
|
38
|
+
|
|
39
|
+
export function hardClone<T extends arrow.DataType>(
|
|
40
|
+
data: arrow.Data<T> | arrow.Vector<T>,
|
|
41
|
+
force: boolean = false
|
|
42
|
+
): arrow.Data<T> | arrow.Vector<T> {
|
|
43
|
+
// Check if `data` is an arrow.Vector
|
|
44
|
+
if ('data' in data) {
|
|
45
|
+
return new arrow.Vector(data.data.map((data) => hardClone(data, force)));
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// Clone each of the children, recursively
|
|
49
|
+
const clonedChildren: arrow.Data[] = [];
|
|
50
|
+
for (const childData of data.children) {
|
|
51
|
+
clonedChildren.push(hardClone(childData, force));
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Clone the dictionary if there is one
|
|
55
|
+
let clonedDictionary: arrow.Vector | undefined;
|
|
56
|
+
if (data.dictionary !== undefined) {
|
|
57
|
+
clonedDictionary = hardClone(data.dictionary, force);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Buffers can have up to four entries. Each of these can be `undefined` for
|
|
61
|
+
// one or more array types.
|
|
62
|
+
//
|
|
63
|
+
// - OFFSET: value offsets for variable size list types
|
|
64
|
+
// - DATA: the underlying data
|
|
65
|
+
// - VALIDITY: the null buffer. This may be empty or undefined if all elements
|
|
66
|
+
// are non-null/valid.
|
|
67
|
+
// - TYPE: type ids for a union type.
|
|
68
|
+
const clonedBuffers: Buffers<T> = {
|
|
69
|
+
[arrow.BufferType.OFFSET]: cloneBuffer(data.buffers[arrow.BufferType.OFFSET], force),
|
|
70
|
+
[arrow.BufferType.DATA]: cloneBuffer(data.buffers[arrow.BufferType.DATA], force),
|
|
71
|
+
[arrow.BufferType.VALIDITY]: cloneBuffer(data.buffers[arrow.BufferType.VALIDITY], force),
|
|
72
|
+
[arrow.BufferType.TYPE]: cloneBuffer(data.buffers[arrow.BufferType.TYPE], force)
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
// Note: the data.offset is passed on so that a sliced Data instance will not
|
|
76
|
+
// be "un-sliced". However keep in mind that this means we're cloning the
|
|
77
|
+
// _original backing buffer_, not only the portion of the Data that was
|
|
78
|
+
// sliced.
|
|
79
|
+
return new arrow.Data(
|
|
80
|
+
data.type,
|
|
81
|
+
data.offset,
|
|
82
|
+
data.length,
|
|
83
|
+
// @ts-expect-error _nullCount is protected. We're using it here to mimic
|
|
84
|
+
// `Data.clone`
|
|
85
|
+
data._nullCount,
|
|
86
|
+
clonedBuffers,
|
|
87
|
+
clonedChildren,
|
|
88
|
+
clonedDictionary
|
|
89
|
+
);
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* Test whether any buffer of an arrow.Data or arrow.Vector instance (including its children and dictionary) is a slice of a larger `ArrayBuffer`.
|
|
94
|
+
*/
|
|
95
|
+
export function isShared<T extends arrow.DataType>(data: arrow.Data<T> | arrow.Vector<T>): boolean {
|
|
96
|
+
// Loop over arrow.Vector
|
|
97
|
+
if ('data' in data) {
|
|
98
|
+
return data.data.some((data) => isShared(data));
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
// Check child data
|
|
102
|
+
for (const childData of data.children) {
|
|
103
|
+
if (isShared(childData)) {
|
|
104
|
+
return true;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
// Check dictionary
|
|
109
|
+
if (data.dictionary !== undefined) {
|
|
110
|
+
if (isShared(data.dictionary)) {
|
|
111
|
+
return true;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
const bufferTypes = [
|
|
116
|
+
arrow.BufferType.OFFSET,
|
|
117
|
+
arrow.BufferType.DATA,
|
|
118
|
+
arrow.BufferType.VALIDITY,
|
|
119
|
+
arrow.BufferType.TYPE
|
|
120
|
+
];
|
|
121
|
+
for (const bufferType of bufferTypes) {
|
|
122
|
+
if (data.buffers[bufferType] !== undefined && isTypedArraySliced(data.buffers[bufferType])) {
|
|
123
|
+
return true;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Returns true if the current typed array is a partial slice on a larger
|
|
132
|
+
* ArrayBuffer
|
|
133
|
+
*/
|
|
134
|
+
function isTypedArraySliced(arr: TypedArray): boolean {
|
|
135
|
+
return !(arr.byteOffset === 0 && arr.byteLength === arr.buffer.byteLength);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* If a slice of a larger ArrayBuffer, clone to a fresh `ArrayBuffer`.
|
|
140
|
+
*
|
|
141
|
+
* If `force` is `true`, always clone the array, even if not shared.
|
|
142
|
+
*/
|
|
143
|
+
function cloneBuffer<A extends TypedArray | undefined>(arr: A, force: boolean): A {
|
|
144
|
+
// Not all buffer types are defined for every type of Arrow array. E.g.
|
|
145
|
+
// `arrow.BufferType.TYPE` is only defined for the Union type.
|
|
146
|
+
if (arr === undefined) {
|
|
147
|
+
return arr;
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// The current array is not a part of a larger ArrayBuffer, don't clone it
|
|
151
|
+
if (!force && !isTypedArraySliced(arr)) {
|
|
152
|
+
return arr;
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// Note: TypedArray.slice() **copies** into a new ArrayBuffer
|
|
156
|
+
|
|
157
|
+
// @ts-expect-error 'Uint8Array' is assignable to the constraint of type 'A',
|
|
158
|
+
// but 'A' could be instantiated with a different subtype of constraint
|
|
159
|
+
// 'TypedArray'
|
|
160
|
+
// We know from arr.slice that it will always return the same typed array type as its input
|
|
161
|
+
return arr.slice();
|
|
162
|
+
}
|