@oceanum/datamesh 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/README.md +31 -0
- package/dist/blosc-CeItQ6qj.cjs +17 -0
- package/dist/blosc-DaK8KnI4.js +719 -0
- package/dist/browser-BDe_cnOJ.cjs +1 -0
- package/dist/browser-CJIXy_XB.js +524 -0
- package/dist/chunk-INHXZS53-DiyuLb3Z.js +14 -0
- package/dist/chunk-INHXZS53-z3BpFH8p.cjs +1 -0
- package/dist/gzip-DfmsOCZR.cjs +1 -0
- package/dist/gzip-TMN4LZ5e.js +24 -0
- package/dist/index.cjs +9 -0
- package/dist/index.d.ts +4 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11341 -0
- package/dist/lib/connector.d.ts +13 -17
- package/dist/lib/connector.d.ts.map +1 -1
- package/dist/lib/datamodel.d.ts +53 -24
- package/dist/lib/datamodel.d.ts.map +1 -1
- package/dist/lib/datasource.d.ts +8 -30
- package/dist/lib/datasource.d.ts.map +1 -1
- package/dist/lib/observe.d.ts +3 -0
- package/dist/lib/observe.d.ts.map +1 -0
- package/dist/lib/query.d.ts +30 -56
- package/dist/lib/query.d.ts.map +1 -1
- package/dist/lib/zarr.d.ts +2 -2
- package/dist/lz4-CssV0LoA.js +643 -0
- package/dist/lz4-PFaIsPAh.cjs +15 -0
- package/dist/test/fixtures.d.ts +9 -5
- package/dist/test/fixtures.d.ts.map +1 -1
- package/dist/zlib-C-RQJQaC.cjs +1 -0
- package/dist/zlib-DrihHfbK.js +24 -0
- package/dist/zstd-Cqadn9HA.js +610 -0
- package/dist/zstd-_xUhkGOV.cjs +15 -0
- package/package.json +8 -4
- package/src/docs/reverse_proxy.md +0 -0
- package/src/index.js +20 -0
- package/src/index.ts +2 -0
- package/src/lib/connector.ts +33 -23
- package/src/lib/datamodel.ts +165 -25
- package/src/lib/datasource.ts +20 -24
- package/src/lib/observe.ts +21 -0
- package/src/lib/query.ts +26 -50
- package/src/lib/workers/README.md +3 -0
- package/src/test/dataframe.test.ts +108 -0
- package/src/test/dataset.test.ts +1 -1
- package/src/test/datasource.test.ts +1 -1
- package/src/test/fixtures.ts +132 -7
- package/src/test/query.test.ts +1 -1
- package/tsconfig.lib.json +2 -1
- package/tsconfig.vitest-temp.json +50 -0
- package/typedoc.json +5 -1
- package/vite.config.ts +11 -1
- package/vite.config.ts.timestamp-1734584068599-c5119713c3c4e.mjs +67 -0
- package/dist/tsconfig.lib.tsbuildinfo +0 -1
- package/src/lib/workers/sw.js +0 -44
- package/vitest.config.ts +0 -10
- /package/{eslint.config.js → eslint.config.cjs} +0 -0
package/package.json
CHANGED
|
@@ -1,22 +1,26 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@oceanum/datamesh",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"scripts": {
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"scripts": {
|
|
5
|
+
"build:docs": "typedoc"
|
|
6
|
+
},
|
|
5
7
|
"publishConfig": {
|
|
6
8
|
"access": "public"
|
|
7
9
|
},
|
|
8
10
|
"dependencies": {
|
|
9
|
-
"@types/geojson": "^7946.0.
|
|
11
|
+
"@types/geojson": "^7946.0.15",
|
|
10
12
|
"@types/object-hash": "^3.0.6",
|
|
11
13
|
"@zarrita/core": "^0.1.0-next.15",
|
|
12
14
|
"@zarrita/indexing": "^0.1.0-next.17",
|
|
13
15
|
"@zarrita/storage": "^0.1.0-next.7",
|
|
16
|
+
"apache-arrow": "^18.1.0",
|
|
14
17
|
"dayjs": "^1.11.13",
|
|
15
18
|
"idb-keyval": "^6.2.1",
|
|
16
19
|
"object-hash": "^3.0.0",
|
|
20
|
+
"wkx": "^0.5.0",
|
|
17
21
|
"zarrita": "^0.4.0-next.17"
|
|
18
22
|
},
|
|
23
|
+
"type": "module",
|
|
19
24
|
"main": "./dist/index.js",
|
|
20
|
-
"module": "./dist/index.mjs",
|
|
21
25
|
"typings": "./dist/index.d.ts"
|
|
22
26
|
}
|
|
File without changes
|
package/src/index.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
exports.__esModule = true;
|
|
17
|
+
__exportStar(require("./lib/connector"), exports);
|
|
18
|
+
__exportStar(require("./lib/datasource"), exports);
|
|
19
|
+
__exportStar(require("./lib/query"), exports);
|
|
20
|
+
__exportStar(require("./lib/datamodel"), exports);
|
package/src/index.ts
CHANGED
package/src/lib/connector.ts
CHANGED
|
@@ -1,13 +1,18 @@
|
|
|
1
1
|
import { Datasource } from "./datasource";
|
|
2
2
|
import { IQuery, Stage } from "./query";
|
|
3
|
-
import { Dataset, DatameshStore } from "./datamodel";
|
|
3
|
+
import { Dataset, DatameshStore, TempStore } from "./datamodel";
|
|
4
|
+
import { measureTime } from "./observe";
|
|
5
|
+
import { tableFromIPC, Table } from "apache-arrow";
|
|
4
6
|
|
|
5
7
|
/**
|
|
6
8
|
* Datamesh connector class.
|
|
7
9
|
*
|
|
8
10
|
* All datamesh operations are methods of this class.
|
|
11
|
+
*
|
|
9
12
|
*/
|
|
13
|
+
|
|
10
14
|
export class Connector {
|
|
15
|
+
static LAZY_LOAD_SIZE = 1e8;
|
|
11
16
|
private _token: string;
|
|
12
17
|
private _proto: string;
|
|
13
18
|
private _host: string;
|
|
@@ -19,14 +24,15 @@ export class Connector {
|
|
|
19
24
|
*
|
|
20
25
|
* @param token - Your datamesh access token. Defaults to the environment variable DATAMESH_TOKEN if it is defined, otherwise to the literal string "$DATAMESH_TOKEN". DO NOT put your Datamesh token directly into public facing browser code.
|
|
21
26
|
* @param service - URL of datamesh service. Defaults to environment variable DATAMESH_SERVICE or "https://datamesh.oceanum.io".
|
|
22
|
-
* @param
|
|
27
|
+
* @param _gateway - URL of gateway service. Defaults to environment variable DATAMESH_GATEWAY or "https://gateway.datamesh.oceanum.io".
|
|
23
28
|
*
|
|
24
29
|
* @throws {Error} - If a valid token is not provided.
|
|
25
30
|
*/
|
|
26
31
|
constructor(
|
|
27
32
|
token = process.env.DATAMESH_TOKEN || "$DATAMESH_TOKEN",
|
|
28
33
|
service = process.env.DATAMESH_SERVICE || "https://datamesh.oceanum.io",
|
|
29
|
-
|
|
34
|
+
// @ignore //
|
|
35
|
+
_gateway = process.env.DATAMESH_GATEWAY ||
|
|
30
36
|
"https://gateway.datamesh.oceanum.io"
|
|
31
37
|
) {
|
|
32
38
|
if (!token) {
|
|
@@ -44,7 +50,8 @@ export class Connector {
|
|
|
44
50
|
"X-DATAMESH-TOKEN": this._token,
|
|
45
51
|
};
|
|
46
52
|
|
|
47
|
-
|
|
53
|
+
/* For testing: the gateway service is not always on the same domain as the service, so an explicit gateway URL takes precedence. */
|
|
54
|
+
this._gateway = _gateway || `${this._proto}//gateway.${this._host}`;
|
|
48
55
|
|
|
49
56
|
if (
|
|
50
57
|
this._host.split(".").slice(-1)[0] !==
|
|
@@ -133,15 +140,15 @@ export class Connector {
|
|
|
133
140
|
* @param dataFormat - The format of the requested data, sent as the Accept header. Defaults to "application/vnd.apache.arrow.file".
|
|
134
141
|
* @returns The query result parsed into an Apache Arrow table.
|
|
135
142
|
*/
|
|
136
|
-
async dataRequest(
|
|
137
|
-
|
|
138
|
-
dataFormat = "application/
|
|
139
|
-
): Promise<
|
|
140
|
-
const response = await fetch(`${this._gateway}/
|
|
143
|
+
private async dataRequest(
  qhash: string,
  dataFormat = "application/vnd.apache.arrow.file"
): Promise<Table> {
  // The staged query is addressed by its hash; the result is requested in Arrow format.
  const endpoint = `${this._gateway}/oceanql/${qhash}?f=arrow`;
  // Auth headers are spread last, exactly as before, so they win on any key clash.
  const headers = { Accept: dataFormat, ...this._authHeaders };
  const response = await fetch(endpoint, { headers });
  await this.validateResponse(response);
  // Decode the Arrow IPC payload into a Table.
  const payload = await response.arrayBuffer();
  return tableFromIPC(payload);
}
|
|
146
153
|
|
|
147
154
|
/**
|
|
@@ -150,6 +157,7 @@ export class Connector {
|
|
|
150
157
|
* @param query - The query to stage.
|
|
151
158
|
* @returns The staged response.
|
|
152
159
|
*/
|
|
160
|
+
@measureTime
|
|
153
161
|
private async stageRequest(query: IQuery): Promise<Stage | null> {
|
|
154
162
|
const data = JSON.stringify(query);
|
|
155
163
|
const response = await fetch(`${this._gateway}/oceanql/stage/`, {
|
|
@@ -173,12 +181,21 @@ export class Connector {
|
|
|
173
181
|
* @param query - The query to execute.
|
|
174
182
|
* @returns The response from the server.
|
|
175
183
|
*/
|
|
176
|
-
|
|
184
|
+
@measureTime
|
|
185
|
+
async query(
|
|
186
|
+
query: IQuery
|
|
187
|
+
): Promise<Dataset</** @ignore */ DatameshStore | TempStore> | null> {
|
|
177
188
|
const stage = await this.stageRequest(query);
|
|
178
189
|
if (!stage) {
|
|
179
190
|
console.warn("No data found for query");
|
|
180
191
|
return null;
|
|
181
192
|
}
|
|
193
|
+
//For smaller dataframes use arrow for transport
|
|
194
|
+
if (stage.size < Connector.LAZY_LOAD_SIZE && stage.container != "dataset") {
|
|
195
|
+
const table = await this.dataRequest(stage.qhash);
|
|
196
|
+
const dataset = await Dataset.fromArrow(table, stage.coordkeys);
|
|
197
|
+
return dataset;
|
|
198
|
+
}
|
|
182
199
|
const url = `${this._gateway}/zarr/${stage.qhash}`;
|
|
183
200
|
const dataset = await Dataset.zarr(url, this._authHeaders);
|
|
184
201
|
return dataset;
|
|
@@ -191,6 +208,7 @@ export class Connector {
|
|
|
191
208
|
* @returns The datasource instance.
|
|
192
209
|
* @throws {Error} - If the datasource cannot be found or is not authorized.
|
|
193
210
|
*/
|
|
211
|
+
@measureTime
|
|
194
212
|
async getDatasource(datasourceId: string): Promise<Datasource> {
|
|
195
213
|
const meta = await this.metadataRequest(datasourceId);
|
|
196
214
|
const metaDict = await meta.json();
|
|
@@ -206,23 +224,15 @@ export class Connector {
|
|
|
206
224
|
*
|
|
207
225
|
* @param datasourceId - Unique datasource ID.
|
|
208
226
|
* @param parameters - Additional datasource parameters.
|
|
209
|
-
* @
|
|
210
|
-
* @returns The datasource container.
|
|
227
|
+
* @returns The dataset.
|
|
211
228
|
*/
|
|
229
|
+
@measureTime
async loadDatasource(
  datasourceId: string,
  parameters: Record<string, string | number> = {}
): Promise<Dataset</** @ignore */ DatameshStore> | null> {
  // Loading a datasource is a query that names only the datasource and its
  // parameters; staging and transport selection are delegated to query().
  const request = { datasource: datasourceId, parameters };
  const loaded = await this.query(request);
  return loaded;
}
|
|
228
238
|
}
|
package/src/lib/datamodel.ts
CHANGED
|
@@ -1,22 +1,28 @@
|
|
|
1
|
-
import { CachedHTTPStore } from "./zarr";
|
|
2
1
|
import * as zarr from "@zarrita/core";
|
|
3
2
|
import { Chunk, DataType, Listable, Location, TypedArray } from "@zarrita/core";
|
|
4
3
|
import { Mutable, AsyncReadable } from "@zarrita/storage";
|
|
5
4
|
import { get, set, Slice } from "@zarrita/indexing";
|
|
5
|
+
import { Table, DataType as ArrowDataType } from "apache-arrow";
|
|
6
|
+
import { Geometry, Feature, FeatureCollection } from "geojson";
|
|
7
|
+
import wkx from "wkx";
|
|
6
8
|
|
|
7
|
-
import {
|
|
9
|
+
import { CachedHTTPStore } from "./zarr";
|
|
10
|
+
import { Schema, Coordinates } from "./datasource";
|
|
11
|
+
import { measureTime } from "./observe";
|
|
8
12
|
|
|
9
|
-
type ATypedArray =
|
|
13
|
+
export type ATypedArray =
|
|
10
14
|
| Int8Array
|
|
11
15
|
| Int16Array
|
|
12
16
|
| Int32Array
|
|
17
|
+
| BigInt64Array
|
|
13
18
|
| Uint8Array
|
|
14
19
|
| Uint16Array
|
|
15
20
|
| Uint32Array
|
|
21
|
+
| BigUint64Array
|
|
16
22
|
| Float32Array
|
|
17
23
|
| Float64Array;
|
|
18
|
-
type Scalar = string | number | boolean;
|
|
19
|
-
type NDArray =
|
|
24
|
+
export type Scalar = string | number | boolean;
|
|
25
|
+
export type NDArray =
|
|
20
26
|
| Scalar[]
|
|
21
27
|
| Scalar[][]
|
|
22
28
|
| Scalar[][][]
|
|
@@ -25,7 +31,7 @@ type NDArray =
|
|
|
25
31
|
| ATypedArray[][]
|
|
26
32
|
| ATypedArray[][][]
|
|
27
33
|
| ATypedArray[][][][];
|
|
28
|
-
type Data = NDArray | ATypedArray | Scalar;
|
|
34
|
+
export type Data = NDArray | ATypedArray | Scalar;
|
|
29
35
|
|
|
30
36
|
/**
|
|
31
37
|
* Represents a data variable.
|
|
@@ -57,11 +63,11 @@ const getShape = (a: Data) => {
|
|
|
57
63
|
const dim = [] as number[];
|
|
58
64
|
if (!isArray(a)) return dim;
|
|
59
65
|
for (;;) {
|
|
60
|
-
// @ts-
|
|
66
|
+
// @ts-expect-error: Scalar already returned
|
|
61
67
|
dim.push(a.length);
|
|
62
|
-
// @ts-
|
|
68
|
+
// @ts-expect-error: Scalar already returned
|
|
63
69
|
if (isArray(a[0])) {
|
|
64
|
-
// @ts-
|
|
70
|
+
// @ts-expect-error: Scalar already returned
|
|
65
71
|
a = a[0];
|
|
66
72
|
} else {
|
|
67
73
|
break;
|
|
@@ -81,7 +87,7 @@ const getDtype = (data: Data): DataType => {
|
|
|
81
87
|
if (typeof data === "number") {
|
|
82
88
|
return "float32";
|
|
83
89
|
} else if (typeof data === "string") {
|
|
84
|
-
return "
|
|
90
|
+
return "v2:object";
|
|
85
91
|
} else {
|
|
86
92
|
switch (data.constructor.name) {
|
|
87
93
|
case "Int8Array":
|
|
@@ -90,6 +96,8 @@ const getDtype = (data: Data): DataType => {
|
|
|
90
96
|
return "int16";
|
|
91
97
|
case "Int32Array":
|
|
92
98
|
return "int32";
|
|
99
|
+
case "BigInt64Array":
|
|
100
|
+
return "int64";
|
|
93
101
|
case "Uint8Array":
|
|
94
102
|
return "uint8";
|
|
95
103
|
case "Uint16Array":
|
|
@@ -209,15 +217,18 @@ const flatten = (
|
|
|
209
217
|
return rows;
|
|
210
218
|
};
|
|
211
219
|
|
|
220
|
+
/** @ignore */
|
|
212
221
|
export type DatameshStore = Location<Listable<AsyncReadable>>;
|
|
222
|
+
/** @ignore */
|
|
213
223
|
export type TempStore = Location<Mutable>;
|
|
214
224
|
|
|
215
225
|
/**
|
|
216
226
|
* Represents a data variable within a dataset.
|
|
217
227
|
*/
|
|
218
228
|
export class DataVar<
|
|
229
|
+
/** @ignore */
|
|
219
230
|
DType extends DataType,
|
|
220
|
-
S extends TempStore | DatameshStore
|
|
231
|
+
S extends TempStore | DatameshStore,
|
|
221
232
|
> {
|
|
222
233
|
/**
|
|
223
234
|
* Creates an instance of DataVar.
|
|
@@ -251,7 +262,7 @@ export class DataVar<
|
|
|
251
262
|
* @param slice - Optional slice parameters to retrieve specific data from the zarr array.
|
|
252
263
|
* @returns A promise that resolves to the data of the zarr array.
|
|
253
264
|
*/
|
|
254
|
-
|
|
265
|
+
@measureTime
|
|
255
266
|
async get(
|
|
256
267
|
slice?: (null | Slice | number)[] | null | undefined
|
|
257
268
|
): Promise<Data> {
|
|
@@ -259,7 +270,7 @@ export class DataVar<
|
|
|
259
270
|
this.arr as zarr.Array<DType, AsyncReadable>,
|
|
260
271
|
slice
|
|
261
272
|
);
|
|
262
|
-
if (_data.shape) {
|
|
273
|
+
if (this.arr.dtype !== "v2:object" && _data.shape) {
|
|
263
274
|
return unravel(_data.data, _data.shape, _data.stride);
|
|
264
275
|
} else {
|
|
265
276
|
return _data.data as Data;
|
|
@@ -271,19 +282,21 @@ export class DataVar<
|
|
|
271
282
|
* Represents a dataset with dimensions, data variables, and attributes.
|
|
272
283
|
* Implements the DatasetApi interface.
|
|
273
284
|
*/
|
|
274
|
-
export class Dataset
|
|
285
|
+
export class Dataset</** @ignore */ S extends DatameshStore | TempStore> {
|
|
275
286
|
/**
|
|
276
287
|
* Creates an instance of Dataset.
|
|
277
288
|
* @param dims - The dimensions of the dataset.
|
|
278
289
|
* @param data_vars - The data variables of the dataset.
|
|
279
290
|
* @param attrs - The attributes of the dataset.
|
|
280
291
|
* @param root - The root group of the dataset.
|
|
292
|
+
* @param coordinates - The coordinates map of the dataset.
|
|
281
293
|
*/
|
|
282
294
|
dims: Record<string, number>;
|
|
283
295
|
data_vars: S extends TempStore
|
|
284
296
|
? Record<string, DataVar<DataType, TempStore>>
|
|
285
297
|
: Record<string, DataVar<DataType, DatameshStore>>;
|
|
286
298
|
attrs: Record<string, unknown>;
|
|
299
|
+
coordinates: Coordinates;
|
|
287
300
|
root: S;
|
|
288
301
|
|
|
289
302
|
constructor(
|
|
@@ -292,23 +305,26 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
292
305
|
? Record<string, DataVar<DataType, TempStore>>
|
|
293
306
|
: Record<string, DataVar<DataType, DatameshStore>>,
|
|
294
307
|
attrs: Record<string, unknown>,
|
|
308
|
+
coordinates: Coordinates,
|
|
295
309
|
root: S
|
|
296
310
|
) {
|
|
297
311
|
this.data_vars = data_vars;
|
|
298
312
|
this.dims = dims;
|
|
299
313
|
this.attrs = attrs;
|
|
300
314
|
this.root = root;
|
|
315
|
+
this.coordinates = coordinates;
|
|
301
316
|
}
|
|
302
317
|
|
|
303
318
|
/**
|
|
304
319
|
* Creates a Dataset instance from a Zarr store.
|
|
305
|
-
* @param
|
|
320
|
+
* @param url - The URL of the datamesh gateway.
|
|
306
321
|
* @param authHeaders - The authentication headers.
|
|
307
322
|
* @param parameters - Optional parameters for the request.
|
|
308
323
|
* @param chunks - Optional chunking strategy.
|
|
309
324
|
* @param downsample - Optional downsampling strategy.
|
|
310
325
|
* @returns A promise that resolves to a Dataset instance.
|
|
311
326
|
*/
|
|
327
|
+
@measureTime
|
|
312
328
|
static async zarr(
|
|
313
329
|
url: string,
|
|
314
330
|
authHeaders: Record<string, string>,
|
|
@@ -354,16 +370,67 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
354
370
|
});
|
|
355
371
|
}
|
|
356
372
|
}
|
|
357
|
-
|
|
373
|
+
const coords = JSON.parse(
|
|
374
|
+
group.attrs["_coordinates"] as string
|
|
375
|
+
) as Coordinates;
|
|
376
|
+
return new Dataset<DatameshStore>(
|
|
377
|
+
dims,
|
|
378
|
+
data_vars,
|
|
379
|
+
group.attrs,
|
|
380
|
+
coords,
|
|
381
|
+
root
|
|
382
|
+
);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/**
 * Creates an in-memory Dataset from an Apache Arrow table.
 *
 * Every column of the table becomes a data variable along a single "record"
 * dimension whose length is the number of rows in the table.
 *
 * Timestamp columns are converted to milliseconds since the epoch and binary
 * columns are converted to base64 strings; all other columns are stored as
 * returned by the column's `toArray()`.
 *
 * @param data - The Arrow table to convert.
 * @param coordmap - The coordinates map to attach to the dataset.
 * @returns A promise that resolves to a Dataset backed by a temporary store.
 */
static async fromArrow(
  data: Table,
  coordmap: Coordinates
): Promise<Dataset<TempStore>> {
  const attrs = {};
  const dims = { record: data.numRows };
  // NOTE(review): the entries stored here are plain { dims, attrs, data }
  // records handed to Dataset.init, not DataVar instances - confirm the
  // annotation against the Schema type.
  const data_vars = {} as Record<string, DataVar<DataType, TempStore>>;
  for (const field of data.schema.fields) {
    const column = data.getChild(field.name);
    if (!column) {
      // No data for this field; nothing to assign.
      continue;
    }
    let array = column.toArray();
    if (ArrowDataType.isTimestamp(field.type)) {
      // Arrow time units: 0 = second, 1 = millisecond, 2 = microsecond,
      // 3 = nanosecond. Values are normalised to milliseconds.
      const unit = field.type.unit;
      // Millisecond epochs do not fit in 32 bits, so a Float64Array is used;
      // it holds integers exactly up to 2^53.
      // (assumes getDtype maps Float64Array to a float64 dtype - TODO confirm)
      const carray = new Float64Array(array.length);
      if (unit === 0) {
        // Seconds must be scaled up; 1000 ** -1 is fractional and cannot be
        // turned into a BigInt divisor.
        for (let i = 0; i < array.length; i++) {
          carray[i] = Number(array[i]) * 1000;
        }
      } else {
        const divisor = BigInt(1000 ** (unit - 1));
        for (let i = 0; i < array.length; i++) {
          carray[i] = Number(array[i] / divisor);
        }
      }
      array = carray;
    } else if (ArrowDataType.isBinary(field.type)) {
      const carray: string[] = [];
      for (let i = 0; i < array.length; i++) {
        // Buffer.from replaces the deprecated Buffer constructor.
        carray.push(Buffer.from(array[i]).toString("base64"));
      }
      array = carray;
    }
    data_vars[field.name] = {
      dims: ["record"],
      attrs: {},
      data: array,
    };
  }
  return await Dataset.init({ dims, data_vars, attrs }, coordmap);
}
|
|
359
417
|
|
|
360
418
|
/**
|
|
361
419
|
* Initializes an in memory Dataset instance from a data object.
|
|
362
420
|
* @param datasource - An object containing id, dimensions, data variables, and attributes.
|
|
363
421
|
*/
|
|
364
|
-
static async init(
|
|
422
|
+
static async init(
|
|
423
|
+
datasource: Schema,
|
|
424
|
+
coordinates?: Coordinates
|
|
425
|
+
): Promise<Dataset<TempStore>> {
|
|
365
426
|
const root = zarr.root(new Map());
|
|
366
|
-
const ds = new Dataset(
|
|
427
|
+
const ds = new Dataset(
|
|
428
|
+
datasource.dims,
|
|
429
|
+
{},
|
|
430
|
+
datasource.attrs || {},
|
|
431
|
+
coordinates || {},
|
|
432
|
+
root
|
|
433
|
+
);
|
|
367
434
|
for (const k in datasource.data_vars) {
|
|
368
435
|
const { dims, attrs, data }: DataVariable = datasource.data_vars[k];
|
|
369
436
|
await ds.assign(k, dims, data as Data, attrs);
|
|
@@ -372,7 +439,7 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
372
439
|
}
|
|
373
440
|
|
|
374
441
|
/**
|
|
375
|
-
* Converts the
|
|
442
|
+
* Converts the dataset into a dataframe format.
|
|
376
443
|
*
|
|
377
444
|
* @returns {Promise<Record<string, unknown>[]>} A promise that resolves to an array of records,
|
|
378
445
|
* where each record represents a row in the dataframe.
|
|
@@ -380,23 +447,94 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
380
447
|
* @remarks
|
|
381
448
|
* This method iterates over the data variables, retrieves their dimensions and data,
|
|
382
449
|
* and then flattens the data into a dataframe structure.
|
|
450
|
+
* Time coordinates are converted to ISO 8601 format.
|
|
451
|
+
* BigInt datatypes are coerced to number.
|
|
383
452
|
*
|
|
384
453
|
* @example
|
|
385
454
|
* ```typescript
|
|
386
|
-
* const dataframe = await instance.
|
|
455
|
+
* const dataframe = await instance.asDataframe();
|
|
387
456
|
* console.log(dataframe);
|
|
388
457
|
* ```
|
|
389
458
|
*/
|
|
390
|
-
|
|
459
|
+
@measureTime
async asDataframe(): Promise<Record<string, unknown>[]> {
  const data = {} as Record<string, DataVariable>;
  // Names of the variables stored as 64-bit integers; their values arrive as BigInt.
  const bigint: string[] = [];
  for (const k in this.data_vars) {
    data[k] = {
      attrs: this.data_vars[k].attrs,
      dims: this.data_vars[k].dims,
    };
    data[k].data = (await this.data_vars[k].get()) as Data;
    const dtype = this.data_vars[k].arr.dtype;
    if (dtype === "int64" || dtype === "uint64") {
      bigint.push(k);
    }
  }
  const df = flatten(data, { ...this.dims }, []);
  // Coerce BigInt to number BEFORE the time conversion: a 64-bit integer time
  // coordinate would otherwise reach the Date constructor as a BigInt, which
  // throws a TypeError.
  for (const k of bigint) {
    for (let i = 0; i < df.length; i++) {
      df[i][k] = Number(df[i][k]);
    }
  }
  const timeKey = this.coordinates.t;
  if (timeKey) {
    // NOTE(review): the value is passed straight to Date, so it is presumably
    // milliseconds since the epoch - confirm against the data source.
    for (let i = 0; i < df.length; i++) {
      df[i][timeKey] = new Date(df[i][timeKey] as number).toISOString();
    }
  }
  return df;
}
|
|
491
|
+
|
|
492
|
+
/**
 * Converts the dataset into a GeoJSON FeatureCollection.
 *
 * @param geom - Optional GeoJSON geometry to apply to all records, otherwise a geometry column is required. Will override the geometry column if present.
 *
 * @returns {Promise<FeatureCollection>} A promise that resolves to a FeatureCollection
 * with one Feature per row of the dataframe.
 *
 * @throws Will throw an error if no geometry is found in data or as a parameter
 *
 * @remarks
 * The rows come from asDataframe() and become the properties of each feature.
 * When no geometry parameter is given, the geometry column is decoded from
 * base64 and parsed with wkx, and is removed from the feature properties.
 *
 * @example
 * ```typescript
 * const geojson = await instance.asGeojson();
 * console.log(geojson.features.length);
 * ```
 */
async asGeojson(geom?: Geometry): Promise<FeatureCollection> {
  const geomKey = this.coordinates.g;
  if (!geomKey && !geom) {
    throw new Error("No geometry found");
  }
  const features = [] as Feature[];
  const df = await this.asDataframe();
  for (let i = 0; i < df.length; i++) {
    // Shallow copy so the geometry column can be dropped from the properties
    // without touching the dataframe row.
    const { ...properties } = df[i];
    let geometry: Geometry;
    if (geom) {
      geometry = geom;
    } else {
      // The guard above guarantees the geometry column name is set here.
      const key = geomKey as string;
      delete properties[key];
      // Buffer.from replaces the deprecated Buffer constructor.
      const wkbbuffer = Buffer.from(df[i][key] as string, "base64");
      geometry = wkx.Geometry.parse(wkbbuffer).toGeoJSON() as Geometry;
    }
    features.push({
      type: "Feature",
      geometry,
      properties,
    });
  }
  return {
    type: "FeatureCollection",
    features,
  };
}
|
|
401
539
|
|
|
402
540
|
/**
|
|
@@ -406,7 +544,7 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
406
544
|
* @param dims - An array of dimension names corresponding to the data.
|
|
407
545
|
* @param data - The data to be assigned, which can be a multi-dimensional array.
|
|
408
546
|
* @param attrs - Optional. A record of attributes to be associated with the variable.
|
|
409
|
-
* @param
|
|
547
|
+
* @param coordinates - Optional. A record of coordinates to be associated with the variable.
|
|
410
548
|
* @param chunks - Optional. An array specifying the chunk sizes for the data.
|
|
411
549
|
|
|
412
550
|
* @returns A promise that resolves when the data has been successfully assigned.
|
|
@@ -418,6 +556,7 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
418
556
|
dims: string[],
|
|
419
557
|
data: Data,
|
|
420
558
|
attrs?: Record<string, unknown>,
|
|
559
|
+
coordinates?: Coordinates,
|
|
421
560
|
chunks?: number[]
|
|
422
561
|
): Promise<void> {
|
|
423
562
|
const shape = getShape(data);
|
|
@@ -435,13 +574,14 @@ export class Dataset<S extends DatameshStore | TempStore> {
|
|
|
435
574
|
this.dims[dim] = shape[i];
|
|
436
575
|
}
|
|
437
576
|
});
|
|
438
|
-
|
|
577
|
+
const dtype = getDtype(data);
|
|
439
578
|
const arr = await zarr.create(
|
|
440
579
|
this.root.resolve(varid) as Location<Mutable>,
|
|
441
580
|
{
|
|
442
581
|
shape,
|
|
443
|
-
data_type:
|
|
582
|
+
data_type: dtype,
|
|
444
583
|
chunk_shape: chunks || shape,
|
|
584
|
+
codecs: dtype == "v2:object" ? [{ name: "json2" }] : [],
|
|
445
585
|
}
|
|
446
586
|
);
|
|
447
587
|
await set(
|
package/src/lib/datasource.ts
CHANGED
|
@@ -4,30 +4,26 @@ import duration from "dayjs/plugin/duration";
|
|
|
4
4
|
|
|
5
5
|
import { DataVariable } from "./datamodel";
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"Coordinate_k" = "k",
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
type Coordinates = {
|
|
7
|
+
export type Coordinate =
|
|
8
|
+
| "s" // locations assumed stationary, datasource multigeometry coordinate indexed by station coordinate
|
|
9
|
+
| "e" // Ensemble
|
|
10
|
+
| "b" // Raster band
|
|
11
|
+
| "c" // Category
|
|
12
|
+
| "q" // Quantile
|
|
13
|
+
| "n" // Season
|
|
14
|
+
| "m" // Month
|
|
15
|
+
| "t" // Time
|
|
16
|
+
| "z" // Vertical coordinate
|
|
17
|
+
| "y" // Horizontal northerly
|
|
18
|
+
| "x" // Horizontal easterly
|
|
19
|
+
| "g" // Abstract coordinate - a 2 or 3D geometry that defines a feature location
|
|
20
|
+
| "f" // Frequency - spectra
|
|
21
|
+
| "d" // Direction - spectra or stats
|
|
22
|
+
| "i" // Coordinate_i
|
|
23
|
+
| "j" // Coordinate_j
|
|
24
|
+
| "k"; // Coordinate_k
|
|
25
|
+
|
|
26
|
+
export type Coordinates = {
|
|
31
27
|
[key in Coordinate]?: string;
|
|
32
28
|
};
|
|
33
29
|
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Method decorator that logs how long the decorated method takes to settle.
 *
 * The method is replaced by an async wrapper, so the decorated method always
 * returns a promise. The elapsed time is written with `console.debug` both
 * when the method completes and when it throws or rejects; the result or the
 * error of the original method is passed through unchanged.
 *
 * @param target - The object the method is defined on. Unused.
 * @param propertyKey - Name of the decorated method, used as the log label.
 * @param descriptor - Property descriptor whose `value` is wrapped.
 * @returns The same descriptor, with `value` replaced by the timing wrapper.
 *
 * @ignore
 */
export function measureTime(
  target: any,
  propertyKey: string,
  descriptor: PropertyDescriptor
) {
  const originalMethod = descriptor.value;

  descriptor.value = async function (this: unknown, ...args: any[]) {
    const start = Date.now();
    try {
      return await originalMethod.apply(this, args);
    } finally {
      // Runs on success and on failure, so slow failing calls are timed too.
      const executionTime = Date.now() - start;
      console.debug(`${propertyKey} took ${executionTime}ms`);
    }
  };

  return descriptor;
}
|