@oceanum/datamesh 0.5.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +5 -5
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5447 -4501
- package/dist/lib/cftime.d.ts +127 -0
- package/dist/lib/cftime.d.ts.map +1 -0
- package/dist/lib/connector.d.ts +142 -0
- package/dist/lib/connector.d.ts.map +1 -0
- package/dist/lib/datamodel.d.ts +227 -0
- package/dist/lib/datamodel.d.ts.map +1 -0
- package/dist/lib/datasource.d.ts +117 -0
- package/dist/lib/datasource.d.ts.map +1 -0
- package/dist/lib/observe.d.ts +3 -0
- package/dist/lib/observe.d.ts.map +1 -0
- package/dist/lib/query.d.ts +135 -0
- package/dist/lib/query.d.ts.map +1 -0
- package/dist/lib/session.d.ts +61 -0
- package/dist/lib/session.d.ts.map +1 -0
- package/dist/lib/zarr.d.ts +59 -0
- package/dist/lib/zarr.d.ts.map +1 -0
- package/package.json +15 -2
- package/eslint.config.cjs +0 -22
- package/project.json +0 -4
- package/proxy/cloudflare/index.js +0 -42
- package/proxy/express/README.md +0 -54
- package/proxy/express/index.js +0 -126
- package/proxy/guide.md +0 -140
- package/src/index.js +0 -20
- package/src/index.ts +0 -4
- package/src/lib/connector.ts +0 -388
- package/src/lib/datamodel.ts +0 -820
- package/src/lib/datasource.ts +0 -161
- package/src/lib/observe.ts +0 -21
- package/src/lib/query.ts +0 -212
- package/src/lib/session.ts +0 -173
- package/src/lib/workers/README.md +0 -3
- package/src/lib/zarr.ts +0 -236
- package/src/test/dataframe.test.ts +0 -108
- package/src/test/dataset.test.ts +0 -180
- package/src/test/datasource.test.ts +0 -28
- package/src/test/fixtures.ts +0 -297
- package/src/test/query.test.ts +0 -49
- package/tsconfig.json +0 -13
- package/tsconfig.lib.json +0 -25
- package/tsconfig.spec.json +0 -31
- package/tsconfig.vitest-temp.json +0 -61
- package/typedoc.json +0 -8
- package/vite.config.ts +0 -67
package/src/lib/datamodel.ts
DELETED
|
@@ -1,820 +0,0 @@
|
|
|
1
|
-
import * as zarr from "@zarrita/core";
|
|
2
|
-
import {
|
|
3
|
-
Chunk,
|
|
4
|
-
DataType,
|
|
5
|
-
Location,
|
|
6
|
-
Listable,
|
|
7
|
-
TypedArray,
|
|
8
|
-
CodecMetadata,
|
|
9
|
-
} from "@zarrita/core";
|
|
10
|
-
import { Mutable, AsyncReadable } from "@zarrita/storage";
|
|
11
|
-
import { get, set, Slice, slice } from "@zarrita/indexing";
|
|
12
|
-
import { BoolArray } from "zarrita";
|
|
13
|
-
import { Table, DataType as ArrowDataType } from "apache-arrow";
|
|
14
|
-
import { Geometry, Feature, FeatureCollection } from "geojson";
|
|
15
|
-
import { Geometry as WkxGeometry } from "wkx-ts";
|
|
16
|
-
import { Buffer } from "buffer/index";
|
|
17
|
-
|
|
18
|
-
import { CachedHTTPStore, zarr_open_v2_datetime } from "./zarr";
|
|
19
|
-
import { Schema, Coordkeys } from "./datasource";
|
|
20
|
-
import { measureTime } from "./observe";
|
|
21
|
-
|
|
22
|
-
/**
 * Numeric typed arrays that can hold variable data.
 */
export type ATypedArray =
  | Int8Array
  | Int16Array
  | Int32Array
  | BigInt64Array
  | Uint8Array
  | Uint16Array
  | Uint32Array
  | BigUint64Array
  | Float32Array
  | Float64Array;

/**
 * A single, non-array value.
 */
export type Scalar = string | number | boolean;

/**
 * Plain arrays of `T` nested between one and four levels deep.
 */
type Nested<T> = T[] | T[][] | T[][][] | T[][][][];

/**
 * Nested plain arrays whose leaves are either scalars or typed arrays.
 */
export type NDArray = Nested<Scalar> | Nested<ATypedArray>;

/**
 * Any value a variable can carry: nested arrays, one typed array, or a scalar.
 */
export type Data = NDArray | ATypedArray | Scalar;
|
|
44
|
-
|
|
45
|
-
/**
 * Plain-object description of one variable: its metadata plus, optionally,
 * the values themselves.
 */
export type DataVariable = {
  /** Free-form attributes attached to the variable. */
  attributes: Record<string, string | unknown>;
  /** Names of the dimensions the variable is defined on. */
  dimensions: string[];
  /** Datatype of the values, when known. */
  dtype?: DataType;
  /** The values of the variable, when present. */
  data?: Data;
};
|
|
66
|
-
|
|
67
|
-
/**
 * Decodes a base64-encoded WKB (well-known binary) geometry and converts it
 * to GeoJSON.
 *
 * @param wkb - Base64 string holding the WKB bytes.
 * @returns The result of the parsed geometry's `toGeoJSON()`.
 */
export const wkb_to_geojson = (wkb: string) => {
  // `Buffer.from` replaces the deprecated `new Buffer(string, encoding)` constructor.
  const bytes = Buffer.from(wkb, "base64");
  return WkxGeometry.parse(bytes).toGeoJSON();
};
|
|
72
|
-
|
|
73
|
-
/**
 * Tells whether `data` is array-like: a plain array or an ArrayBuffer view
 * (typed array).
 *
 * Always returns a real boolean; the previous `data && (...)` form leaked the
 * falsy operand itself (`undefined`, `0`, `""`, `false`) to callers.
 *
 * @param data - Value to inspect; may be undefined.
 * @returns `true` for plain arrays and typed arrays, `false` otherwise.
 */
const isArray = (data?: Data): boolean =>
  data != null && (Array.isArray(data) || ArrayBuffer.isView(data));
|
|
76
|
-
|
|
77
|
-
/**
 * Derives the shape of possibly nested array data by following the first
 * element at every nesting level.
 *
 * Only the first element of each level is inspected, so ragged input is not
 * detected. Scalars yield an empty shape.
 *
 * @param a - Scalar, typed array or nested array.
 * @returns The length found at each nesting level, outermost first.
 */
const getShape = (a: Data) => {
  const extents: number[] = [];
  let level: unknown = a;
  while (isArray(level as Data)) {
    const current = level as ArrayLike<unknown>;
    extents.push(current.length);
    level = current[0];
  }
  return extents;
};
|
|
93
|
-
|
|
94
|
-
/**
 * Infers the zarr datatype of `data` from its first leaf element.
 *
 * Plain arrays are descended through their first element until a non-array
 * value is reached; that value's constructor name selects the datatype.
 *
 * @param data - Scalar, typed array or nested array.
 * @returns The inferred datatype. Empty or null/undefined leaves map to
 *   `"uint8"`, plain numbers to `"float32"`, strings and plain objects to
 *   `"v2:object"`.
 * @throws Error if the leaf has a constructor that is not handled.
 */
const getDtype = (data: Data): DataType => {
  let leaf: unknown = data;
  while (Array.isArray(leaf)) {
    leaf = leaf[0];
  }
  if (leaf === null || leaf === undefined) {
    return "uint8";
  }
  const ctorName = (leaf as { constructor: { name: string } }).constructor.name;
  switch (ctorName) {
    case "Boolean":
      return "bool";
    case "Number":
      return "float32";
    case "Int8Array":
      return "int8";
    case "Int16Array":
      return "int16";
    case "Int32Array":
      return "int32";
    case "BigInt64Array":
      return "int64";
    case "Uint8Array":
      return "uint8";
    case "Uint16Array":
      return "uint16";
    case "Uint32Array":
      return "uint32";
    case "BigUint64Array":
      // Previously missing although BigUint64Array is part of ATypedArray.
      return "uint64";
    case "Float32Array":
      return "float32";
    case "Float64Array":
      return "float64";
    case "String":
    case "Object":
      return "v2:object";
    default:
      throw new Error("Unsupported data type: " + ctorName);
  }
};
|
|
137
|
-
|
|
138
|
-
/**
 * Maps an Arrow data type onto a zarr datatype.
 *
 * Two Arrow type ids are special-cased; every other type uses the lower-cased
 * string form of the Arrow type as the zarr datatype name.
 *
 * @param dtype - Arrow data type of a column.
 * @returns The corresponding zarr datatype.
 */
const arrowTypeToDType = (dtype: ArrowDataType): DataType => {
  // NOTE(review): ids 5 and 1 are presumably Utf8 and Null in Arrow's `Type`
  // enum - confirm against the apache-arrow version in use.
  const overrides: Record<number, string> = { 5: "v2:object", 1: "uint8" };
  const fallback: string = dtype.toString().toLowerCase();
  return (overrides[dtype.typeId] ?? fallback) as DataType;
};
|
|
148
|
-
|
|
149
|
-
/**
 * Flattens nested array data into a single one-dimensional container.
 *
 * Non-array input (scalars, typed arrays) is returned untouched. Nested plain
 * arrays are flattened; when the leaves are typed arrays they are concatenated
 * into one typed array of the same kind, assuming every leaf has the length
 * of the first one.
 *
 * @param data - Scalar, typed array or nested array.
 * @returns The flattened data.
 */
const ravel = (data: Data) => {
  if (!Array.isArray(data)) return data;
  const leaves = (data as NDArray).flat(Infinity);
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- leaf kind is only known at runtime
  const head = leaves[0] as any;
  if (!isArray(head)) {
    return leaves;
  }
  // Leaves are typed arrays: pack them back to back into one buffer.
  const leafLength = head.length;
  const packed = new head.constructor(leaves.length * leafLength);
  leaves.forEach((leaf, position) => {
    packed.set(leaf, position * leafLength);
  });
  return packed;
};
|
|
165
|
-
|
|
166
|
-
/**
 * Computes row-major (last axis fastest) strides, in elements, for a shape.
 *
 * @param shape - Extent of every axis.
 * @returns One stride per axis; the last axis has stride 1.
 */
const get_strides = (shape: readonly number[]) => {
  const strides: number[] = [];
  let span = 1;
  // Walk the axes from last to first, accumulating the element span.
  for (const extent of [...shape].reverse()) {
    strides.unshift(span);
    span *= extent;
  }
  return strides;
};
|
|
175
|
-
|
|
176
|
-
/**
 * Rebuilds nested arrays from a flat buffer.
 *
 * The innermost axis is copied as a contiguous run into a new container of
 * the same constructor as `data`; outer axes become plain arrays.
 *
 * @param data - Flat buffer holding the values.
 * @param shape - Extent of every remaining axis.
 * @param stride - Element stride of every remaining axis.
 * @param offset - Index in `data` at which the current sub-array starts.
 * @returns `data[0]` for an empty shape, a copy of the run for one axis,
 *   otherwise an array of recursively unravelled sub-arrays.
 */
const unravel = <T extends DataType>(
  data: TypedArray<T>,
  shape: number[],
  stride: number[],
  offset = 0
): Data => {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- concrete buffer kind is only known at runtime
  const buffer = data as any;
  const rank = shape.length;
  if (rank === 0) return buffer[0];

  const [extent, ...innerShape] = shape;
  if (rank === 1) {
    const run = new buffer.constructor(extent);
    run.set(buffer.slice(offset, offset + extent));
    return run;
  }

  const [step, ...innerStride] = stride;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- nesting depth varies
  const nested: any[] = Array.from({ length: extent }, (_unused, i) =>
    unravel(data, innerShape, innerStride, offset + i * step)
  );
  return nested;
};
|
|
203
|
-
|
|
204
|
-
/**
 * Conversion factors from numpy datetime64 units to seconds, expressed as
 * `seconds = ticks * mul / div` in integer arithmetic.
 */
const NP_DATETIME_SCALE: Record<string, { mul: bigint; div: bigint }> = {
  W: { mul: 604800n, div: 1n },
  D: { mul: 86400n, div: 1n },
  h: { mul: 3600n, div: 1n },
  m: { mul: 60n, div: 1n },
  s: { mul: 1n, div: 1n },
  ms: { mul: 1n, div: 1000n },
  us: { mul: 1n, div: 1000000n },
  ns: { mul: 1n, div: 1000000000n },
  ps: { mul: 1n, div: 1000000000000n },
  fs: { mul: 1n, div: 1000000000000000n },
  as: { mul: 1n, div: 1000000000000000000n },
};

/**
 * Converts a chunk of numpy datetime64 values into POSIX seconds.
 *
 * The chunk's buffer is read as signed 64-bit integers and each tick count
 * is scaled to whole seconds (the integer division discards any sub-second
 * part). Coarse units (`m`, `h`, `D`, `W`) are multiplied up to seconds;
 * earlier, every unit other than ms/us/ns was treated as seconds.
 * Units that are not in the table (including the calendar units `Y` and `M`,
 * or a missing unit) still fall back to being read as seconds.
 *
 * @param data - Chunk whose buffer holds the int64 tick counts.
 * @param dtype - numpy dtype string such as `"<M8[ns]"`.
 * @returns Seconds as Float64 values, nested according to the chunk's shape.
 */
const npdatetime_to_posixtime = (data: Chunk<DataType>, dtype: string) => {
  // "<M8[ns]" -> "ns"; anything unparsable yields "" and uses the fallback.
  const unit = (dtype.split("<M8")[1] ?? "").replace(/^\[|\]$/g, "");
  const scale = NP_DATETIME_SCALE[unit] ?? NP_DATETIME_SCALE.s;

  // numpy datetime64 is stored as int64, so view the buffer as BigInt64Array.
  const ticks = data.data as unknown as BigInt64Array;
  const seconds = new Float64Array(data.data.length);
  for (let i = 0; i < seconds.length; i++) {
    seconds[i] = Number((ticks[i] * scale.mul) / scale.div);
  }
  return unravel(seconds, data.shape, data.stride);
};
|
|
230
|
-
|
|
231
|
-
/**
 * Expands variables defined over one or more dimensions into flat rows.
 *
 * The first key of `dims` is consumed at each recursion level. With one
 * dimension left, a row is emitted per index; otherwise every variable that
 * lists the leading dimension is sliced at the current index and the
 * remaining dimensions are handled recursively.
 *
 * @param data - Variables keyed by name.
 * @param dims - Remaining dimensions and their sizes, in iteration order.
 * @param rows - Accumulator that receives the generated rows.
 * @returns The same `rows` array that was passed in.
 */
const flatten = (
  data: Record<string, DataVariable>,
  dims: Record<string, number>,
  rows: Record<string, unknown>[]
): Record<string, unknown>[] => {
  const names = Object.keys(dims);
  const lead = names[0];
  const extent = dims[lead];
  const entries = Object.entries(data);

  if (names.length == 1) {
    const arrayLike = new Set(
      entries.filter(([, v]) => isArray(v.data)).map(([k]) => k)
    );
    for (let i = 0; i < extent; i++) {
      const row: Record<string, unknown> = {};
      for (const [key, variable] of entries) {
        if (!arrayLike.has(key)) {
          row[key] = variable.data;
          continue;
        }
        const values = variable.data as unknown as ArrayLike<unknown>;
        // NOTE(review): arrays of length 1 are deliberately left out of the
        // row here, as in the original; confirm this is intended.
        if (values.length > 1) row[key] = values[i];
      }
      rows.push(row);
    }
    return rows;
  }

  // More than one dimension left (or none, in which case nothing is emitted).
  const { [lead]: _consumed, ...remaining } = dims;
  for (let i = 0; i < extent; i++) {
    const slice: Record<string, DataVariable> = {};
    for (const [key, variable] of entries) {
      // NOTE(review): assumes the leading dimension is also the variable's
      // first axis, since the data is indexed on its outermost level.
      slice[key] = variable.dimensions.includes(lead)
        ? {
            attributes: variable.attributes,
            data: (variable.data as unknown as ArrayLike<unknown>)[i] as Data,
            dimensions: variable.dimensions.slice(1),
          }
        : variable;
    }
    flatten(slice, { ...remaining }, rows);
  }
  return rows;
};
|
|
278
|
-
|
|
279
|
-
/**
 * Location inside a listable, cached HTTP zarr store.
 * @ignore
 */
export type HttpZarr = Location<Listable<CachedHTTPStore>>;

/**
 * Location inside a mutable zarr store.
 * @ignore
 */
export type TempZarr = Location<Mutable>;

/** Per-axis selection: `null`, a slice or an integer index; may be omitted. */
type SliceDef = Array<null | Slice | number> | null | undefined;
|
|
284
|
-
/**
 * Parses one textual axis selection such as `"0:10"`, `":5"`, `"::2"` or `"3"`.
 *
 * Omitted slice parts become `null` (open-ended) instead of `NaN`. A string
 * without a colon is read as an integer index; an empty string selects the
 * whole axis.
 *
 * @throws Error if a part is present but is not an integer.
 */
const parseIndexSpec = (spec: string): Slice | number => {
  const toBound = (part?: string): number | null => {
    if (part === undefined || part.trim() === "") return null;
    const value = parseInt(part, 10);
    if (Number.isNaN(value)) {
      throw new Error(`Invalid slice component "${part}" in "${spec}"`);
    }
    return value;
  };
  const parts = spec.split(":");
  if (parts.length === 1) {
    const position = toBound(parts[0]);
    return position === null ? (slice(null) as Slice) : position;
  }
  const [start, stop, step] = parts;
  return slice(toBound(start), toBound(stop), toBound(step)) as Slice;
};

/**
 * Represents a data variable within a dataset.
 */
export class DataVar<
  /** @ignore */
  DType extends DataType,
  S extends TempZarr | HttpZarr,
> {
  /** The identifier of the data variable. */
  id: string;
  /** The dimensions associated with the data variable. */
  dimensions: string[];
  /** The attributes of the data variable, as key-value pairs. */
  attributes: Record<string, unknown>;
  /** The zarr array associated with the data variable. */
  arr: S extends TempZarr
    ? zarr.Array<DType, Mutable>
    : zarr.Array<DType, AsyncReadable>;

  /**
   * Creates an instance of DataVar.
   * @param id - The identifier for the data variable.
   * @param dimensions - The dimensions associated with the data variable.
   * @param attributes - The attributes of the data variable, represented as a record of key-value pairs.
   * @param arr - The zarr array associated with the data variable.
   */
  constructor(
    id: string,
    dimensions: string[],
    attributes: Record<string, unknown>,
    arr: S extends TempZarr
      ? zarr.Array<DType, Mutable>
      : zarr.Array<DType, AsyncReadable>
  ) {
    this.id = id;
    this.dimensions = dimensions;
    this.attributes = attributes;
    this.arr = arr; // zarr array
  }

  /**
   * Retrieves data from the zarr array.
   *
   * @param index - Optional per-axis selection. Entries may be `null`, a
   *   slice, an integer, or a string in `start:stop:step` form where any
   *   part may be omitted.
   * @returns A promise that resolves to the selected data. Zero-dimensional
   *   arrays and arrays whose first axis is empty resolve to `[]`. Object
   *   (`v2:object`) arrays resolve to the flat chunk data, booleans to a
   *   plain array, numpy datetimes to POSIX seconds, and everything else to
   *   arrays nested according to the selected shape. A selection that
   *   reduces to a single element resolves to that scalar.
   */
  @measureTime
  async get(index?: SliceDef | string[]): Promise<Data> {
    if (this.arr.shape.length === 0 || this.arr.shape[0] === 0) {
      return [];
    }
    const selection =
      index &&
      index.map((entry) =>
        typeof entry === "string" ? parseIndexSpec(entry) : entry
      );
    const result: Chunk<DType> | Scalar = await get(
      this.arr as zarr.Array<DType, AsyncReadable>,
      selection as SliceDef
    );
    // A fully indexed selection comes back as a bare scalar, not a chunk;
    // return it directly (it has no `.data` to read).
    if (typeof result !== "object" || result === null) {
      return result as Data;
    }
    const chunk = result as Chunk<DType>;
    if (this.arr.dtype === "v2:object" || !chunk.shape) {
      return chunk.data as Data;
    }
    if (this.arr.dtype === "bool") {
      return [...(chunk.data as unknown as Iterable<boolean>)] as Data;
    }
    // Safely inspect potential numpy datetime dtype stored in attrs
    const dtype = (this.arr.attrs as Record<string, unknown>)._dtype as
      | string
      | undefined;
    if (dtype?.startsWith("<M8")) {
      return npdatetime_to_posixtime(chunk, dtype) as Data;
    }
    return unravel(chunk.data, chunk.shape, chunk.stride);
  }
}
|
|
364
|
-
|
|
365
|
-
/**
 * Options accepted by `Dataset.zarr` when opening a zarr store.
 */
export interface ZarrOptions {
  /** Optional parameters for the request. */
  parameters?: Record<string, string | number>;
  /** Optional chunking for the request. */
  chunks?: string;
  /** Optional downsample parameters for the request. */
  downsample?: Record<string, number>;
  /** Optional coordinates for the request. */
  coordkeys?: Coordkeys;
  /** Optional timeout for the request. */
  timeout?: number;
  /** Disable caching. */
  nocache?: boolean;
}
|
|
377
|
-
|
|
378
|
-
/**
 * Turns one value of a geometry column into a GeoJSON geometry.
 *
 * Accepts an already decoded geometry object, a GeoJSON string, or a
 * base64-encoded WKB string. `null`/`undefined` yield `null`.
 */
const decodeGeometry = (value: unknown): Geometry | null => {
  if (value === null || value === undefined) return null;
  if (typeof value === "object") return value as Geometry;
  const text = String(value);
  // Base64 never starts with "{", so a leading brace identifies JSON text.
  if (text.trimStart().startsWith("{")) {
    return JSON.parse(text) as Geometry;
  }
  return wkb_to_geojson(text) as Geometry;
};

/**
 * Represents a dataset with dimensions, data variables, and attributes.
 */
export class Dataset<S extends HttpZarr | TempZarr> {
  /** The dimensions of the dataset and their sizes. */
  dimensions: Record<string, number>;
  /** The data variables of the dataset, keyed by variable id. */
  variables: S extends TempZarr
    ? Record<string, DataVar<DataType, TempZarr>>
    : Record<string, DataVar<DataType, HttpZarr>>;
  /** The attributes of the dataset. */
  attributes: Record<string, unknown>;
  /** The coordinates map of the dataset. */
  coordkeys: Coordkeys;
  /** The root group of the dataset. */
  root: S;

  /**
   * Creates an instance of Dataset.
   * @param dimensions - The dimensions of the dataset.
   * @param variables - The data variables of the dataset.
   * @param attributes - The attributes of the dataset.
   * @param coordkeys - The coordinates map of the dataset.
   * @param root - The root group of the dataset.
   */
  constructor(
    dimensions: Record<string, number>,
    variables: S extends TempZarr
      ? Record<string, DataVar<DataType, TempZarr>>
      : Record<string, DataVar<DataType, HttpZarr>>,
    attributes: Record<string, unknown>,
    coordkeys: Coordkeys,
    root: S
  ) {
    this.dimensions = dimensions;
    this.variables = variables;
    this.attributes = attributes;
    this.coordkeys = coordkeys;
    this.root = root;
  }

  /**
   * Creates a Dataset instance from a Zarr store.
   * @param url - The URL of the datamesh gateway.
   * @param authHeaders - The authentication headers.
   * @param options.chunks - Optional chunking for the request.
   * @param options.downsample - Optional downsample parameters for the request.
   * @param options.parameters - Optional parameters for the request.
   * @param options.coordkeys - Optional coordinates for the request.
   * @param options.timeout - Optional timeout for the request.
   * @param options.nocache - Disable caching
   * @returns A promise that resolves to a Dataset instance.
   * @throws Will throw an error if two arrays disagree on the size of a dimension.
   */
  //@measureTime
  static async zarr(
    url: string,
    authHeaders: Record<string, string>,
    options: ZarrOptions = {}
  ): Promise<Dataset<HttpZarr>> {
    const store = new CachedHTTPStore(url, authHeaders, {
      chunks: options.chunks,
      downsample: options.downsample,
      parameters: options.parameters,
      timeout: options.timeout,
      nocache: options.nocache,
    }) as AsyncReadable;
    const _zarr = await zarr.withConsolidated(store);
    const root = await zarr.open(_zarr, { kind: "group" });
    const vars = {} as Record<string, DataVar<DataType, HttpZarr>>;
    const dims = {} as Record<string, number>;
    for (const item of _zarr.contents()) {
      if (item.kind == "array") {
        let arr;
        try {
          arr = await zarr.open(root.resolve(item.path), {
            kind: "array",
          });
        } catch (e: unknown) {
          const message =
            typeof e === "object" && e && "message" in e
              ? String((e as { message?: unknown }).message)
              : undefined;
          if (message && message.includes("<M8")) {
            //A python <M8 type fails to load
            arr = await zarr_open_v2_datetime(root.resolve(item.path));
          } else {
            throw e;
          }
        }
        const array_dims = arr.attrs._ARRAY_DIMENSIONS as string[] | null;
        const vid = item.path.split("/").pop() as string;
        vars[vid] = new DataVar<DataType, HttpZarr>(
          vid,
          array_dims || [],
          arr.attrs as Record<string, unknown>,
          arr
        );
        if (array_dims)
          array_dims.forEach((dim: string, i: number) => {
            const n = (arr.shape as number[])[i];
            if (dims[dim] && dims[dim] != n) {
              throw new Error(
                `Inconsistent dimension size for ${dim}: ${dims[dim]} != ${n}`
              );
            } else {
              dims[dim] = n;
            }
          });
      }
    }
    // `_coordinates` may be absent; JSON.parse(undefined) would throw, so
    // only parse when there is actual text and otherwise fall back to {}.
    const rawCoords: unknown = root.attrs["_coordinates"];
    const parsedCoords: unknown =
      typeof rawCoords === "string"
        ? rawCoords.length > 0
          ? JSON.parse(rawCoords)
          : undefined
        : rawCoords;
    const coords = (parsedCoords || {}) as Coordkeys;
    return new Dataset<HttpZarr>(
      dims,
      vars,
      root.attrs,
      options.coordkeys || coords,
      root
    );
  }

  /**
   * Creates an in-memory Dataset from an Arrow table.
   *
   * Every column becomes a variable on a single `record` dimension.
   * Timestamp columns are converted to Unix seconds stored as Float64 and
   * binary columns to base64 strings.
   *
   * @param data - The Arrow table.
   * @param coordkeys - The coordinates map of the dataset.
   * @returns A promise that resolves to a Dataset instance.
   */
  static async fromArrow(
    data: Table,
    coordkeys: Coordkeys
  ): Promise<Dataset<TempZarr>> {
    const attributes = {};
    const dimensions = { record: data.numRows };
    const variables = {} as Record<string, DataVariable>;
    data.schema.fields.forEach((field) => {
      const column = data.getChild(field.name);
      let attrs = {};
      let array = column?.toArray();
      let dtype = arrowTypeToDType(field.type);
      //Store times internally as Unix seconds in Float64 - this is consistent with Datamesh zarr
      if (ArrowDataType.isTimestamp(field.type)) {
        const carray = new Float64Array(array.length);
        const m = BigInt(1000 ** (field.type.unit - 0));
        for (let i = 0; i < array.length; i++) {
          carray[i] = Number(array[i] / m);
        }
        array = carray;
        dtype = "float64";
        attrs = { unit: `Unix timestamp (s)` };
      } else if (ArrowDataType.isBinary(field.type)) {
        const carray = [];
        for (let i = 0; i < array.length; i++) {
          // Buffer.from replaces the deprecated `new Buffer(...)` constructor.
          carray.push(Buffer.from(array[i]).toString("base64"));
        }
        array = carray;
        dtype = "v2:object";
      }
      variables[field.name] = {
        dimensions: ["record"],
        attributes: attrs,
        data: array,
        dtype,
      };
    });
    return await Dataset.init({ dimensions, variables, attributes }, coordkeys);
  }

  /**
   * Creates an in-memory Dataset from a GeoJSON Feature or FeatureCollection.
   *
   * Features become records on an `index` dimension, with a `geometry`
   * variable plus one variable per property key found in any feature.
   *
   * @param geojson - The GeoJSON Feature or FeatureCollection.
   * @param coordkeys - Optional coordinates map; the geometry key `g` is always set to `"geometry"`.
   * @returns A promise that resolves to a Dataset instance.
   * @throws Will throw an error if the input is neither a Feature nor a FeatureCollection, or contains no features.
   */
  static async fromGeojson(
    geojson: FeatureCollection | Feature,
    coordkeys?: Coordkeys
  ): Promise<Dataset<TempZarr>> {
    if (
      !("features" in geojson && Array.isArray(geojson.features)) &&
      !("geometry" in geojson)
    ) {
      throw new Error("Invalid GeoJSON");
    }
    const features: Feature[] =
      "features" in geojson && geojson.features
        ? geojson.features
        : [geojson as Feature];
    if (features.length === 0) {
      throw new Error("FeatureCollection contains no features");
    }

    // Extract all unique property keys from features
    const propertyKeys = new Set<string>();
    features.forEach((feature) => {
      if (feature.properties) {
        Object.keys(feature.properties).forEach((key) => propertyKeys.add(key));
      }
    });

    // Create a flattened array of records
    const records: Array<Record<string, unknown>> = features.map(
      (feature: Feature) => {
        const record: Record<string, unknown> = {
          geometry: feature.geometry,
        };
        if (feature.properties) {
          Object.assign(record, feature.properties);
        }
        return record;
      }
    );

    // Create schema with dimensions and variables
    const schema: Schema = {
      dimensions: { index: records.length },
      variables: {},
      attributes: {},
    };

    // Create temporary dataset
    const dataset = await Dataset.init(schema, { ...coordkeys, g: "geometry" });

    // Add geometry variable
    await dataset.assign(
      "geometry",
      ["index"],
      records.map((r) => r.geometry) as Data,
      { description: "GeoJSON geometry" }
    );

    // Add property variables
    for (const key of propertyKeys) {
      const values = records.map((r) => r[key]) as Data;
      await dataset.assign(key, ["index"], values, {
        description: `Property: ${key}`,
      });
    }

    return dataset;
  }

  /**
   * Initializes an in memory Dataset instance from a data object.
   * @param datasource - An object containing dimensions, data variables, and attributes.
   * @param coordkeys - Optional coordinates map of the dataset.
   * @returns A promise that resolves to a Dataset instance.
   */
  static async init(
    datasource: Schema,
    coordkeys?: Coordkeys
  ): Promise<Dataset<TempZarr>> {
    const root = (await zarr.create(new Map(), {
      attributes: { created: new Date() },
    })) as TempZarr;
    const ds = new Dataset(
      datasource.dimensions,
      {},
      datasource.attributes || {},
      coordkeys || {},
      root
    );
    for (const k in datasource.variables) {
      const { dimensions, attributes, data, dtype }: DataVariable =
        datasource.variables[k];
      await ds.assign(
        k,
        dimensions,
        data as Data,
        attributes,
        // A schema may spell the datatype as "string"; store it as an object array.
        dtype && (dtype as string) === "string" ? "v2:object" : dtype
      );
    }
    return ds;
  }

  /**
   * Converts the dataset into a dataframe format.
   *
   * @returns {Promise<Record<string, unknown>[]>} A promise that resolves to an array of records,
   * where each record represents a row in the dataframe.
   *
   * @remarks
   * This method iterates over the data variables, retrieves their dimensions and data,
   * and then flattens the data into a dataframe structure.
   * Time coordinates are converted to ISO8601 format.
   * BigInt datatypes (int64 and uint64) are coerced to number.
   *
   * @example
   * ```typescript
   * const dataframe = await instance.asDataframe();
   * console.log(dataframe);
   * ```
   */
  //@measureTime
  async asDataframe(): Promise<Record<string, unknown>[]> {
    const data = {} as Record<string, DataVariable>;
    const bigint: string[] = [];
    for (const k in this.variables) {
      data[k] = {
        attributes: this.variables[k].attributes,
        dimensions: this.variables[k].dimensions,
      };
      data[k].data = (await this.variables[k].get()) as Data;
      const dtype = this.variables[k].arr.dtype as string;
      // Both 64-bit integer kinds are read back as BigInt values.
      if (dtype === "int64" || dtype === "uint64") {
        bigint.push(k);
      }
    }
    const df = flatten(data, { ...this.dimensions }, []);
    if (this.coordkeys.t) {
      for (let i = 0; i < df.length; i++) {
        df[i][this.coordkeys.t] = new Date(
          1000 * (df[i][this.coordkeys.t] as number)
        ).toISOString();
      }
    }
    //Convert BigInt to number
    for (const k of bigint) {
      for (let i = 0; i < df.length; i++) {
        df[i][k] = Number(df[i][k]);
      }
    }
    return df;
  }

  /**
   * Converts the dataset into a GeoJSON FeatureCollection.
   * @param geometry - Optional GeoJSON geometry to apply to all records, otherwise geometry column is required. Will override geometry column if present.
   *
   * @returns {Promise<FeatureCollection>} A promise that resolves to a FeatureCollection
   * with one feature per row of the dataframe.
   *
   * @throws Will throw an error if no geometry is found in data or as a parameter
   *
   * @remarks
   * This method builds the dataframe with `asDataframe` and turns every row into a feature.
   * Values of the geometry column may be geometry objects, GeoJSON strings or
   * base64-encoded WKB strings.
   *
   * @example
   * ```typescript
   * const collection = await instance.asGeojson();
   * console.log(collection.features);
   * ```
   */
  async asGeojson(geometry?: Geometry): Promise<FeatureCollection> {
    if (!this.coordkeys.g && !geometry) {
      throw new Error("No geometry found");
    }
    const features = [] as Feature[];
    const df = await this.asDataframe();
    for (let i = 0; i < df.length; i++) {
      const { ...properties } = df[i];
      let geom: Geometry | null | undefined = geometry;
      if (!geom && this.coordkeys.g) {
        delete properties[this.coordkeys.g];
        geom = decodeGeometry(df[i][this.coordkeys.g]);
      }
      features.push({
        type: "Feature",
        geometry: geom as Geometry,
        properties,
      });
    }
    return {
      type: "FeatureCollection",
      features,
    };
  }

  /**
   * Asynchronously assigns data to a variable in the dataset.
   *
   * @param varid - The identifier for the variable.
   * @param dims - An array of dimension names corresponding to the data.
   * @param data - The data to be assigned, which can be a multi-dimensional array.
   * @param attrs - Optional. A record of attributes to be associated with the variable.
   * @param dtype - Optional. The data type of the data.
   * @param chunks - Optional. An array specifying the chunk sizes for the data.
   *
   * @returns A promise that resolves when the data has been successfully assigned.
   * @throws Will throw an error if the shape of the data does not match the provided dimensions.
   * @throws Will throw an error if an existing dimension size does not match the new data.
   */
  async assign(
    varid: string,
    dims: string[],
    data: Data,
    attrs?: Record<string, unknown>,
    dtype?: DataType,
    chunks?: number[]
  ): Promise<void> {
    const shape = getShape(data);
    if (shape.length != dims.length) {
      throw new Error("Data shape does not match dimensions");
    }
    dims.forEach((dim, i) => {
      if (this.dimensions[dim]) {
        if (this.dimensions[dim] != shape[i]) {
          throw new Error(
            `Existing size of dimension ${dim} does not match new data`
          );
        }
      } else {
        this.dimensions[dim] = shape[i];
      }
    });
    const _dtype = dtype || getDtype(data);
    const arr = await zarr.create(
      this.root.resolve(varid) as Location<Mutable>,
      {
        shape,
        data_type: _dtype,
        chunk_shape: chunks || shape,
        codecs:
          _dtype == "v2:object" ? [{ name: "json2" } as CodecMetadata] : [],
      }
    );
    // Flatten the data and convert plain arrays to the buffer kind that
    // matches the datatype before writing.
    let _data = ravel(data);
    if (_data.length == 0) {
      _data = null;
    } else if (_dtype == "bool") {
      _data = new BoolArray(_data);
    } else if (Array.isArray(_data) && _dtype == "float32") {
      _data = Float32Array.from(_data, (n) => (n == null ? NaN : n));
    } else if (Array.isArray(_data) && _dtype == "float64") {
      _data = Float64Array.from(_data, (n) => (n == null ? NaN : n));
    } else if (Array.isArray(_data) && _dtype == "int8") {
      _data = Int8Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "int16") {
      _data = Int16Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "int32") {
      _data = Int32Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "int64") {
      _data = BigInt64Array.from(_data.map((d) => BigInt(d)));
    } else if (Array.isArray(_data) && _dtype == "uint8") {
      _data = Uint8Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "uint16") {
      _data = Uint16Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "uint32") {
      _data = Uint32Array.from(_data);
    } else if (Array.isArray(_data) && _dtype == "uint64") {
      _data = BigUint64Array.from(_data.map((d) => BigInt(d)));
    }
    if (_data) {
      await set(
        arr,
        shape.map(() => null),
        {
          data: _data,
          shape: shape,
          stride: get_strides(shape),
        }
      );
    }
    this.variables[varid] = new DataVar(varid, dims, attrs || {}, arr);
  }
}
|