@oceanum/datamesh 0.1.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,7 +4,7 @@ A typescript library for interacting with the Oceanum.io Datamesh.
4
4
 
5
5
  ## Installation
6
6
 
7
- You can use this library in Node.js, Deno or browser code
7
+ You can use this library in Node.js, Deno or browser code (with the caveat below)
8
8
 
9
9
  ```sh
10
10
  npm install @oceanum/datamesh
@@ -16,15 +16,16 @@ npm install @oceanum/datamesh
16
16
  import { Connector } from "@oceanum/datamesh";
17
17
 
18
18
  //Instantiate the Datamesh Connector
19
- const datamesh=Connector("my_datamesh_token"); //Get you datamesh token from your Oceanum.io account
19
+ const datamesh = Connector("my_datamesh_token"); //Get your datamesh token from your Oceanum.io account
20
20
 
21
21
  //Define a datamesh query
22
- const query={
23
- "datasource":"oceanum-sizing_giants"
24
- }
22
+ const query = {
23
+ datasource: "oceanum-sizing_giants",
24
+ };
25
25
 
26
26
  //Get the data
27
- const data=await datamesh.query(query);
27
+ const data = await datamesh.query(query);
28
28
  ```
29
29
 
30
- DO NOT put your Datamesh token directly into browser code. For use in an SPA, you can either forward your Datamesh request through a proxy or implement a token exchange. Read the [library documentation](https://oceanum-js.oceanum.io/) to learn more.
30
+ > [!WARNING]
31
+ > DO NOT put your Datamesh token directly into browser code. For use in an SPA, you should forward your Datamesh request through a reverse proxy to conceal your token. Read the [library documentation](https://oceanum-js.oceanum.io/datamesh) to learn more.
package/package.json CHANGED
@@ -1,22 +1,24 @@
1
1
  {
2
2
  "name": "@oceanum/datamesh",
3
- "version": "0.1.1",
4
- "scripts": {},
3
+ "version": "0.4.2",
4
+ "scripts": {
5
+ "build:docs": "typedoc"
6
+ },
5
7
  "publishConfig": {
6
8
  "access": "public"
7
9
  },
8
10
  "dependencies": {
9
- "@types/geojson": "^7946.0.14",
11
+ "@types/geojson": "^7946.0.16",
10
12
  "@types/object-hash": "^3.0.6",
11
- "@zarrita/core": "^0.1.0-next.15",
12
- "@zarrita/indexing": "^0.1.0-next.17",
13
- "@zarrita/storage": "^0.1.0-next.7",
13
+ "apache-arrow": "^19.0.1",
14
+ "buffer": "^6.0.3",
14
15
  "dayjs": "^1.11.13",
15
16
  "idb-keyval": "^6.2.1",
16
17
  "object-hash": "^3.0.0",
17
- "zarrita": "^0.4.0-next.17"
18
+ "wkx-ts": "^1.0.1",
19
+ "zarrita": "^0.4.0-next.23"
18
20
  },
21
+ "type": "module",
19
22
  "main": "./dist/index.js",
20
- "module": "./dist/index.mjs",
21
23
  "typings": "./dist/index.d.ts"
22
24
  }
package/src/index.js ADDED
@@ -0,0 +1,20 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ exports.__esModule = true;
17
+ __exportStar(require("./lib/connector"), exports);
18
+ __exportStar(require("./lib/datasource"), exports);
19
+ __exportStar(require("./lib/query"), exports);
20
+ __exportStar(require("./lib/datamodel"), exports);
package/src/index.ts CHANGED
@@ -1,2 +1,4 @@
1
1
  export * from "./lib/connector";
2
2
  export * from "./lib/datasource";
3
+ export * from "./lib/query";
4
+ export * from "./lib/datamodel";
@@ -1,56 +1,120 @@
1
1
  import { Datasource } from "./datasource";
2
2
  import { IQuery, Stage } from "./query";
3
- import { Dataset, DatameshStore } from "./datamodel";
3
+ import { Dataset, HttpZarr, TempZarr } from "./datamodel";
4
+ import { measureTime } from "./observe";
5
+ import { tableFromIPC, Table } from "apache-arrow";
6
+ import { Session } from "./session";
4
7
 
5
8
  /**
6
9
  * Datamesh connector class.
7
10
  *
8
11
  * All datamesh operations are methods of this class.
12
+ *
9
13
  */
14
+ const DATAMESH_SERVICE =
15
+ process.env.DATAMESH_SERVICE || "https://datamesh.oceanum.io";
16
+
10
17
  export class Connector {
18
+ static LAZY_LOAD_SIZE = 1e8;
11
19
  private _token: string;
12
- private _proto: string;
13
20
  private _host: string;
14
21
  private _authHeaders: Record<string, string>;
15
22
  private _gateway: string;
23
+ private _nocache = false;
24
+ private _isV1 = false;
25
+ private _sessionParams: Record<string, number> = {};
26
+ private _currentSession: Session | null = null;
27
+ service?: string;
28
+ gateway?: string;
16
29
 
17
30
  /**
18
31
  * Datamesh connector constructor
19
32
  *
20
33
  * @param token - Your datamesh access token. Defaults to the environment variable DATAMESH_TOKEN if it is defined, otherwise to the literal string "$DATAMESH_TOKEN". DO NOT put your Datamesh token directly into public facing browser code.
21
- * @param service - URL of datamesh service. Defaults to environment variable DATAMESH_SERVICE or "https://datamesh.oceanum.io".
22
- * @param gateway - URL of gateway service. Defaults to "https://gateway.datamesh.oceanum.io".
34
+ * @param options - Constructor options.
35
+ * @param options.service - URL of datamesh service. Defaults to environment variable DATAMESH_SERVICE or "https://datamesh.oceanum.io".
36
+ * @param options.gateway - URL of gateway service. Defaults to "https://gateway.<datamesh_service_domain>".
37
+ * @param options.jwtAuth - JWT for Oceanum service.
38
+ * @param options.nocache - Disable caching of datamesh results.
39
+ * @param options.sessionDuration - The desired length of time for acquired datamesh sessions in hours. Will be 1 hour by default.
23
40
  *
24
41
  * @throws {Error} - If a valid token is not provided.
25
42
  */
26
43
  constructor(
27
44
  token = process.env.DATAMESH_TOKEN || "$DATAMESH_TOKEN",
28
- service = process.env.DATAMESH_SERVICE || "https://datamesh.oceanum.io",
29
- gateway = process.env.DATAMESH_GATEWAY ||
30
- "https://gateway.datamesh.oceanum.io"
45
+ options?: {
46
+ service?: string;
47
+ gateway?: string;
48
+ jwtAuth?: string;
49
+ nocache?: boolean;
50
+ sessionDuration?: number;
51
+ }
31
52
  ) {
32
- if (!token) {
53
+ if (!token && !options?.jwtAuth) {
33
54
  throw new Error(
34
55
  "A valid datamesh token must be supplied as a connector constructor argument or defined in environment variables as DATAMESH_TOKEN"
35
56
  );
36
57
  }
37
58
 
38
59
  this._token = token;
39
- const url = new URL(service);
40
- this._proto = url.protocol;
41
- this._host = url.hostname;
42
- this._authHeaders = {
43
- Authorization: `Token ${this._token}`,
44
- "X-DATAMESH-TOKEN": this._token,
45
- };
60
+ this._nocache = options?.nocache ?? false;
61
+ const url = new URL(options?.service || DATAMESH_SERVICE);
62
+ this._host = `${url.protocol}//${url.hostname}`;
63
+ this._authHeaders = options?.jwtAuth
64
+ ? {
65
+ Authorization: `Bearer ${options.jwtAuth}`,
66
+ }
67
+ : {
68
+ Authorization: `Token ${this._token}`,
69
+ "X-DATAMESH-TOKEN": this._token,
70
+ };
46
71
 
47
- this._gateway = gateway || `${this._proto}//gateway.${this._host}`;
72
+ /* The gateway can be overridden (e.g. for testing) because the gateway service is not always hosted on the service domain */
73
+ this._gateway =
74
+ options?.gateway || `${url.protocol}//gateway.${url.hostname}`;
48
75
 
49
76
  if (
50
77
  this._host.split(".").slice(-1)[0] !==
51
78
  this._gateway.split(".").slice(-1)[0]
52
79
  ) {
53
- console.warn("Gateway and service domain do not match");
80
+ console.warn("Datamesh gateway and service domains do not match");
81
+ }
82
+
83
+ // Set session parameters if provided
84
+ if (
85
+ options?.sessionDuration &&
86
+ typeof options.sessionDuration === "number"
87
+ ) {
88
+ this._sessionParams = { duration: options.sessionDuration };
89
+ }
90
+
91
+ // Check if the API is v1 (supports sessions)
92
+ this._checkApiVersion();
93
+ }
94
+
95
+ /**
96
+ * Check if the API version supports sessions.
97
+ *
98
+ * @private
99
+ */
100
+ private async _checkApiVersion(): Promise<void> {
101
+ try {
102
+ // Simply check to see if we can get a session
103
+ const response = await fetch(`${this._gateway}/session`, {
104
+ headers: this._authHeaders,
105
+ });
106
+
107
+ if (response.status === 200) {
108
+ this._isV1 = true;
109
+ console.info("Using datamesh API version 1");
110
+ } else {
111
+ this._isV1 = false;
112
+ console.info("Using datamesh API version 0");
113
+ }
114
+ } catch {
115
+ // If we can't connect to the gateway, assume it's not a v1 API
116
+ this._isV1 = false;
117
+ console.info("Using datamesh API version 0");
54
118
  }
55
119
  }
56
120
 
@@ -66,10 +130,10 @@ export class Connector {
66
130
  /**
67
131
  * Check the status of the metadata server.
68
132
  *
69
- * @returns True if the metadata server is up, false otherwise.
133
+ * @returns True if the server is up, false otherwise.
70
134
  */
71
135
  async status(): Promise<boolean> {
72
- const response = await fetch(`${this._proto}//${this._host}`, {
136
+ const response = await fetch(this._host, {
73
137
  headers: this._authHeaders,
74
138
  });
75
139
  return response.status === 200;
@@ -94,6 +158,56 @@ export class Connector {
94
158
  }
95
159
  }
96
160
 
161
+ /**
162
+ * Create a new session.
163
+ *
164
+ * @param options - Session options.
165
+ * @param options.duration - The desired length of time for the session in hours. Defaults to the value set in the constructor or 1 hour.
166
+ * @returns A new session instance.
167
+ */
168
+ async createSession(options: { duration?: number } = {}): Promise<Session> {
169
+ const sessionOptions = {
170
+ duration: options.duration || this._sessionParams.duration || 1,
171
+ };
172
+ this._currentSession = await Session.acquire(this, sessionOptions);
173
+ return this._currentSession;
174
+ }
175
+
176
+ /**
177
+ * Get the current session or create a new one if none exists.
178
+ *
179
+ * @returns The current session.
180
+ */
181
+ async getSession(): Promise<Session> {
182
+ if (!this._currentSession) {
183
+ return this.createSession();
184
+ }
185
+ return this._currentSession;
186
+ }
187
+
188
+ /**
189
+ * Get headers with session information if available.
190
+ *
191
+ * @param additionalHeaders - Additional headers to include.
192
+ * @returns Headers with session information.
193
+ */
194
+ private async getSessionHeaders(
195
+ additionalHeaders: Record<string, string> = {}
196
+ ): Promise<Record<string, string>> {
197
+ if (this._isV1 && !this._currentSession) {
198
+ await this.createSession();
199
+ }
200
+
201
+ if (this._currentSession) {
202
+ return this._currentSession.addHeader({
203
+ ...this._authHeaders,
204
+ ...additionalHeaders,
205
+ });
206
+ }
207
+
208
+ return { ...this._authHeaders, ...additionalHeaders };
209
+ }
210
+
97
211
  /**
98
212
  * Request metadata from datamesh.
99
213
  *
@@ -105,20 +219,17 @@ export class Connector {
105
219
  datasourceId = "",
106
220
  params = {} as Record<string, string>
107
221
  ): Promise<Response> {
108
- const url = new URL(
109
- `${this._proto}//${this._host}/datasource/${datasourceId}`
110
- );
222
+ const url = new URL(`${this._host}/datasource/${datasourceId}`);
111
223
  Object.keys(params).forEach((key) =>
112
224
  url.searchParams.append(key, params[key])
113
225
  );
114
226
 
227
+ const headers = await this.getSessionHeaders();
115
228
  const response = await fetch(url.toString(), {
116
- headers: this._authHeaders,
229
+ headers,
117
230
  });
118
231
 
119
- if (response.status === 404) {
120
- throw new Error(`Datasource ${datasourceId} not found`);
121
- } else if (response.status === 401) {
232
+ if (response.status === 403) {
122
233
  throw new Error(`Datasource ${datasourceId} not authorized`);
123
234
  }
124
235
 
@@ -133,15 +244,16 @@ export class Connector {
133
244
  * @param dataFormat - The format of the requested data, sent as the Accept header. Defaults to "application/vnd.apache.arrow.file".
134
245
  * @returns The response body parsed into an Apache Arrow table.
135
246
  */
136
- async dataRequest(
137
- datasourceId: string,
138
- dataFormat = "application/json"
139
- ): Promise<Blob> {
140
- const response = await fetch(`${this._gateway}/data/${datasourceId}`, {
141
- headers: { Accept: dataFormat, ...this._authHeaders },
247
+ private async dataRequest(
248
+ qhash: string,
249
+ dataFormat = "application/vnd.apache.arrow.file"
250
+ ): Promise<Table> {
251
+ const headers = await this.getSessionHeaders({ Accept: dataFormat });
252
+ const response = await fetch(`${this._gateway}/oceanql/${qhash}?f=arrow`, {
253
+ headers,
142
254
  });
143
255
  await this.validateResponse(response);
144
- return response.blob();
256
+ return tableFromIPC(await response.arrayBuffer());
145
257
  }
146
258
 
147
259
  /**
@@ -150,11 +262,16 @@ export class Connector {
150
262
  * @param query - The query to stage.
151
263
  * @returns The staged response.
152
264
  */
153
- private async stageRequest(query: IQuery): Promise<Stage | null> {
265
+ @measureTime
266
+ async stageRequest(query: IQuery): Promise<Stage | null> {
154
267
  const data = JSON.stringify(query);
268
+ const headers = await this.getSessionHeaders({
269
+ "Content-Type": "application/json",
270
+ });
271
+
155
272
  const response = await fetch(`${this._gateway}/oceanql/stage/`, {
156
273
  method: "POST",
157
- headers: { "Content-Type": "application/json", ...this._authHeaders },
274
+ headers,
158
275
  body: data,
159
276
  });
160
277
  if (response.status >= 400) {
@@ -171,16 +288,60 @@ export class Connector {
171
288
  * Execute a query to the datamesh.
172
289
  *
173
290
  * @param query - The query to execute.
291
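+ * @param options.timeout - Timeout forwarded to the zarr dataset loader; falls back to 60000 when not set (units are not shown here, presumably milliseconds). Not used when the result is returned via the arrow transport.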
174
292
  * @returns The response from the server.
175
293
  */
176
- async query(query: IQuery): Promise<Dataset<DatameshStore> | null> {
294
+ @measureTime
295
+ async query(
296
+ query: IQuery,
297
+ options: { timeout?: number } = {}
298
+ ): Promise<Dataset<HttpZarr | TempZarr> | null> {
299
+ //Stage the query
177
300
  const stage = await this.stageRequest(query);
178
301
  if (!stage) {
179
302
  console.warn("No data found for query");
180
303
  return null;
181
304
  }
182
- const url = `${this._gateway}/zarr/${stage.qhash}`;
183
- const dataset = await Dataset.zarr(url, this._authHeaders);
305
+ //For smaller dataframes use arrow for transport
306
+ if (stage.size < Connector.LAZY_LOAD_SIZE && stage.container != "dataset") {
307
+ const table = await this.dataRequest(stage.qhash);
308
+ const dataset = await Dataset.fromArrow(table, stage.coordkeys);
309
+ return dataset;
310
+ }
311
+ let url = null;
312
+ let params = undefined;
313
+ if (
314
+ query.timefilter ||
315
+ query.geofilter ||
316
+ query.levelfilter ||
317
+ query.coordfilter
318
+ ) {
319
+ url = `${this._gateway}/zarr/${stage.qhash}`;
320
+ } else {
321
+ url = `${this._gateway}/zarr/${query.datasource}`;
322
+ params = query.parameters;
323
+ }
324
+
325
+ // Get headers with session information if available
326
+ const headers = await this.getSessionHeaders();
327
+
328
+ // Pass the headers to the Dataset.zarr method
329
+ const dataset = await Dataset.zarr(url, headers, {
330
+ parameters: params,
331
+ timeout: options.timeout || 60000, // Default timeout value
332
+ nocache: this._nocache,
333
+ });
334
+
335
+ if (query.variables) {
336
+ for (const v of Object.keys(dataset.variables)) {
337
+ if (
338
+ !query.variables.includes(v) &&
339
+ !Object.values(dataset.coordkeys).includes(v)
340
+ ) {
341
+ delete dataset.variables[v];
342
+ }
343
+ }
344
+ }
184
345
  return dataset;
185
346
  }
186
347
 
@@ -191,6 +352,7 @@ export class Connector {
191
352
  * @returns The datasource instance.
192
353
  * @throws {Error} - If the datasource cannot be found or is not authorized.
193
354
  */
355
+ //@measureTime
194
356
  async getDatasource(datasourceId: string): Promise<Datasource> {
195
357
  const meta = await this.metadataRequest(datasourceId);
196
358
  const metaDict = await meta.json();
@@ -206,23 +368,28 @@ export class Connector {
206
368
  *
207
369
  * @param datasourceId - Unique datasource ID.
208
370
  * @param parameters - Additional datasource parameters.
209
- * @param useDask - Whether to use Dask for loading. Defaults to false.
210
- * @returns The datasource container.
371
+ * @returns The dataset.
211
372
  */
373
+ //@measureTime
212
374
  async loadDatasource(
213
375
  datasourceId: string,
214
376
  parameters: Record<string, string | number> = {}
215
- ): Promise<Dataset<DatameshStore> | null> {
377
+ ): Promise<Dataset<HttpZarr | TempZarr> | null> {
216
378
  const query = { datasource: datasourceId, parameters };
217
- const stage = await this.stageRequest(query);
218
- if (!stage) {
219
- console.warn("No data found for query");
220
- return null;
221
- }
222
- const dataset = await Dataset.zarr(
223
- `${this._gateway}/zarr/${stage.qhash}`,
224
- this._authHeaders
225
- );
379
+ const dataset = await this.query(query);
226
380
  return dataset;
227
381
  }
382
+
383
+ /**
384
+ * Close the current session if one exists.
385
+ *
386
+ * @param finaliseWrite - Whether to finalise any write operations. Defaults to false.
387
+ * @returns A promise that resolves when the session is closed.
388
+ */
389
+ async closeSession(finaliseWrite = false): Promise<void> {
390
+ if (this._currentSession) {
391
+ await this._currentSession.close(finaliseWrite);
392
+ this._currentSession = null;
393
+ }
394
+ }
228
395
  }