pyetp 0.0.46__py3-none-any.whl → 0.0.47__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- energistics/__init__.py +0 -0
- energistics/etp/__init__.py +0 -0
- energistics/etp/v12/__init__.py +0 -0
- energistics/etp/v12/datatypes/__init__.py +25 -0
- energistics/etp/v12/datatypes/data_array_types/__init__.py +27 -0
- energistics/etp/v12/datatypes/object/__init__.py +22 -0
- energistics/etp/v12/protocol/__init__.py +0 -0
- energistics/etp/v12/protocol/core/__init__.py +19 -0
- energistics/etp/v12/protocol/data_array/__init__.py +51 -0
- energistics/etp/v12/protocol/dataspace/__init__.py +23 -0
- energistics/etp/v12/protocol/discovery/__init__.py +21 -0
- energistics/etp/v12/protocol/store/__init__.py +27 -0
- energistics/etp/v12/protocol/transaction/__init__.py +27 -0
- pyetp/_version.py +2 -2
- pyetp/client.py +218 -172
- pyetp/errors.py +39 -0
- {pyetp-0.0.46.dist-info → pyetp-0.0.47.dist-info}/METADATA +1 -1
- pyetp-0.0.47.dist-info/RECORD +39 -0
- {pyetp-0.0.46.dist-info → pyetp-0.0.47.dist-info}/WHEEL +1 -1
- {pyetp-0.0.46.dist-info → pyetp-0.0.47.dist-info}/top_level.txt +2 -0
- rddms_io/__init__.py +0 -0
- rddms_io/client.py +1234 -0
- rddms_io/data_types.py +11 -0
- resqml_objects/v201/generated.py +44 -2
- resqml_objects/v201/utils.py +38 -0
- pyetp-0.0.46.dist-info/RECORD +0 -22
- {pyetp-0.0.46.dist-info → pyetp-0.0.47.dist-info}/licenses/LICENSE.md +0 -0
rddms_io/client.py
ADDED
|
@@ -0,0 +1,1234 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import datetime
|
|
3
|
+
import logging
|
|
4
|
+
import typing
|
|
5
|
+
import uuid
|
|
6
|
+
from collections.abc import AsyncGenerator, Sequence
|
|
7
|
+
from types import TracebackType
|
|
8
|
+
|
|
9
|
+
import numpy.typing as npt
|
|
10
|
+
from pydantic import SecretStr
|
|
11
|
+
|
|
12
|
+
import resqml_objects.v201 as ro
|
|
13
|
+
from energistics.etp.v12.datatypes import ArrayOfString, DataValue, Uuid
|
|
14
|
+
from energistics.etp.v12.datatypes.data_array_types import (
|
|
15
|
+
DataArrayIdentifier,
|
|
16
|
+
DataArrayMetadata,
|
|
17
|
+
)
|
|
18
|
+
from energistics.etp.v12.datatypes.object import (
|
|
19
|
+
ContextInfo,
|
|
20
|
+
ContextScopeKind,
|
|
21
|
+
DataObject,
|
|
22
|
+
Dataspace,
|
|
23
|
+
RelationshipKind,
|
|
24
|
+
Resource,
|
|
25
|
+
)
|
|
26
|
+
from energistics.etp.v12.protocol.data_array import (
|
|
27
|
+
GetDataArrayMetadata,
|
|
28
|
+
GetDataArrayMetadataResponse,
|
|
29
|
+
)
|
|
30
|
+
from energistics.etp.v12.protocol.dataspace import (
|
|
31
|
+
DeleteDataspaces,
|
|
32
|
+
DeleteDataspacesResponse,
|
|
33
|
+
GetDataspaces,
|
|
34
|
+
GetDataspacesResponse,
|
|
35
|
+
PutDataspaces,
|
|
36
|
+
PutDataspacesResponse,
|
|
37
|
+
)
|
|
38
|
+
from energistics.etp.v12.protocol.discovery import (
|
|
39
|
+
GetResources,
|
|
40
|
+
GetResourcesEdgesResponse,
|
|
41
|
+
GetResourcesResponse,
|
|
42
|
+
)
|
|
43
|
+
from energistics.etp.v12.protocol.store import (
|
|
44
|
+
DeleteDataObjects,
|
|
45
|
+
DeleteDataObjectsResponse,
|
|
46
|
+
GetDataObjects,
|
|
47
|
+
GetDataObjectsResponse,
|
|
48
|
+
PutDataObjects,
|
|
49
|
+
PutDataObjectsResponse,
|
|
50
|
+
)
|
|
51
|
+
from energistics.etp.v12.protocol.transaction import (
|
|
52
|
+
CommitTransaction,
|
|
53
|
+
RollbackTransaction,
|
|
54
|
+
RollbackTransactionResponse,
|
|
55
|
+
StartTransaction,
|
|
56
|
+
StartTransactionResponse,
|
|
57
|
+
)
|
|
58
|
+
from pyetp import utils_arrays
|
|
59
|
+
from pyetp.client import ETPClient, ETPError, etp_connect, timeout_intervals
|
|
60
|
+
from pyetp.errors import (
|
|
61
|
+
ETPTransactionFailure,
|
|
62
|
+
parse_and_raise_response_errors,
|
|
63
|
+
)
|
|
64
|
+
from pyetp.uri import DataObjectURI, DataspaceURI
|
|
65
|
+
from rddms_io.data_types import LinkedObjects
|
|
66
|
+
from resqml_objects import parse_resqml_v201_object, serialize_resqml_v201_object
|
|
67
|
+
from resqml_objects.v201.utils import find_hdf5_datasets
|
|
68
|
+
|
|
69
|
+
logger = logging.getLogger(__name__)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
class RDDMSClient:
|
|
73
|
+
"""
|
|
74
|
+
Client using ETP to communicate with an RDDMS (Reservoir Domain Data
|
|
75
|
+
Management Services) server. It is specifically
|
|
76
|
+
tailored towards the OSDU open-etp-server (see:
|
|
77
|
+
https://community.opengroup.org/osdu/platform/domain-data-mgmt-services/reservoir/open-etp-server)
|
|
78
|
+
and made with the intention to make it easier to interact with RDDMS by
|
|
79
|
+
exposing ergonomic user-facing functions.
|
|
80
|
+
|
|
81
|
+
The client is meant to be set up via :func:`rddms_connect`.
|
|
82
|
+
|
|
83
|
+
Parameters
|
|
84
|
+
----------
|
|
85
|
+
etp_client: ETPClient
|
|
86
|
+
An instance of `ETPClient` from `pyetp`.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
def __init__(self, etp_client: ETPClient) -> None:
    """
    Wrap an already constructed ETP client.

    Parameters
    ----------
    etp_client: ETPClient
        The `ETPClient` instance that is kept on the object as
        `self.etp_client`.
    """
    # The request methods send their ETP messages through this client.
    self.etp_client = etp_client
|
|
91
|
+
|
|
92
|
+
async def close(self) -> None:
    """
    Close the ETP-connection held by this client.

    This is the manual counterpart to leaving a context manager. When the
    client has been created with an `await`-statement, call this method
    once you are done to stop the connection.

    >>> rddms_client = await rddms_connect(...)
    >>> ...
    >>> await rddms_client.close()
    """
    client = self.etp_client
    await client.close()
|
|
104
|
+
|
|
105
|
+
async def list_dataspaces(
    self, store_last_write_filter: datetime.datetime | int | None = None
) -> list[Dataspace]:
    """
    List the dataspaces on the ETP-server.

    Parameters
    ----------
    store_last_write_filter: datetime.datetime | int | None
        Optional filter forwarded in the `GetDataspaces`-message to limit
        the results to dataspaces newer than the given time. A
        `datetime`-object is converted to an integer timestamp in
        microseconds before it is sent, an integer is sent as it is. The
        default is `None`, meaning all dataspaces will be included.

    Returns
    -------
    list[Dataspace]
        The ETP `Dataspace`-data objects collected from all responses. See
        section 23.43.10 of the ETP v1.2 standards documentation for an
        accurate description of the different fields.
    """
    last_write = store_last_write_filter
    if isinstance(last_write, datetime.datetime):
        # The message carries the filter as microseconds since the epoch.
        last_write = int(last_write.timestamp() * 1e6)

    request = GetDataspaces(store_last_write_filter=last_write)
    responses = await self.etp_client.send(request)
    parse_and_raise_response_errors(
        responses,
        GetDataspacesResponse,
        "RDDMSClient.list_dataspaces",
    )

    # The answer can be spread over several responses; flatten them.
    found = []
    for response in responses:
        found.extend(response.dataspaces)
    return found
|
|
141
|
+
|
|
142
|
+
async def delete_dataspace(self, dataspace_uri: DataspaceURI | str) -> None:
    """
    Delete a single dataspace from the ETP-server.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri, or path, for the dataspace to delete. If it
        is a dataspace path (on the form `'foo/bar'`) it will be converted
        to the dataspace uri `"eml:///dataspace('foo/bar')"`.

    Raises
    ------
    AssertionError
        If none of the responses lists the uri among its successes.
    """
    uri = str(DataspaceURI.from_any(dataspace_uri))

    # The uri doubles as the map key identifying the request entry.
    request = DeleteDataspaces(uris={uri: uri})
    responses = await self.etp_client.send(request)
    parse_and_raise_response_errors(
        responses,
        DeleteDataspacesResponse,
        "RDDMSClient.delete_dataspace",
    )

    confirmed = [uri in response.success for response in responses]
    assert any(confirmed)
|
|
163
|
+
|
|
164
|
+
async def create_dataspace(
    self,
    dataspace_uri: str | DataspaceURI,
    legal_tags: list[str] = [],
    other_relevant_data_countries: list[str] = [],
    owners: list[str] = [],
    viewers: list[str] = [],
) -> None:
    """
    Create one new dataspace on the ETP server, optionally with
    access-control list (ACL) information attached as custom data.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri, or path, to create. If it is a dataspace
        path (on the form `'foo/bar'`) it will be converted to the
        dataspace uri `"eml:///dataspace('foo/bar')"`.
    legal_tags: list[str]
        A list of legal tag strings for the ACL. The default is an empty
        list.
    other_relevant_data_countries: list[str]
        A list of data countries for the ACL. The default is an empty list.
    owners: list[str]
        A list of owners for the ACL. The default is an empty list.
    viewers: list[str]
        A list of viewers for the ACL. The default is an empty list.

    Raises
    ------
    AssertionError
        If none of the responses lists the uri among its successes.
    """
    dataspace_uri = DataspaceURI.from_any(dataspace_uri)
    uri_text = str(dataspace_uri)

    # A UTC timestamp in microseconds, used for both creation and last
    # write time.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    now = int(utc_now.timestamp() * 1e6)

    # Only ACL lists that actually contain entries are sent to the server.
    acl_entries = (
        ("legaltags", legal_tags),
        ("otherRelevantDataCountries", other_relevant_data_countries),
        ("owners", owners),
        ("viewers", viewers),
    )
    custom_data = {
        acl_key: DataValue(item=ArrayOfString(values=acl))
        for acl_key, acl in acl_entries
        if acl
    }

    new_dataspace = Dataspace(
        uri=uri_text,
        path=dataspace_uri.dataspace,
        store_created=now,
        store_last_write=now,
        custom_data=custom_data,
    )
    responses = await self.etp_client.send(
        PutDataspaces(dataspaces={uri_text: new_dataspace})
    )

    parse_and_raise_response_errors(
        responses, PutDataspacesResponse, "RDDMSClient.create_dataspace"
    )
    confirmed = [uri_text in response.success for response in responses]
    assert any(confirmed)
|
|
229
|
+
|
|
230
|
+
async def start_transaction(
    self,
    dataspace_uri: str | DataspaceURI,
    read_only: bool,
    debounce: bool | float = False,
) -> uuid.UUID:
    """
    Method issuing a `StartTransaction`-ETP message, with optional
    debouncing to retry in case the dataspace is occupied with a different
    write transaction. Note that this method (unlike the raw ETP-message)
    is limited to starting a transaction on a single dataspace.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        A dataspace URI, either as a string or a `DataspaceURI`-object.
    read_only: bool
        Set to `False` for writing, and `True` for reading. It is mandatory
        to use a transaction when writing, but optional for reading.
    debounce: bool | float
        Flag to toggle debouncing or maximum total debouncing time.
        If set to `True`, the client will continue to debounce forever
        until a transaction is started. Setting debounce to a floating
        point number will set a maximum total debouncing time (it will
        potentially retry several times within that window). Default is
        `False`, i.e., no debouncing is done, and failure to start a
        transaction will result in an error.

    Returns
    -------
    uuid.UUID
        A standard library UUID with the transaction uuid.

    Raises
    ------
    ETPError
        Re-raised when sending fails with an error code other than 15, or
        with code 15 while debouncing is disabled.
    ETPTransactionFailure
        If the server responds that the transaction was not started, or if
        the retry intervals are used up without starting a transaction.
    """

    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    # A boolean `debounce` only switches retrying on or off, so there is no
    # time limit to hand to `timeout_intervals` in that case.
    total_timeout = None if isinstance(debounce, bool) else debounce

    for ti in timeout_intervals(total_timeout):
        try:
            responses = await self.etp_client.send(
                StartTransaction(
                    read_only=read_only, dataspace_uris=[dataspace_uri]
                ),
            )
        except ETPError as e:
            # Only the ETP Error `EMAX_TRANSACTIONS_EXCEEDED` (code 15) is
            # retried, and only when the caller asked for debouncing.
            if e.code != 15 or not debounce:
                raise

            logger.info(f"Failed to start transaction retrying in {ti} seconds")
            await asyncio.sleep(ti)
            continue

        parse_and_raise_response_errors(
            responses, StartTransactionResponse, "RDDMSClient.start_transaction"
        )

        assert len(responses) == 1
        response = responses[0]

        if not response.successful:
            raise ETPTransactionFailure(str(response.failure_reason))

        transaction_uuid = uuid.UUID(str(response.transaction_uuid))
        # Fixed: the message was missing its `f`-prefix and logged the
        # literal placeholder instead of the uuid.
        logger.debug(f"Started transaction with uuid: {transaction_uuid}")
        return transaction_uuid

    # Reached when `timeout_intervals` stops yielding new intervals.
    raise ETPTransactionFailure(
        f"Failed to start transaction after {total_timeout} seconds",
    )
|
|
307
|
+
|
|
308
|
+
async def commit_transaction(
    self,
    transaction_uuid: bytes | str | uuid.UUID | Uuid,
) -> None:
    """
    Method for committing a transaction after completing all tasks that
    need to be synchronized between the client and the server.

    Parameters
    ----------
    transaction_uuid: bytes | str | uuid.UUID | Uuid
        The transaction uuid for the current transaction. This will
        typically be the uuid from the
        `RDDMSClient.start_transaction`-method.

    Raises
    ------
    ETPTransactionFailure
        If the response from the server reports that the commit was not
        successful.
    """
    # Normalise the accepted input types to the ETP `Uuid` datatype. An
    # `Uuid`-instance is passed on unchanged.
    if isinstance(transaction_uuid, uuid.UUID):
        transaction_uuid = Uuid(transaction_uuid.bytes)
    elif isinstance(transaction_uuid, str | bytes):
        transaction_uuid = Uuid(transaction_uuid)

    responses = await self.etp_client.send(
        CommitTransaction(transaction_uuid=transaction_uuid),
    )

    assert len(responses) == 1
    response = responses[0]

    if not response.successful:
        raise ETPTransactionFailure(str(response))

    # Fixed: the message was missing its `f`-prefix and logged the literal
    # placeholder instead of the uuid.
    logger.debug(f"Commited transaction with uuid: {transaction_uuid}")
|
|
339
|
+
|
|
340
|
+
async def rollback_transaction(
    self, transaction_uuid: bytes | str | uuid.UUID | Uuid
) -> None:
    """
    Method for cancelling a running transaction. This will tell the server
    that it should disregard any changes incurred by the current
    transaction.

    Parameters
    ----------
    transaction_uuid: bytes | str | uuid.UUID | Uuid
        The transaction uuid for the current transaction. This will
        typically be the uuid from the
        `RDDMSClient.start_transaction`-method.

    Raises
    ------
    ETPTransactionFailure
        If the response from the server reports that the rollback was not
        successful.
    """
    # Normalise the uuid the same way as `commit_transaction` does, so that
    # a uuid from `start_transaction` is encoded identically for both
    # messages.
    # NOTE(review): this replaces `Uuid(str(transaction_uuid).encode())`,
    # which passed the text form as bytes instead of the raw uuid bytes --
    # confirm against the `Uuid` datatype.
    if isinstance(transaction_uuid, uuid.UUID):
        transaction_uuid = Uuid(transaction_uuid.bytes)
    elif isinstance(transaction_uuid, str | bytes):
        transaction_uuid = Uuid(transaction_uuid)

    # `send` is used as returning a list of responses by the other methods
    # of this client, so it is handled as a list here as well instead of as
    # a single message.
    responses = await self.etp_client.send(
        RollbackTransaction(transaction_uuid=transaction_uuid)
    )
    parse_and_raise_response_errors(
        responses, RollbackTransactionResponse, "RDDMSClient.rollback_transaction"
    )

    assert len(responses) == 1
    response = responses[0]

    if not response.successful:
        raise ETPTransactionFailure(str(response))
|
|
368
|
+
|
|
369
|
+
async def list_objects_under_dataspace(
    self,
    dataspace_uri: DataspaceURI | str,
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]] = [],
    count_objects: bool = True,
    store_last_write_filter: datetime.datetime | int | None = None,
) -> list[Resource]:
    """
    This method will list all objects under a given dataspace.

    Parameters
    ----------
    dataspace_uri: DataspaceURI | str
        The uri of the dataspace to list objects.
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]]
        Object types to look for. This can either be a list of strings,
        e.g., `["eml20.*", "resqml20.obj_Grid2dRepresentation"]` to query
        all Energistic Common version 2.0-objects and
        `obj_Grid2dRepresentation`-objects from RESQML v2.0.1, or it can be
        a list of objects from `resqml_objects.v201`, e.g.,
        `[ro.obj_Grid2dRepresentation, ro.obj_LocalDepth3dCrs]`. Default is
        `[]`, i.e., an empty list which means that all data object types
        will be returned.
    count_objects: bool
        Forwarded unchanged as the `count_objects`-field of the
        `GetResources`-message. Default is `True`.
    store_last_write_filter: datetime.datetime | int | None
        Filter forwarded in the `GetResources`-message. A `datetime`-object
        is converted to an integer timestamp in microseconds, an integer is
        sent as it is. Default is `None`, meaning no filter is applied.

    Returns
    -------
    list[Resource]
        The ETP `Resource`-data objects collected from all responses.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    # Classes are translated to their qualified type string, strings are
    # used directly as filters.
    data_object_types = [
        dot if isinstance(dot, str) else dot.get_qualified_type()
        for dot in data_object_types
    ]

    # Accept `datetime`-objects in the same way as the other list-methods
    # on this client do.
    if isinstance(store_last_write_filter, datetime.datetime):
        # Convert `datetime`-object to a microsecond resolution timestamp.
        store_last_write_filter = int(store_last_write_filter.timestamp() * 1e6)

    gr = GetResources(
        context=ContextInfo(
            uri=dataspace_uri,
            depth=1,  # Ignored when `scope="self"`.
            data_object_types=data_object_types,
            # TODO: Check if `navigable_edges` give any different results.
            navigable_edges="Primary",
        ),
        scope="self",
        count_objects=count_objects,
        store_last_write_filter=store_last_write_filter,
        # Use the `list_linked_objects`-method to see edges.
        include_edges=False,
    )

    responses = await self.etp_client.send(gr)

    parse_and_raise_response_errors(
        responses, GetResourcesResponse, "RDDMSClient.list_objects_under_dataspace"
    )
    return [resource for response in responses for resource in response.resources]
|
|
421
|
+
|
|
422
|
+
async def list_linked_objects(
    self,
    start_uri: DataObjectURI | str,
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]] = [],
    store_last_write_filter: datetime.datetime | int | None = None,
    depth: int = 1,
) -> LinkedObjects:
    """
    List the objects that are linked to the object with uri `start_uri`.
    Two `GetResources`-queries are sent concurrently: one for the objects
    (sources) that link to the start object, and one for the objects
    (targets) that the start object links to.

    Parameters
    ----------
    start_uri: DataObjectURI | str
        An ETP data object uri to start the query from.
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]]
        A filter to limit which types of objects to include in the results.
        As a string it is on the form `eml20.obj_EpcExternalPartReference`
        for a specific object, or `eml20.*` for all Energistics Common
        objects. For the RESQML v2.0.1 objects it is similarly
        `resqml20.*`, or a specific type instead of the wildcard `*`. This
        can also be classes from `resqml_objects.v201`, in which case the
        filter will be constructed. Default is `[]`, meaning no filter is
        applied.
    store_last_write_filter: datetime.datetime | int | None
        Filter to only include objects that are written after the provided
        datetime or timestamp. Default is `None`, meaning no filter is
        applied. Note that the timestamp should be in microsecond
        resolution.
    depth: int
        The number of links to return. Setting `depth = 1` will only return
        targets and sources that are directly linked to the start object.
        With `depth = 2` we get links to objects that link to the targets
        and sources of the start object. Default is `1`.

    Returns
    -------
    LinkedObjects
        A container (`NamedTuple`) with resources and edges for the sources
        and targets of the start-object.
    """
    # Classes are translated to their qualified type string, strings are
    # used directly as filters.
    type_filter = []
    for dot in data_object_types:
        if isinstance(dot, str):
            type_filter.append(dot)
        else:
            type_filter.append(dot.get_qualified_type())

    last_write = store_last_write_filter
    if isinstance(last_write, datetime.datetime):
        # Convert `datetime`-object to a microsecond resolution timestamp.
        last_write = int(last_write.timestamp() * 1e6)

    def build_request(scope):
        # The two queries differ only in the scope they are made with.
        return GetResources(
            context=ContextInfo(
                uri=str(start_uri),
                depth=depth,
                data_object_types=type_filter,
                navigable_edges=RelationshipKind.PRIMARY,
            ),
            scope=scope,
            count_objects=True,
            store_last_write_filter=last_write,
            include_edges=True,
        )

    def collect_edges(responses):
        # Edges are taken only from the edge-responses in the answer.
        return [
            edge
            for response in responses
            if isinstance(response, GetResourcesEdgesResponse)
            for edge in response.edges
        ]

    def collect_resources(responses):
        # Resources are taken only from the resource-responses in the
        # answer.
        return [
            resource
            for response in responses
            if isinstance(response, GetResourcesResponse)
            for resource in response.resources
        ]

    # Setting the scope to `SOURCES_OR_SELF` returns the start-object
    # resource _and_ the edge(s) between the start-object and its sources.
    sources_request = build_request(ContextScopeKind.SOURCES_OR_SELF)
    targets_request = build_request(ContextScopeKind.TARGETS_OR_SELF)

    sources_responses, targets_responses = await asyncio.gather(
        self.etp_client.send(sources_request),
        self.etp_client.send(targets_request),
    )

    source_edges = collect_edges(sources_responses)
    source_resources = collect_resources(sources_responses)
    target_edges = collect_edges(targets_responses)
    target_resources = collect_resources(targets_responses)

    return LinkedObjects(
        start_uri=start_uri,
        source_resources=source_resources,
        source_edges=source_edges,
        target_resources=target_resources,
        target_edges=target_edges,
    )
|
|
547
|
+
|
|
548
|
+
async def list_array_metadata(
    self,
    ml_uris: list[str | DataObjectURI],
) -> dict[str, dict[str, DataArrayMetadata]]:
    """
    List array metadata for all arrays connected to the provided data
    object uris. The data objects are first downloaded (without their
    arrays) via `RDDMSClient.download_model`, and the metadata is then
    looked up with `RDDMSClient.list_object_array_metadata`, one call per
    object.

    The purpose of this method is to provide a more convenient way of
    exploring an RDDMS server without needing to handle data objects. It is
    recommended to use `RDDMSClient.list_object_array_metadata` if the
    objects have already been downloaded, as that will be more efficient.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris.

    Returns
    -------
    dict[str, dict[str, DataArrayMetadata]]
        A dictionary indexed by the data object uri, containing a new
        dictionary with the path in resource as the key and the metadata
        (the ETP datatype `DataArrayMetadata`) as the value. An empty
        dictionary is returned if no objects were downloaded.

    See Also
    --------
    RDDMSClient.list_object_array_metadata
        A similar method that fetches the metadata from the objects
        themselves along with a dataspace uri. It is recommended to use
        `list_object_array_metadata` if you already have the objects in
        memory.
    """
    ml_objects = await self.download_model(
        ml_uris=ml_uris,
        download_arrays=False,
    )
    if not ml_objects:
        return {}

    # The dataspace of every object is derived from its data object uri.
    object_uris = [DataObjectURI(str(uri)) for uri in ml_uris]
    dataspace_uris = [DataspaceURI.from_any(uri) for uri in object_uris]

    lookups = []
    for dataspace_uri, ml_object in zip(dataspace_uris, ml_objects):
        lookups.append(
            self.list_object_array_metadata(
                dataspace_uri=dataspace_uri,
                ml_objects=[ml_object],
            )
        )
    per_object_metadata = await asyncio.gather(*lookups)

    # Merge the per-object results into a single mapping.
    metadata_map = {}
    for object_metadata in per_object_metadata:
        metadata_map.update(object_metadata)

    return metadata_map
|
|
610
|
+
|
|
611
|
+
async def list_object_array_metadata(
    self,
    dataspace_uri: str | DataspaceURI,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
) -> dict[str, dict[str, DataArrayMetadata]]:
    """
    List array metadata for all arrays connected to the provided
    RESQML-objects. The method takes a dataspace uri and the objects
    themselves (instead of their uris), as the objects are needed to look
    up which arrays they link to. One `GetDataArrayMetadata`-message is
    sent per object that links to at least one array, and the messages are
    sent concurrently.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri where the objects are located.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        A list (or any sequence) of objects that links to arrays.

    Returns
    -------
    dict[str, dict[str, DataArrayMetadata]]
        A dictionary indexed by the data object uri, containing a new
        dictionary with the path in resource as the key and the metadata
        (the ETP datatype `DataArrayMetadata`) as the value. Objects
        without linked arrays are left out, and an empty dictionary is
        returned if no object links to an array.

    See Also
    --------
    RDDMSClient.list_array_metadata
        A similar method that looks up array metadata needing only the uris
        of the objects.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    # `ml_uris` and `tasks` are kept in lockstep: entry `i` of both belong
    # to the same object.
    ml_uris = []
    tasks = []
    for obj in ml_objects:
        identifiers = [
            DataArrayIdentifier(
                path_in_resource=hdf5_dataset.path_in_hdf_file,
                uri=hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
                    dataspace_path_or_uri=dataspace_uri,
                ),
            )
            for hdf5_dataset in find_hdf5_datasets(obj)
        ]

        if not identifiers:
            # Nothing to ask the server about for this object.
            continue

        ml_uris.append(
            obj.get_etp_data_object_uri(dataspace_path_or_uri=dataspace_uri)
        )
        request = GetDataArrayMetadata(
            data_arrays={dai.path_in_resource: dai for dai in identifiers},
        )
        tasks.append(self.etp_client.send(request))

    if not tasks:
        logger.info("There were no arrays connected to input objects")
        return {}

    task_responses = await asyncio.gather(*tasks)

    metadata_map = {}
    for uri, object_responses in zip(ml_uris, task_responses):
        parse_and_raise_response_errors(
            object_responses,
            GetDataArrayMetadataResponse,
            "RDDMSClient.list_array_metadata",
        )

        # Merge the metadata of all responses belonging to this object.
        path_to_metadata = {}
        for response in object_responses:
            path_to_metadata.update(response.array_metadata)

        metadata_map[uri] = path_to_metadata

    return metadata_map
|
|
692
|
+
|
|
693
|
+
async def upload_array(
    self,
    epc_uri: str | DataObjectURI,
    path_in_resource: str,
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes],
) -> None:
    """
    Upload a single array to an ETP server by delegating to
    `ETPClient.upload_array`. This method will not work without the user
    setting up a transaction for writing to the relevant dataspace. It
    should not be necessary for a user to call this method, prefer
    `RDDMSClient.upload_model` instead.

    Parameters
    ----------
    epc_uri: str | DataObjectURI
        An ETP data object uri to an `obj_EpcExternalPartReference` that is
        connected to the object that links to the provided array.
    path_in_resource: str
        A key (typically a HDF5-key) that uniquely identifies the array
        along with the `epc_uri`. This key is found in the object that
        links to the provided array.
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes]
        A NumPy-array with the data.

    See Also
    --------
    RDDMSClient.upload_model
        A higher-level method that wraps transaction handling, data object
        uploading and array uploading in one go.
    """
    client = self.etp_client
    await client.upload_array(
        epc_uri=epc_uri,
        path_in_resource=path_in_resource,
        data=data,
    )
|
|
728
|
+
|
|
729
|
+
async def download_array(
    self,
    epc_uri: str | DataObjectURI,
    path_in_resource: str,
) -> npt.NDArray[utils_arrays.LogicalArrayDTypes]:
    """
    Download one array from the ETP server through the wrapped ETP client.

    Users should normally not need this method and should prefer
    `RDDMSClient.download_model`.

    Parameters
    ----------
    epc_uri: str | DataObjectURI
        ETP data object uri of the `obj_EpcExternalPartReference` that is
        connected to the object linking to the array.
    path_in_resource: str
        Key (typically a HDF5-key) that, together with `epc_uri`, uniquely
        identifies the array. The key is found in the object that links to
        the array.

    Returns
    -------
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes]
        The downloaded NumPy-array.

    See Also
    --------
    RDDMSClient.download_model
        Higher-level method that downloads data objects and their arrays
        together.
    """
    # Pure delegation: all arguments are forwarded unchanged, by keyword.
    etp_client = self.etp_client
    array = await etp_client.download_array(
        epc_uri=epc_uri,
        path_in_resource=path_in_resource,
    )
    return array
|
|
764
|
+
|
|
765
|
+
async def upload_model(
    self,
    dataspace_uri: str | DataspaceURI,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ] = {},
    handle_transaction: bool = True,
    debounce: bool | float = False,
) -> list[str]:
    """
    Upload a set of RESQML-objects, and the arrays they reference, to a
    single dataspace on an ETP server. This is the main entry point for
    uploading data; call it once per dataspace when several dataspaces are
    involved.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        An ETP dataspace uri.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        The RESQML v2.0.1-objects to upload. If the sequence is empty the
        method returns `[]` immediately without contacting the server.
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ]
        Mapping (e.g., a dictionary) from path in resource, as found in the
        RESQML-objects, to the array data. Default is `{}`, in which case
        only the RESQML-objects are uploaded.
    handle_transaction: bool
        If `True` (the default) a write transaction towards the dataspace
        is started before the upload and committed afterwards. If `False`
        the caller is responsible for the transaction.
    debounce: bool | float
        Forwarded to `RDDMSClient.start_transaction`; decides whether
        starting the transaction is retried if it initially fails. See
        that method for details. Default is `False`, i.e., no retrying.

    Returns
    -------
    list[str]
        ETP data object uris of the uploaded objects.

    See Also
    --------
    RDDMSClient.download_model
    RDDMSClient.start_transaction
    """
    # Nothing to upload: skip uri parsing and transaction handling entirely.
    if not ml_objects:
        return []

    normalized_uri = str(DataspaceURI.from_any(dataspace_uri))

    if not handle_transaction:
        # The caller owns the transaction; only perform the upload itself.
        return await self._upload_model(
            normalized_uri,
            ml_objects=ml_objects,
            data_arrays=data_arrays,
        )

    transaction_uuid = await self.start_transaction(
        dataspace_uri=normalized_uri,
        read_only=False,
        debounce=debounce,
    )

    # NOTE(review): if the upload below raises, the transaction started
    # above is neither committed nor rolled back by this method.
    uploaded_uris = await self._upload_model(
        normalized_uri,
        ml_objects=ml_objects,
        data_arrays=data_arrays,
    )

    await self.commit_transaction(transaction_uuid=transaction_uuid)

    return uploaded_uris
|
|
838
|
+
|
|
839
|
+
async def _upload_model(
    self,
    dataspace_uri: str,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ],
) -> list[str]:
    """
    Upload the given RESQML-objects and their referenced arrays to a
    dataspace. No transaction handling is done here; see
    `RDDMSClient.upload_model` for the public entry point.

    The objects are sent first (one `PutDataObjects`-message per object,
    all sent concurrently), and the arrays are uploaded afterwards.

    Parameters
    ----------
    dataspace_uri: str
        The ETP dataspace uri as a string.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        The objects to upload. Every object is expected to have a
        `citation`-field.
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ]
        Mapping from `path_in_hdf_file` to array data. Each value is
        forwarded unchanged as `data` to `RDDMSClient.upload_array`.
        NOTE(review): `upload_array` is annotated to take a single array,
        while this annotation says a sequence of arrays -- confirm which
        one is intended.

    Returns
    -------
    list[str]
        The ETP data object uris of the uploaded objects, in the same
        order as `ml_objects`.

    Raises
    ------
    KeyError
        If an object references an array whose `path_in_hdf_file` is not a
        key in `data_arrays`. This is checked before anything is sent to
        the server.
    """
    logger.debug(
        f"Starting to upload model of {len(ml_objects)} objects and "
        f"{len(data_arrays)} arrays to dataspace '{dataspace_uri}'"
    )

    # Find all `Hdf5Dataset`-objects referenced by the objects.
    ml_hds = [
        hdf5_dataset for obj in ml_objects for hdf5_dataset in find_hdf5_datasets(obj)
    ]

    # Resolve every referenced array before sending anything. This way a
    # missing array is reported with all missing keys at once, and no
    # objects are uploaded (nor coroutines left un-awaited) on failure.
    arrays_to_upload = []
    missing_keys = []
    for hdf5_dataset in ml_hds:
        path_in_resource = hdf5_dataset.path_in_hdf_file
        try:
            arrays_to_upload.append((hdf5_dataset, data_arrays[path_in_resource]))
        except KeyError:
            missing_keys.append(path_in_resource)

    if missing_keys:
        raise KeyError(
            "The provided objects reference arrays that are missing from "
            f"`data_arrays`: {sorted(set(missing_keys))}"
        )

    # A UTC timestamp in microseconds.
    now = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e6)

    ml_uris = []
    messages = []
    for obj in ml_objects:
        uri = obj.get_etp_data_object_uri(dataspace_uri)
        ml_uris.append(uri)
        obj_xml = serialize_resqml_v201_object(obj)

        # Note that all objects passed in to this method is
        # expected to contain a `citation`-field.
        last_changed = obj.citation.last_update or obj.citation.creation
        if not isinstance(last_changed, datetime.datetime):
            last_changed = last_changed.to_datetime()

        # Note that chunking is handled in the ETP-client if that is needed.
        dob = DataObject(
            format="xml",
            data=obj_xml,
            resource=Resource(
                uri=uri,
                name=obj.citation.title,
                last_changed=int(last_changed.timestamp() * 1e6),
                store_created=now,
                store_last_write=now,
                # This is only used by WITSML channel data objects.
                active_status="Inactive",
                # Not used in the Store-protocol.
                source_count=None,
                target_count=None,
            ),
        )
        messages.append(
            PutDataObjects(
                data_objects={f"{dob.resource.name} -- {dob.resource.uri}": dob},
            )
        )

    # The coroutines are created directly in the `gather`-call so that none
    # are left un-awaited if building one of the messages above fails.
    task_responses = await asyncio.gather(
        *(self.etp_client.send(message) for message in messages)
    )
    responses = [
        response for task_response in task_responses for response in task_response
    ]

    parse_and_raise_response_errors(
        responses, PutDataObjectsResponse, "RDDMSClient.upload_model"
    )

    logger.debug("Done uploading model objects. Starting on upload of arrays.")

    dataspace_path = DataspaceURI(dataspace_uri).dataspace

    array_requests = []
    for hdf5_dataset, data in arrays_to_upload:
        epc_uri = hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
            dataspace_path_or_uri=dataspace_path
        )
        array_requests.append((epc_uri, hdf5_dataset.path_in_hdf_file, data))

    # Compare key sets rather than counts: the same path can be referenced
    # by several objects, which would otherwise hide unused keys.
    unused_keys = set(data_arrays) - {path for _, path, _ in array_requests}
    if unused_keys:
        logger.warning(
            "Not all arrays were uploaded. The remaining array keys "
            f"{unused_keys} were not found in the "
            "provided objects."
        )

    await asyncio.gather(
        *(
            self.upload_array(
                epc_uri=epc_uri,
                path_in_resource=path_in_resource,
                data=data,
            )
            for epc_uri, path_in_resource, data in array_requests
        )
    )

    logger.debug("Done uploading arrays.")
    return ml_uris
|
|
938
|
+
|
|
939
|
+
@typing.overload
async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    # The default must be declared here as well, otherwise a call that
    # omits `download_arrays` matches neither overload.
    download_arrays: typing.Literal[False] = ...,
) -> list[ro.AbstractCitedDataObject]: ...

@typing.overload
async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    download_arrays: typing.Literal[True],
) -> tuple[
    list[ro.AbstractCitedDataObject],
    dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
]: ...

async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    download_arrays: bool = False,
) -> (
    tuple[
        list[ro.AbstractCitedDataObject],
        dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
    ]
    | list[ro.AbstractCitedDataObject]
):
    """
    Download RESQML-model from the RDDMS. A model in this sense is just a
    grouping of RESQML-objects specified by their uris. The objects do not
    need to be linked in any way.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris. If the list is empty nothing is
        requested from the server and an empty result is returned.
    download_arrays: bool
        A flag to toggle if any referenced arrays should be downloaded
        alongside the RESQML-objects. Setting to `True` will make the
        function return a tuple where the first element contains a list of
        the objects, and the second element a dictionary with the
        `path_in_hdf_file` as the key, and the arrays as the values. If the
        flag is set to `False` no arrays will be downloaded, and the
        function only returns a list of the objects. Default is `False`.

    Returns
    -------
    tuple[
        list[ro.AbstractCitedDataObject],
        dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
    ]
    | list[ro.AbstractCitedDataObject]
        See the `download_arrays`-argument for an explanation on which part
        of the union is returned when. The objects are returned in the same
        order as `ml_uris`. Each dictionary value is the result of one
        `RDDMSClient.download_array`-call; if several datasets share the
        same `path_in_hdf_file` the last one wins.

    Raises
    ------
    KeyError
        If the server responses for a uri do not contain a data object
        keyed by that uri.
    """
    if not ml_uris:
        return ([], {}) if download_arrays else []

    uris = [str(uri) for uri in ml_uris]

    # Build all messages before creating any coroutine so that none are
    # left un-awaited if constructing a message fails.
    messages = [GetDataObjects(uris={uri: uri}) for uri in uris]
    task_responses = await asyncio.gather(
        *(self.etp_client.send(message) for message in messages)
    )
    responses = [
        response for task_response in task_responses for response in task_response
    ]

    parse_and_raise_response_errors(
        responses, GetDataObjectsResponse, "RDDMSClient.download_model"
    )

    # Pair each uri with the responses of its own request (and not with the
    # flattened list), so the pairing stays correct even if a request
    # yields more than one response message.
    ml_objects = []
    for uri, task_response in zip(uris, task_responses):
        received = {}
        for response in task_response:
            received.update(response.data_objects)
        ml_objects.append(parse_resqml_v201_object(received[uri].data))

    if not download_arrays:
        return ml_objects

    # Resolve the dataspace per object, so arrays are requested from the
    # dataspace of the object that references them.
    array_requests = []
    for uri, obj in zip(ml_uris, ml_objects):
        hdf5_datasets = list(find_hdf5_datasets(obj))
        if not hdf5_datasets:
            continue

        dataspace_path = DataspaceURI.from_any(uri).dataspace
        for hdf5_dataset in hdf5_datasets:
            epc_uri = hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
                dataspace_path_or_uri=dataspace_path
            )
            array_requests.append((epc_uri, hdf5_dataset.path_in_hdf_file))

    if not array_requests:
        logger.info("There are no referenced arrays in the downloaded objects")

        return ml_objects, {}

    arrays = await asyncio.gather(
        *(
            self.download_array(
                epc_uri=epc_uri,
                path_in_resource=path_in_resource,
            )
            for epc_uri, path_in_resource in array_requests
        )
    )
    data_arrays = {
        path_in_resource: array
        for (_, path_in_resource), array in zip(array_requests, arrays)
    }

    return ml_objects, data_arrays
|
|
1043
|
+
|
|
1044
|
+
async def delete_model(
    self,
    ml_uris: list[str | DataObjectURI],
    prune_contained_objects: bool = False,
    handle_transaction: bool = True,
    debounce: bool | float = False,
) -> None:
    """
    Method used for deleting a set of objects on an ETP server. In order
    for the deletion to be successful the objects to be deleted can not
    leave any dangling source-objects. That is, there can be no objects
    left on the ETP server that references the deleted objects.

    All uris are sent in a single `DeleteDataObjects`-message.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris to delete. If the list is empty the
        method returns immediately without contacting the server.
    prune_contained_objects: bool
        See section 9.3.4 in the ETP v1.2 standards documentation for an
        accurate description of this parameter. Default is `False` meaning
        no pruning is done.
    handle_transaction: bool
        A flag to toggle if `RDDMSClient.delete_model` should start and
        commit the transaction towards the dataspace. Default is `True`,
        and the method will ensure that the transaction handling is done
        correctly. When `True`, all uris must belong to the same dataspace.
    debounce: bool | float
        Parameter to decide if `RDDMSClient.delete_model` should retry
        starting a transaction if it initially fails. See
        `RDDMSClient.start_transaction` for a more in-depth explanation of
        the parameter. Default is `False`, i.e., no debouncing will occur
        and the method will fail if it is unable to start a transaction.

    Raises
    ------
    ValueError
        If `handle_transaction` is `True` and the uris do not all belong to
        the same dataspace.
    """
    if not ml_uris:
        return

    uris = list(map(str, ml_uris))
    ddo = DeleteDataObjects(
        uris=dict(zip(uris, uris)),
        prune_contained_objects=prune_contained_objects,
    )

    if handle_transaction:
        dataspace_uris = [str(DataspaceURI.from_any(u)) for u in ml_uris]
        dataspace_uri = dataspace_uris[0]

        # A single transaction is started towards a single dataspace.
        # Validate explicitly instead of with `assert`, which is stripped
        # when Python runs with `-O`.
        if any(du != dataspace_uri for du in dataspace_uris):
            raise ValueError(
                "All uris passed to `RDDMSClient.delete_model` must belong "
                "to the same dataspace when `handle_transaction` is `True`, "
                f"got dataspaces: {sorted(set(dataspace_uris))}"
            )

        transaction_uuid = await self.start_transaction(
            dataspace_uri=dataspace_uri, read_only=False, debounce=debounce
        )

    # NOTE(review): if sending or response parsing below raises, the
    # transaction started above is neither committed nor rolled back here.
    responses = await self.etp_client.send(ddo)

    parse_and_raise_response_errors(
        responses,
        DeleteDataObjectsResponse,
        "RDDMSClient.delete_model",
    )

    if handle_transaction:
        await self.commit_transaction(transaction_uuid)
|
|
1103
|
+
|
|
1104
|
+
|
|
1105
|
+
class rddms_connect:
    """
    Connect to an RDDMS server via ETP.

    This class can act as:

    1. A context manager handling setup and tear-down of the connection.
    2. An asynchronous iterator which can be used to persistently retry to
       connect if the websockets connection drops.
    3. An awaitable connection that must be manually closed by the user.

    See below for examples of all three cases.

    Parameters
    ----------
    uri: str
        The uri to the RDDMS server. This should be the uri to a websockets
        endpoint to an ETP server.
    data_partition_id: str | None
        The data partition id used when connecting to the OSDU open-etp-server
        in multi-partition mode. Default is `None`.
    authorization: str | SecretStr | None
        Bearer token used for authenticating to the RDDMS server. This token
        should be on the form `"Bearer 1234..."`. Default is `None`.
    etp_timeout: float | None
        The timeout in seconds for when to stop waiting for a message from the
        server. Setting it to `None` will persist the connection indefinitely.
        Default is `None`.
    max_message_size: float
        The maximum number of bytes for a single websockets message. Default is
        `2**20` corresponding to `1` MiB.


    Examples
    --------
    An example of connecting to an RDDMS server using :func:`rddms_connect` as a
    context manager is:

        async with rddms_connect(...) as rddms_client:
            ...

    In this case the closing message is sent and the websockets connection is
    closed once the program exits the context manager.


    To persist a connection if the websockets connection is dropped (for any
    reason), use :func:`rddms_connect` as an asynchronous generator, viz.:

        import websockets

        async for rddms_client in rddms_connect(...):
            try:
                ...
            except websockets.ConnectionClosed:
                continue

            # Include `break` to avoid re-running the whole block if the
            # iteration runs without any errors.
            break

    Note that in this case the whole program under the `try`-block is re-run
    from the start if the iteration completes normally, or if the websockets
    connection is dropped. Therefore, make sure to include a `break` at the end
    of the `try`-block (as in the example above).


    The third option is to set up a connection via `await` and then manually
    close the connection once done:

        rddms_client = await rddms_connect(...)
        ...
        await rddms_client.close()

    See Also
    --------
    pyetp.client.etp_connect
        The `rddms_connect`-class is a thin wrapper around `etp_connect`.
    """

    def __init__(
        self,
        uri: str,
        data_partition_id: str | None = None,
        authorization: str | SecretStr | None = None,
        etp_timeout: float | None = None,
        max_message_size: float = 2**20,
    ) -> None:
        # Only store the connection settings; no connection is opened until
        # the instance is awaited, entered or iterated over.
        self.uri = uri
        self.data_partition_id = data_partition_id

        # Always keep the token wrapped in a `SecretStr`. Note that a `None`
        # token is wrapped as well.
        if isinstance(authorization, SecretStr):
            self.authorization = authorization
        else:
            self.authorization = SecretStr(authorization)

        self.etp_timeout = etp_timeout
        self.max_message_size = max_message_size

    def __await__(self) -> typing.Generator[typing.Any, None, RDDMSClient]:
        # `await rddms_connect(...)` behaves like entering the context
        # manager, but the caller must close the returned client manually.
        return self.__aenter__().__await__()

    async def __aenter__(self) -> RDDMSClient:
        etp_client = await etp_connect(
            uri=self.uri,
            data_partition_id=self.data_partition_id,
            authorization=self.authorization,
            etp_timeout=self.etp_timeout,
            max_message_size=self.max_message_size,
        )
        # Keep a reference so that `__aexit__` can close the client.
        self.rddms_client = RDDMSClient(etp_client)

        return self.rddms_client

    async def __aexit__(
        self,
        exc_type: typing.Type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Deliberately do not return the result of `close()`: a truthy
        # return value from `__aexit__` would suppress any exception raised
        # inside the `async with`-block.
        await self.rddms_client.close()

    async def __aiter__(self) -> AsyncGenerator[RDDMSClient]:
        # Wrap every ETP client yielded by `etp_connect` in a new
        # `RDDMSClient`. These clients are not stored on `self`.
        async for etp_client in etp_connect(
            uri=self.uri,
            data_partition_id=self.data_partition_id,
            authorization=self.authorization,
            etp_timeout=self.etp_timeout,
            max_message_size=self.max_message_size,
        ):
            yield RDDMSClient(etp_client)
|