pyetp 0.0.46__py3-none-any.whl → 0.0.47__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rddms_io/client.py ADDED
@@ -0,0 +1,1234 @@
1
+ import asyncio
2
+ import datetime
3
+ import logging
4
+ import typing
5
+ import uuid
6
+ from collections.abc import AsyncGenerator, Sequence
7
+ from types import TracebackType
8
+
9
+ import numpy.typing as npt
10
+ from pydantic import SecretStr
11
+
12
+ import resqml_objects.v201 as ro
13
+ from energistics.etp.v12.datatypes import ArrayOfString, DataValue, Uuid
14
+ from energistics.etp.v12.datatypes.data_array_types import (
15
+ DataArrayIdentifier,
16
+ DataArrayMetadata,
17
+ )
18
+ from energistics.etp.v12.datatypes.object import (
19
+ ContextInfo,
20
+ ContextScopeKind,
21
+ DataObject,
22
+ Dataspace,
23
+ RelationshipKind,
24
+ Resource,
25
+ )
26
+ from energistics.etp.v12.protocol.data_array import (
27
+ GetDataArrayMetadata,
28
+ GetDataArrayMetadataResponse,
29
+ )
30
+ from energistics.etp.v12.protocol.dataspace import (
31
+ DeleteDataspaces,
32
+ DeleteDataspacesResponse,
33
+ GetDataspaces,
34
+ GetDataspacesResponse,
35
+ PutDataspaces,
36
+ PutDataspacesResponse,
37
+ )
38
+ from energistics.etp.v12.protocol.discovery import (
39
+ GetResources,
40
+ GetResourcesEdgesResponse,
41
+ GetResourcesResponse,
42
+ )
43
+ from energistics.etp.v12.protocol.store import (
44
+ DeleteDataObjects,
45
+ DeleteDataObjectsResponse,
46
+ GetDataObjects,
47
+ GetDataObjectsResponse,
48
+ PutDataObjects,
49
+ PutDataObjectsResponse,
50
+ )
51
+ from energistics.etp.v12.protocol.transaction import (
52
+ CommitTransaction,
53
+ RollbackTransaction,
54
+ RollbackTransactionResponse,
55
+ StartTransaction,
56
+ StartTransactionResponse,
57
+ )
58
+ from pyetp import utils_arrays
59
+ from pyetp.client import ETPClient, ETPError, etp_connect, timeout_intervals
60
+ from pyetp.errors import (
61
+ ETPTransactionFailure,
62
+ parse_and_raise_response_errors,
63
+ )
64
+ from pyetp.uri import DataObjectURI, DataspaceURI
65
+ from rddms_io.data_types import LinkedObjects
66
+ from resqml_objects import parse_resqml_v201_object, serialize_resqml_v201_object
67
+ from resqml_objects.v201.utils import find_hdf5_datasets
68
+
69
+ logger = logging.getLogger(__name__)
70
+
71
+
72
+ class RDDMSClient:
73
+ """
74
+ Client using ETP to communicate with an RDDMS (Reservoir Domain Data
75
+ Management Services) server. It is specifically
76
+ tailored towards the OSDU open-etp-server (see:
77
+ https://community.opengroup.org/osdu/platform/domain-data-mgmt-services/reservoir/open-etp-server)
78
+ and made with the intention to make it easier to interact with RDDMS by
79
+ exposing ergonomic user-facing functions.
80
+
81
+ The client is meant to be set up via :func:`rddms_connect`.
82
+
83
+ Parameters
84
+ ----------
85
+ etp_client: ETPClient
86
+ An instance of `ETPClient` from `pyetp`.
87
+ """
88
+
89
def __init__(self, etp_client: ETPClient) -> None:
    """
    Wrap an existing ETP client.

    Parameters
    ----------
    etp_client: ETPClient
        The `pyetp` client that all methods on this class send their
        ETP messages through.
    """
    # The client is stored as a public attribute; the other methods on this
    # class read it as `self.etp_client`.
    self.etp_client = etp_client
91
+
92
async def close(self) -> None:
    """
    Close the underlying ETP connection manually.

    Use this when the client was created outside of a context manager,
    e.g., via a plain `await`-statement, and therefore will not be closed
    automatically.

    >>> rddms_client = await rddms_connect(...)
    >>> ...
    >>> await rddms_client.close()
    """
    etp_client = self.etp_client
    await etp_client.close()
104
+
105
async def list_dataspaces(
    self, store_last_write_filter: datetime.datetime | int | None = None
) -> list[Dataspace]:
    """
    List the dataspaces on the ETP-server.

    Parameters
    ----------
    store_last_write_filter: datetime.datetime | int | None
        Optional filter forwarded as the `store_last_write_filter` of the
        `GetDataspaces`-message. A `datetime`-object is converted to an
        integer timestamp in microseconds before it is sent. Presumably
        the server then only returns dataspaces written to after this
        time (see the ETP v1.2 documentation to confirm). The default is
        `None`, meaning no filter is sent.

    Returns
    -------
    list[Dataspace]
        The ETP `Dataspace`-data objects from all responses, flattened
        into a single list. See section 23.43.10 of the ETP v1.2 standards
        documentation for a description of the different fields.
    """
    last_write = store_last_write_filter
    if isinstance(last_write, datetime.datetime):
        # ETP timestamps are given in microseconds.
        last_write = int(last_write.timestamp() * 1e6)

    request = GetDataspaces(store_last_write_filter=last_write)
    responses = await self.etp_client.send(request)
    parse_and_raise_response_errors(
        responses,
        GetDataspacesResponse,
        "RDDMSClient.list_dataspaces",
    )

    # The server may answer with several responses; merge their content.
    found = []
    for response in responses:
        found.extend(response.dataspaces)
    return found
141
+
142
async def delete_dataspace(self, dataspace_uri: DataspaceURI | str) -> None:
    """
    Delete a single dataspace from the ETP-server.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri, or path, of the dataspace to delete. A
        dataspace path (on the form `'foo/bar'`) is converted to the
        dataspace uri `"eml:///dataspace('foo/bar')"`.

    Raises
    ------
    AssertionError
        If none of the responses list the dataspace uri as successfully
        deleted.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    # The uri doubles as the map key so that it can be looked up in the
    # `success`-map of the responses below.
    request = DeleteDataspaces(uris={dataspace_uri: dataspace_uri})
    responses = await self.etp_client.send(request)
    parse_and_raise_response_errors(
        responses,
        DeleteDataspacesResponse,
        "RDDMSClient.delete_dataspace",
    )

    deleted = [dataspace_uri in response.success for response in responses]
    assert any(deleted)
163
+
164
async def create_dataspace(
    self,
    dataspace_uri: str | DataspaceURI,
    legal_tags: list[str] = [],
    other_relevant_data_countries: list[str] = [],
    owners: list[str] = [],
    viewers: list[str] = [],
) -> None:
    """
    Create a single new dataspace on the ETP server, optionally with
    access-control list (ACL) information attached.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri, or path, to create. A dataspace path (on
        the form `'foo/bar'`) is converted to the dataspace uri
        `"eml:///dataspace('foo/bar')"`.
    legal_tags: list[str]
        Legal tag strings for the ACL. The default is an empty list.
    other_relevant_data_countries: list[str]
        Data countries for the ACL. The default is an empty list.
    owners: list[str]
        Owners for the ACL. The default is an empty list.
    viewers: list[str]
        Viewers for the ACL. The default is an empty list.

    Raises
    ------
    AssertionError
        If none of the responses list the dataspace uri as successfully
        created.
    """
    dataspace_uri = DataspaceURI.from_any(dataspace_uri)
    uri_str = str(dataspace_uri)

    # Both store timestamps are set to the current UTC time in
    # microseconds.
    utc_now = datetime.datetime.now(datetime.timezone.utc)
    now = int(utc_now.timestamp() * 1e6)

    # Empty ACL lists are left out of the custom data entirely.
    acl_entries = {
        "legaltags": legal_tags,
        "otherRelevantDataCountries": other_relevant_data_countries,
        "owners": owners,
        "viewers": viewers,
    }
    custom_data = {
        acl_key: DataValue(item=ArrayOfString(values=acl))
        for acl_key, acl in acl_entries.items()
        if acl
    }

    dataspace = Dataspace(
        uri=uri_str,
        path=dataspace_uri.dataspace,
        store_created=now,
        store_last_write=now,
        custom_data=custom_data,
    )
    responses = await self.etp_client.send(
        PutDataspaces(dataspaces={uri_str: dataspace})
    )

    parse_and_raise_response_errors(
        responses, PutDataspacesResponse, "RDDMSClient.create_dataspace"
    )
    created = [uri_str in response.success for response in responses]
    assert any(created)
229
+
230
async def start_transaction(
    self,
    dataspace_uri: str | DataspaceURI,
    read_only: bool,
    debounce: bool | float = False,
) -> uuid.UUID:
    """
    Method issuing a `StartTransaction`-ETP message, with optional
    debouncing to retry in case the dataspace is occupied with a different
    write transaction. Note that this method (unlike the raw ETP-message)
    is limited to starting a transaction on a single dataspace.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        A dataspace URI, either as a string or a `DataspaceURI`-object.
    read_only: bool
        Set to `False` for writing, and `True` for reading. It is mandatory
        to use a transaction when writing, but optional for reading.
    debounce: bool | float
        Flag to toggle debouncing or maximum total debouncing time.
        If set to `True`, the client will continue to debounce forever
        until a transaction is started. Setting debounce to a floating
        point number will set a maximum total debouncing time (it will
        potentially retry several times within that window). Default is
        `False`, i.e., no debouncing is done, and failure to start a
        transaction will result in an error.

    Returns
    -------
    uuid.UUID
        A standard library UUID with the transaction uuid.

    Raises
    ------
    ETPError
        If the server rejects the request with any other error than
        `EMAX_TRANSACTIONS_EXCEEDED`, or with that error when debouncing
        is turned off.
    ETPTransactionFailure
        If the server reports the transaction as unsuccessful, or if no
        transaction could be started within the debouncing window.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    # A boolean `debounce` carries no time limit, only a float does.
    total_timeout = None if isinstance(debounce, bool) else debounce

    for ti in timeout_intervals(total_timeout):
        try:
            responses = await self.etp_client.send(
                StartTransaction(
                    read_only=read_only, dataspace_uris=[dataspace_uri]
                ),
            )
        except ETPError as e:
            # Only the ETP error `EMAX_TRANSACTIONS_EXCEEDED` (code 15) is
            # a candidate for retrying.
            if e.code != 15:
                raise

            if debounce:
                logger.info(f"Failed to start transaction retrying in {ti} seconds")
                await asyncio.sleep(ti)
                continue

            raise

        parse_and_raise_response_errors(
            responses, StartTransactionResponse, "RDDMSClient.start_transaction"
        )

        assert len(responses) == 1
        response = responses[0]

        if not response.successful:
            raise ETPTransactionFailure(str(response.failure_reason))

        transaction_uuid = uuid.UUID(str(response.transaction_uuid))
        # Fixed: the message was missing its `f`-prefix and logged the
        # placeholder text instead of the uuid.
        logger.debug(f"Started transaction with uuid: {transaction_uuid}")
        return transaction_uuid

    # Reached when the retry intervals are exhausted without success.
    raise ETPTransactionFailure(
        f"Failed to start transaction after {total_timeout} seconds",
    )
307
+
308
async def commit_transaction(
    self,
    transaction_uuid: bytes | str | uuid.UUID | Uuid,
) -> None:
    """
    Method for committing a transaction after completing all tasks that
    need to be synchronized between the client and the server.

    Parameters
    ----------
    transaction_uuid: bytes | str | uuid.UUID | Uuid
        The transaction uuid for the current transaction. This will
        typically be the uuid from the
        `RDDMSClient.start_transaction`-method.

    Raises
    ------
    ETPTransactionFailure
        If the server reports that the commit was unsuccessful.
    """
    # Normalize the input to the ETP `Uuid`-datatype. A `Uuid`-instance is
    # passed through untouched.
    if isinstance(transaction_uuid, uuid.UUID):
        transaction_uuid = Uuid(transaction_uuid.bytes)
    elif isinstance(transaction_uuid, str | bytes):
        transaction_uuid = Uuid(transaction_uuid)

    responses = await self.etp_client.send(
        CommitTransaction(transaction_uuid=transaction_uuid),
    )

    assert len(responses) == 1
    response = responses[0]

    if not response.successful:
        raise ETPTransactionFailure(str(response))

    # Fixed: the message was missing its `f`-prefix and logged the
    # placeholder text instead of the uuid.
    logger.debug(f"Commited transaction with uuid: {transaction_uuid}")
339
+
340
async def rollback_transaction(
    self, transaction_uuid: bytes | str | uuid.UUID | Uuid
) -> None:
    """
    Method for cancelling a running transaction. This will tell the server
    that it should disregard any changes incurred by the current
    transaction.

    Parameters
    ----------
    transaction_uuid: bytes | str | uuid.UUID | Uuid
        The transaction uuid for the current transaction. This will
        typically be the uuid from the
        `RDDMSClient.start_transaction`-method. A string is parsed with
        the standard library `uuid.UUID`.

    Raises
    ------
    ValueError
        If `transaction_uuid` is a string that `uuid.UUID` cannot parse.
    ETPTransactionFailure
        If the server reports that the rollback was unsuccessful.
    """
    # Normalize to the ETP `Uuid`-datatype using the raw uuid bytes, the
    # same representation that `commit_transaction` sends for a
    # `uuid.UUID`. Previously the textual form was encoded instead.
    # NOTE(review): assumes `Uuid` wraps the 16 raw bytes -- confirm.
    if isinstance(transaction_uuid, str):
        transaction_uuid = uuid.UUID(transaction_uuid)

    if isinstance(transaction_uuid, uuid.UUID):
        transaction_uuid = Uuid(transaction_uuid.bytes)
    elif isinstance(transaction_uuid, bytes):
        transaction_uuid = Uuid(transaction_uuid)

    # `send` is treated as returning a list of responses everywhere else in
    # this class, so handle it the same way here.
    responses = await self.etp_client.send(
        RollbackTransaction(transaction_uuid=transaction_uuid)
    )
    parse_and_raise_response_errors(
        responses, RollbackTransactionResponse, "RDDMSClient.rollback_transaction"
    )

    assert len(responses) == 1
    response = responses[0]

    if not response.successful:
        raise ETPTransactionFailure(str(response))
368
+
369
async def list_objects_under_dataspace(
    self,
    dataspace_uri: DataspaceURI | str,
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]] = [],
    count_objects: bool = True,
    store_last_write_filter: datetime.datetime | int | None = None,
) -> list[Resource]:
    """
    This method will list all objects under a given dataspace.

    Parameters
    ----------
    dataspace_uri: DataspaceURI | str
        The uri of the dataspace to list objects.
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]]
        Object types to look for. This can either be a list of strings,
        e.g., `["eml20.*", "resqml20.obj_Grid2dRepresentation"]` to query
        all Energistics Common version 2.0-objects and
        `obj_Grid2dRepresentation`-objects from RESQML v2.0.1, or it can be
        a list of objects from `resqml_objects.v201`, e.g.,
        `[ro.obj_Grid2dRepresentation, ro.obj_LocalDepth3dCrs]`. Default is
        `[]`, i.e., an empty list which means that all data object types
        will be returned.
    count_objects: bool
        Forwarded as the `count_objects`-field of the
        `GetResources`-message. Default is `True`.
    store_last_write_filter: datetime.datetime | int | None
        Optional filter forwarded as the `store_last_write_filter` of the
        `GetResources`-message. A `datetime`-object is converted to an
        integer timestamp in microseconds, matching the other listing
        methods on this class. Default is `None`, meaning no filter is
        applied.

    Returns
    -------
    list[Resource]
        The ETP `Resource`-data objects from all responses, flattened into
        a single list.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))
    data_object_types = [
        dot if isinstance(dot, str) else dot.get_qualified_type()
        for dot in data_object_types
    ]
    if isinstance(store_last_write_filter, datetime.datetime):
        # Convert `datetime`-object to a microsecond resolution timestamp.
        store_last_write_filter = int(store_last_write_filter.timestamp() * 1e6)

    gr = GetResources(
        context=ContextInfo(
            uri=dataspace_uri,
            depth=1,  # Ignored when `scope="self"`.
            data_object_types=data_object_types,
            # TODO: Check if `navigable_edges` give any different results.
            navigable_edges="Primary",
        ),
        scope="self",
        count_objects=count_objects,
        store_last_write_filter=store_last_write_filter,
        # Use the `list_linked_objects`-method to see edges.
        include_edges=False,
    )

    responses = await self.etp_client.send(gr)

    parse_and_raise_response_errors(
        responses, GetResourcesResponse, "RDDMSClient.list_objects_under_dataspace"
    )
    return [resource for response in responses for resource in response.resources]
421
+
422
async def list_linked_objects(
    self,
    start_uri: DataObjectURI | str,
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]] = [],
    store_last_write_filter: datetime.datetime | int | None = None,
    depth: int = 1,
) -> LinkedObjects:
    """
    List the objects that are linked to the object at `start_uri`. Two
    queries are made from the start-object: one for the objects linking to
    it (sources), and one for the objects it links to (targets).

    Parameters
    ----------
    start_uri: DataObjectURI | str
        An ETP data object uri to start the query from.
    data_object_types: list[str | typing.Type[ro.AbstractCitedDataObject]]
        A filter to limit which types of objects to include in the results.
        As a string it is on the form `eml20.obj_EpcExternalPartReference`
        for a specific object, or `eml20.*` for all Energistics Common
        objects. For the RESQML v2.0.1 objects it is similarly
        `resqml20.*`, or a specific type instead of the wildcard `*`. This
        can also be classes from `resqml_objects.v201`, in which case the
        filter string is taken from the class. Default is `[]`, meaning no
        filter is applied.
    store_last_write_filter: datetime.datetime | int | None
        Filter to only include objects that are written after the provided
        datetime or timestamp. Default is `None`, meaning no filter is
        applied. Note that the timestamp should be in microsecond
        resolution.
    depth: int
        The number of links to follow. Setting `depth = 1` will only return
        targets and sources that are directly linked to the start object.
        With `depth = 2` we also get the objects that link to the targets
        and sources of the start object. Default is `1`.

    Returns
    -------
    LinkedObjects
        A container (`NamedTuple`) with resources and edges for the sources
        and targets of the start-object.
    """
    type_filters = [
        dot if isinstance(dot, str) else dot.get_qualified_type()
        for dot in data_object_types
    ]
    last_write = store_last_write_filter
    if isinstance(last_write, datetime.datetime):
        # ETP timestamps are given in microseconds.
        last_write = int(last_write.timestamp() * 1e6)

    def make_request(scope: ContextScopeKind) -> GetResources:
        # The two queries only differ in their scope. Using the
        # `*_OR_SELF`-scopes returns the start-object resource _and_ the
        # edge(s) between the start-object and its sources/targets.
        return GetResources(
            context=ContextInfo(
                uri=str(start_uri),
                depth=depth,
                data_object_types=type_filters,
                navigable_edges=RelationshipKind.PRIMARY,
            ),
            scope=scope,
            count_objects=True,
            store_last_write_filter=last_write,
            include_edges=True,
        )

    def collect_edges(responses):
        # Edges arrive in `GetResourcesEdgesResponse`-messages.
        return [
            edge
            for response in responses
            if isinstance(response, GetResourcesEdgesResponse)
            for edge in response.edges
        ]

    def collect_resources(responses):
        # Resources arrive in `GetResourcesResponse`-messages.
        return [
            resource
            for response in responses
            if isinstance(response, GetResourcesResponse)
            for resource in response.resources
        ]

    # Run both queries concurrently.
    sources_responses, targets_responses = await asyncio.gather(
        self.etp_client.send(make_request(ContextScopeKind.SOURCES_OR_SELF)),
        self.etp_client.send(make_request(ContextScopeKind.TARGETS_OR_SELF)),
    )

    # NOTE(review): responses of any other type than the two response
    # messages above are dropped here without being inspected -- confirm
    # whether they should be passed to `parse_and_raise_response_errors`.
    return LinkedObjects(
        start_uri=start_uri,
        source_resources=collect_resources(sources_responses),
        source_edges=collect_edges(sources_responses),
        target_resources=collect_resources(targets_responses),
        target_edges=collect_edges(targets_responses),
    )
547
+
548
async def list_array_metadata(
    self,
    ml_uris: list[str | DataObjectURI],
) -> dict[str, dict[str, DataArrayMetadata]]:
    """
    List array metadata for all arrays connected to the provided data
    object uris. The data objects are first downloaded (without their
    arrays), and then `RDDMSClient.list_object_array_metadata` is called
    once per object to fetch the metadata.

    This is a convenience for exploring an RDDMS server without handling
    data objects. If the objects are already downloaded it is more
    efficient to call `RDDMSClient.list_object_array_metadata` directly.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris.

    Returns
    -------
    dict[str, dict[str, DataArrayMetadata]]
        A dictionary indexed by the data object uri, containing a new
        dictionary with the path in resource as the key and the metadata
        (the ETP datatype `DataArrayMetadata`) as the value. An empty
        dictionary is returned if no objects were downloaded.

    See Also
    --------
    RDDMSClient.list_object_array_metadata
        A similar method that takes the objects themselves along with a
        dataspace uri. Prefer it if you already have the objects in
        memory.
    """
    downloaded = await self.download_model(
        ml_uris=ml_uris,
        download_arrays=False,
    )
    if not downloaded:
        return {}

    # Each object is looked up in the dataspace taken from its own uri.
    object_uris = [DataObjectURI(str(uri)) for uri in ml_uris]
    dataspace_uris = [DataspaceURI.from_any(uri) for uri in object_uris]

    lookups = [
        self.list_object_array_metadata(
            dataspace_uri=dataspace_uri,
            ml_objects=[ml_object],
        )
        for dataspace_uri, ml_object in zip(dataspace_uris, downloaded)
    ]
    per_object_metadata = await asyncio.gather(*lookups)

    # Merge the per-object results into a single map.
    metadata_map = {}
    for object_metadata in per_object_metadata:
        metadata_map.update(object_metadata)

    return metadata_map
610
+
611
async def list_object_array_metadata(
    self,
    dataspace_uri: str | DataspaceURI,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
) -> dict[str, dict[str, DataArrayMetadata]]:
    """
    Method used for listing array metadata for all connected arrays to the
    provided RESQML-objects. This method works by taking in a dataspace uri
    and the objects themselves (instead of their uris) as they would need
    to be downloaded to look up which arrays they link to.

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        The ETP dataspace uri where the objects are located.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        A list (or any sequence) of objects that links to arrays.

    Returns
    -------
    dict[str, dict[str, DataArrayMetadata]]
        A dictionary indexed by the data object uri, containing a new
        dictionary with the path in resource as the key and the metadata
        (the ETP datatype `DataArrayMetadata`) as the value. Objects
        without any linked arrays are left out, and an empty dictionary is
        returned if none of the objects link to arrays.

    See Also
    --------
    RDDMSClient.list_array_metadata
        A similar method that looks up array metadata needing only the uris
        of the objects.
    """
    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    ml_uris = []
    tasks = []
    for obj in ml_objects:
        # One array identifier per `Hdf5Dataset` found in the object.
        obj_dais = [
            DataArrayIdentifier(
                uri=hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
                    dataspace_path_or_uri=dataspace_uri,
                ),
                path_in_resource=hdf5_dataset.path_in_hdf_file,
            )
            for hdf5_dataset in find_hdf5_datasets(obj)
        ]

        if not obj_dais:
            continue

        # One request per object, so that the responses can be paired with
        # the object uri afterwards.
        ml_uris.append(
            obj.get_etp_data_object_uri(dataspace_path_or_uri=dataspace_uri)
        )
        tasks.append(
            self.etp_client.send(
                GetDataArrayMetadata(
                    data_arrays={dai.path_in_resource: dai for dai in obj_dais},
                )
            )
        )

    if not tasks:
        logger.info("There were no arrays connected to input objects")
        return {}

    task_responses = await asyncio.gather(*tasks)

    metadata_map = {}
    for uri, tr in zip(ml_uris, task_responses):
        # Fixed: the error context previously named
        # `RDDMSClient.list_array_metadata` instead of this method.
        parse_and_raise_response_errors(
            tr,
            GetDataArrayMetadataResponse,
            "RDDMSClient.list_object_array_metadata",
        )

        # Merge the metadata from all responses belonging to this object.
        pirm = {}
        for response in tr:
            pirm.update(response.array_metadata)

        metadata_map[uri] = pirm

    return metadata_map
692
+
693
async def upload_array(
    self,
    epc_uri: str | DataObjectURI,
    path_in_resource: str,
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes],
) -> None:
    """
    Upload a single array to an ETP server by delegating to
    `ETPClient.upload_array`. The caller must already have started a
    transaction for writing to the relevant dataspace. Users should
    normally prefer `RDDMSClient.upload_model` over this method.

    Parameters
    ----------
    epc_uri: str | DataObjectURI
        An ETP data object uri to an `obj_EpcExternalPartReference` that is
        connected to the object that links to the provided array.
    path_in_resource: str
        A key (typically a HDF5-key) that uniquely identifies the array
        along with the `epc_uri`. This key is found in the object that
        links to the provided array.
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes]
        A NumPy-array with the data.

    See Also
    --------
    RDDMSClient.upload_model
        A higher-level method that wraps transaction handling, data object
        uploading and array uploading in one go.
    """
    etp_client = self.etp_client
    await etp_client.upload_array(
        epc_uri=epc_uri,
        path_in_resource=path_in_resource,
        data=data,
    )
728
+
729
async def download_array(
    self,
    epc_uri: str | DataObjectURI,
    path_in_resource: str,
) -> npt.NDArray[utils_arrays.LogicalArrayDTypes]:
    """
    Download a single array from an ETP server by delegating to
    `ETPClient.download_array`. Users should normally prefer
    `RDDMSClient.download_model` over this method.

    Parameters
    ----------
    epc_uri: str | DataObjectURI
        An ETP data object uri to an `obj_EpcExternalPartReference` that is
        connected to the object that links to the array.
    path_in_resource: str
        A key (typically a HDF5-key) that uniquely identifies the array
        along with the `epc_uri`. This key is found in the object that
        links to the array.

    Returns
    -------
    data: npt.NDArray[utils_arrays.LogicalArrayDTypes]
        A NumPy-array with the data.

    See Also
    --------
    RDDMSClient.download_model
        A higher-level method that wraps data object and array downloading
        in one go.
    """
    data = await self.etp_client.download_array(
        epc_uri=epc_uri,
        path_in_resource=path_in_resource,
    )
    return data
764
+
765
async def upload_model(
    self,
    dataspace_uri: str | DataspaceURI,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ] = {},
    handle_transaction: bool = True,
    debounce: bool | float = False,
) -> list[str]:
    """
    The main driver method for uploading data to an ETP server. This method
    takes in a dataspace uri (for uploading to multiple dataspaces you need
    to call `RDDMSClient.upload_model` multiple times), a set of
    RESQML-objects, and a mapping of data arrays that are indexed by their
    path in resource (which is found in the RESQML-objects as well).

    Parameters
    ----------
    dataspace_uri: str | DataspaceURI
        An ETP dataspace uri.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        A sequence of RESQML v2.0.1-objects.
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ]
        A mapping, e.g., a dictionary, of data arrays where the path in
        resources (found in the RESQML-objects) are the keys. Default is
        `{}`, meaning that only the RESQML-objects will be uploaded.
    handle_transaction: bool
        A flag to toggle if `RDDMSClient.upload_model` should start and
        commit the transaction towards the dataspace. Default is `True`,
        and the method will ensure that the transaction handling is done
        correctly. If the upload fails, a rollback of the transaction is
        attempted before the error is re-raised.
    debounce: bool | float
        Parameter to decide if `RDDMSClient.upload_model` should retry
        starting a transaction if it initially fails. See
        `RDDMSClient.start_transaction` for a more in-depth explanation of
        the parameter. Default is `False`, i.e., no debouncing will occur
        and the method will fail if it is unable to start a transaction.

    Returns
    -------
    list[str]
        A list of ETP data object uris to the uploaded objects. An empty
        list is returned, without contacting the server, if `ml_objects`
        is empty.

    See Also
    --------
    RDDMSClient.download_model
    RDDMSClient.start_transaction
    """
    if not ml_objects:
        return []

    dataspace_uri = str(DataspaceURI.from_any(dataspace_uri))

    transaction_uuid = None
    if handle_transaction:
        transaction_uuid = await self.start_transaction(
            dataspace_uri=dataspace_uri,
            read_only=False,
            debounce=debounce,
        )

    try:
        ml_uris = await self._upload_model(
            dataspace_uri,
            ml_objects=ml_objects,
            data_arrays=data_arrays,
        )
    except Exception:
        # Do not leave a transaction we started dangling on the server if
        # the upload fails. The rollback is best-effort: a failure to roll
        # back is logged, and the original upload error is what gets
        # raised to the caller.
        if transaction_uuid is not None:
            try:
                await self.rollback_transaction(transaction_uuid=transaction_uuid)
            except Exception:
                logger.exception(
                    f"Failed to roll back transaction with uuid: {transaction_uuid}"
                )
        raise

    if transaction_uuid is not None:
        await self.commit_transaction(transaction_uuid=transaction_uuid)

    return ml_uris
838
+
839
async def _upload_model(
    self,
    dataspace_uri: str,
    ml_objects: Sequence[ro.AbstractCitedDataObject],
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ],
) -> list[str]:
    """
    Upload the objects, and then the arrays they reference, to a dataspace.
    No transaction handling is done here; see `RDDMSClient.upload_model`.

    Parameters
    ----------
    dataspace_uri: str
        An ETP dataspace uri as a string.
    ml_objects: Sequence[ro.AbstractCitedDataObject]
        The RESQML v2.0.1-objects to upload. Every object is expected to
        have a `citation`-field.
    data_arrays: typing.Mapping[
        str, Sequence[npt.NDArray[utils_arrays.LogicalArrayDTypes]]
    ]
        The arrays to upload, keyed by their path in resource. Arrays
        referenced by the objects but missing from this mapping are
        skipped with a warning, so that objects can be uploaded on their
        own. Keys that no object references are also reported with a
        warning.

    Returns
    -------
    list[str]
        The ETP data object uris of the uploaded objects, in the same
        order as `ml_objects`.
    """
    logger.debug(
        f"Starting to upload model of {len(ml_objects)} objects and "
        f"{len(data_arrays)} arrays to dataspace '{dataspace_uri}'"
    )

    # A UTC timestamp in microseconds.
    now = int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e6)

    ml_uris = []
    ml_hds = []
    tasks = []
    for obj in ml_objects:
        # Find all `Hdf5Dataset`-objects in the object.
        ml_hds.extend(find_hdf5_datasets(obj))

        uri = obj.get_etp_data_object_uri(dataspace_uri)
        ml_uris.append(uri)
        obj_xml = serialize_resqml_v201_object(obj)

        # Note that all objects passed in to this method is
        # expected to contain a `citation`-field.
        last_changed = obj.citation.last_update or obj.citation.creation
        if not isinstance(last_changed, datetime.datetime):
            last_changed = last_changed.to_datetime()

        # Note that chunking is handled in the ETP-client if that is needed.
        dob = DataObject(
            format="xml",
            data=obj_xml,
            resource=Resource(
                uri=uri,
                name=obj.citation.title,
                last_changed=int(last_changed.timestamp() * 1e6),
                store_created=now,
                store_last_write=now,
                # This is only used by WITSML channel data objects.
                active_status="Inactive",
                # Not used in the Store-protocol.
                source_count=None,
                target_count=None,
            ),
        )
        pdo = PutDataObjects(
            data_objects={f"{dob.resource.name} -- {dob.resource.uri}": dob},
        )

        tasks.append(self.etp_client.send(pdo))

    task_responses = await asyncio.gather(*tasks)
    responses = [
        response for task_response in task_responses for response in task_response
    ]

    parse_and_raise_response_errors(
        responses, PutDataObjectsResponse, "RDDMSClient.upload_model"
    )

    logger.debug("Done uploading model objects. Starting on upload of arrays.")

    dataspace_path = DataspaceURI(dataspace_uri).dataspace

    uploaded_data_keys = []
    missing_data_keys = []
    tasks = []
    for hdf5_dataset in ml_hds:
        path_in_resource = hdf5_dataset.path_in_hdf_file

        # Fixed: a referenced array without data used to raise a `KeyError`
        # here, after the objects had already been uploaded. This also
        # broke the documented objects-only upload for objects that link
        # to arrays.
        if path_in_resource not in data_arrays:
            missing_data_keys.append(path_in_resource)
            continue

        epc_uri = hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
            dataspace_path_or_uri=dataspace_path
        )

        data = data_arrays[path_in_resource]
        uploaded_data_keys.append(path_in_resource)

        tasks.append(
            self.upload_array(
                epc_uri=epc_uri,
                path_in_resource=path_in_resource,
                data=data,
            )
        )

    if missing_data_keys:
        logger.warning(
            f"No data was provided for the array keys {missing_data_keys} "
            "referenced by the provided objects. These arrays were not uploaded."
        )

    # Fixed: compare the actual keys instead of the counts. A key referenced
    # by more than one dataset would otherwise hide an unused key.
    unused_data_keys = set(data_arrays) - set(uploaded_data_keys)
    if unused_data_keys:
        logger.warning(
            "Not all arrays were uploaded. The remaining array keys "
            f"{unused_data_keys} were not found in the "
            "provided objects."
        )

    await asyncio.gather(*tasks)

    logger.debug("Done uploading arrays.")
    return ml_uris
938
+
939
@typing.overload
async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    download_arrays: typing.Literal[False] = False,
) -> list[ro.AbstractCitedDataObject]: ...


@typing.overload
async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    download_arrays: typing.Literal[True],
) -> tuple[
    list[ro.AbstractCitedDataObject],
    dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
]: ...


async def download_model(
    self,
    ml_uris: list[str | DataObjectURI],
    download_arrays: bool = False,
) -> (
    tuple[
        list[ro.AbstractCitedDataObject],
        dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
    ]
    | list[ro.AbstractCitedDataObject]
):
    """
    Download RESQML-model from the RDDMS. A model in this sense is just a
    grouping of RESQML-objects specified by their uris. The objects do not
    need to be linked in any way.

    One `GetDataObjects`-request is sent per uri, and all requests are
    awaited concurrently. The returned objects are ordered like `ml_uris`.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris.
    download_arrays: bool
        A flag to toggle if any referenced arrays should be downloaded
        alongside the RESQML-objects. Setting to `True` will make the
        function return a tuple where the first element contains a list of
        the objects, and the second element a dictionary with the
        `path_in_hdf_file` as the key, and the arrays as the values. If the
        flag is set to `False` no arrays will be downloaded, and the
        function only returns a list of the objects. Default is `False`.

    Returns
    -------
    tuple[
        list[ro.AbstractCitedDataObject],
        dict[str, list[npt.NDArray[utils_arrays.LogicalArrayDTypes]]],
    ]
    | list[ro.AbstractCitedDataObject]
        See the `download_arrays`-argument for an explanation on which part
        of the union is returned when.

    Raises
    ------
    KeyError
        If one of the requested uris is not present in any of the received
        `GetDataObjectsResponse`-messages.
    """
    if not ml_uris:
        return ([], {}) if download_arrays else []

    uris = [str(uri) for uri in ml_uris]

    task_responses = await asyncio.gather(
        *(self.etp_client.send(GetDataObjects(uris={uri: uri})) for uri in uris)
    )
    responses = [
        response for task_response in task_responses for response in task_response
    ]

    parse_and_raise_response_errors(
        responses, GetDataObjectsResponse, "RDDMSClient.download_model"
    )

    # Every request uses the uri itself as the map key. Merging the
    # `data_objects`-maps and looking the objects up by key keeps the
    # result aligned with `ml_uris` even if a single request is answered
    # by more than one response message; pairing the flattened responses
    # with the uris by position would silently misalign in that case.
    received = {}
    for response in responses:
        if isinstance(response, GetDataObjectsResponse):
            received.update(response.data_objects)

    ml_objects = [parse_resqml_v201_object(received[uri].data) for uri in uris]

    if not download_arrays:
        return ml_objects

    # Collect `(path_in_hdf_file, epc_uri)` for every `Hdf5Dataset` found in
    # the downloaded objects. The dataspace is resolved from the uri of the
    # object that references the array, so objects from different
    # dataspaces do not all get looked up in the dataspace of `ml_uris[0]`.
    array_requests = []
    for uri, obj in zip(ml_uris, ml_objects):
        hdf5_datasets = list(find_hdf5_datasets(obj))
        if not hdf5_datasets:
            continue

        dataspace_path = DataspaceURI.from_any(uri).dataspace
        for hdf5_dataset in hdf5_datasets:
            epc_uri = hdf5_dataset.hdf_proxy.get_etp_data_object_uri(
                dataspace_path_or_uri=dataspace_path
            )
            array_requests.append((hdf5_dataset.path_in_hdf_file, epc_uri))

    if not array_requests:
        logger.info("There are no referenced arrays in the downloaded objects")

        return ml_objects, {}

    arrays = await asyncio.gather(
        *(
            self.download_array(epc_uri=epc_uri, path_in_resource=path_in_resource)
            for path_in_resource, epc_uri in array_requests
        )
    )

    # NOTE(review): the return annotation declares list-valued entries,
    # while a single `download_array`-result is stored per key here --
    # confirm which of the two is intended. Arrays sharing the same
    # `path_in_hdf_file` overwrite each other (last one wins).
    data_arrays = {
        path_in_resource: array
        for (path_in_resource, _), array in zip(array_requests, arrays)
    }

    return ml_objects, data_arrays
1043
+
1044
async def delete_model(
    self,
    ml_uris: list[str | DataObjectURI],
    prune_contained_objects: bool = False,
    handle_transaction: bool = True,
    debounce: bool | float = False,
) -> None:
    """
    Method used for deleting a set of objects on an ETP server. In order
    for the deletion to be successful the objects to be deleted can not
    leave any dangling source-objects. That is, there can be no objects
    left on the ETP server that references the deleted objects.

    All uris are sent in a single `DeleteDataObjects`-message. An empty
    `ml_uris` is a no-op; no message is sent and no transaction is started.

    Parameters
    ----------
    ml_uris: list[str | DataObjectURI]
        A list of ETP data object uris to delete.
    prune_contained_objects: bool
        See section 9.3.4 in the ETP v1.2 standards documentation for an
        accurate description of this parameter. Default is `False` meaning
        no pruning is done.
    handle_transaction: bool
        A flag to toggle if `RDDMSClient.delete_model` should start and
        commit the transaction towards the dataspace. Default is `True`,
        and the method will ensure that the transaction handling is done
        correctly.
    debounce: bool | float
        Parameter to decide if `RDDMSClient.delete_model` should retry
        starting a transaction if it initially fails. See
        `RDDMSClient.start_transaction` for a more in-depth explanation of
        the parameter. Default is `False`, i.e., no debouncing will occur
        and the method will fail if it is unable to start a transaction.

    Raises
    ------
    ValueError
        If `handle_transaction` is `True` and the uris in `ml_uris` do not
        all belong to the same dataspace. The check is done before any
        transaction is started.
    """
    if not ml_uris:
        return

    uris = [str(uri) for uri in ml_uris]
    ddo = DeleteDataObjects(
        uris=dict(zip(uris, uris)),
        prune_contained_objects=prune_contained_objects,
    )

    transaction_uuid = None
    if handle_transaction:
        # A single transaction is started, so every object must live in the
        # same dataspace. This is an explicit check rather than an `assert`
        # as assertions are stripped when Python runs with `-O`.
        dataspace_uris = [str(DataspaceURI.from_any(uri)) for uri in ml_uris]
        if any(du != dataspace_uris[0] for du in dataspace_uris):
            raise ValueError(
                "All uris passed to RDDMSClient.delete_model must belong to the "
                "same dataspace when handle_transaction=True, got: "
                f"{sorted(set(dataspace_uris))}"
            )

        transaction_uuid = await self.start_transaction(
            dataspace_uri=dataspace_uris[0], read_only=False, debounce=debounce
        )

    # NOTE(review): if sending or the error parsing below raises, the
    # transaction started above is neither committed nor rolled back by this
    # method -- consider rolling it back in that case.
    responses = await self.etp_client.send(ddo)

    parse_and_raise_response_errors(
        responses,
        DeleteDataObjectsResponse,
        "RDDMSClient.delete_model",
    )

    if handle_transaction:
        await self.commit_transaction(transaction_uuid)
1103
+
1104
+
1105
class rddms_connect:
    """
    Connect to an RDDMS server via ETP.

    This class can act as:

    1. A context manager handling setup and tear-down of the connection.
    2. An asynchronous iterator which can be used to persistently retry to
       connect if the websockets connection drops.
    3. An awaitable connection that must be manually closed by the user.

    See below for examples of all three cases.

    Parameters
    ----------
    uri: str
        The uri to the RDDMS server. This should be the uri to a websockets
        endpoint to an ETP server.
    data_partition_id: str | None
        The data partition id used when connecting to the OSDU open-etp-server
        in multi-partition mode. Default is `None`.
    authorization: str | SecretStr | None
        Bearer token used for authenticating to the RDDMS server. This token
        should be on the form `"Bearer 1234..."`. Default is `None`.
    etp_timeout: float | None
        The timeout in seconds for when to stop waiting for a message from the
        server. Setting it to `None` will persist the connection indefinitely.
        Default is `None`.
    max_message_size: float
        The maximum number of bytes for a single websockets message. Default is
        `2**20` corresponding to `1` MiB.


    Examples
    --------
    An example of connecting to an RDDMS server using :func:`rddms_connect` as a
    context manager is:

        async with rddms_connect(...) as rddms_client:
            ...

    In this case the closing message is sent and the websockets connection is
    closed once the program exits the context manager.


    To persist a connection if the websockets connection is dropped (for any
    reason), use :func:`rddms_connect` as an asynchronous generator, viz.:

        import websockets

        async for rddms_client in rddms_connect(...):
            try:
                ...
            except websockets.ConnectionClosed:
                continue

            # Include `break` to avoid re-running the whole block if the
            # iteration runs without any errors.
            break

    Note that in this case the whole program under the `try`-block is re-run
    from the start if the iteration completes normally, or if the websockets
    connection is dropped. Therefore, make sure to include a `break` at the end
    of the `try`-block (as in the example above).


    The third option is to set up a connection via `await` and then manually
    close the connection once done:

        rddms_client = await rddms_connect(...)
        ...
        await rddms_client.close()

    See Also
    --------
    pyetp.client.etp_connect
        The `rddms_connect`-class is a thin wrapper around `etp_connect`.
    """

    def __init__(
        self,
        uri: str,
        data_partition_id: str | None = None,
        authorization: str | SecretStr | None = None,
        etp_timeout: float | None = None,
        max_message_size: float = 2**20,
    ) -> None:
        self.uri = uri
        self.data_partition_id = data_partition_id

        # Anything that is not already a `SecretStr` (including `None`) is
        # wrapped, so `self.authorization` is always a `SecretStr`-instance.
        if isinstance(authorization, SecretStr):
            self.authorization = authorization
        else:
            self.authorization = SecretStr(authorization)

        self.etp_timeout = etp_timeout
        self.max_message_size = max_message_size

    def __await__(self) -> typing.Generator[typing.Any, None, RDDMSClient]:
        # `await rddms_connect(...)` behaves like entering the context manager
        # without ever leaving it; closing the client is left to the caller.
        return self.__aenter__().__await__()

    async def __aenter__(self) -> RDDMSClient:
        etp_client = await etp_connect(
            uri=self.uri,
            data_partition_id=self.data_partition_id,
            authorization=self.authorization,
            etp_timeout=self.etp_timeout,
            max_message_size=self.max_message_size,
        )
        # Keep a reference to the client so that `__aexit__` can close it.
        self.rddms_client = RDDMSClient(etp_client)

        return self.rddms_client

    async def __aexit__(
        self,
        exc_type: typing.Type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # The result of `close()` is deliberately not returned. A truthy
        # return value from `__aexit__` suppresses any exception raised inside
        # the `async with`-block.
        await self.rddms_client.close()

    async def __aiter__(self) -> AsyncGenerator[RDDMSClient, None]:
        # Yield a fresh `RDDMSClient` for every ETP client that `etp_connect`
        # produces when used as an asynchronous iterator.
        async for etp_client in etp_connect(
            uri=self.uri,
            data_partition_id=self.data_partition_id,
            authorization=self.authorization,
            etp_timeout=self.etp_timeout,
            max_message_size=self.max_message_size,
        ):
            yield RDDMSClient(etp_client)