dkist-processing-common 11.7.0rc6__py3-none-any.whl → 11.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dkist_processing_common/config.py +28 -6
- dkist_processing_common/models/constants.py +8 -0
- dkist_processing_common/models/graphql.py +0 -33
- dkist_processing_common/parsers/lookup_bud.py +129 -0
- dkist_processing_common/tasks/l1_output_data.py +44 -43
- dkist_processing_common/tasks/mixin/globus.py +23 -26
- dkist_processing_common/tasks/mixin/metadata_store.py +0 -49
- dkist_processing_common/tasks/mixin/object_store.py +21 -0
- dkist_processing_common/tasks/mixin/quality/_metrics.py +4 -6
- dkist_processing_common/tasks/parse_l0_input_data.py +7 -0
- dkist_processing_common/tasks/trial_catalog.py +49 -1
- dkist_processing_common/tasks/trial_output_data.py +1 -1
- dkist_processing_common/tests/mock_metadata_store.py +39 -4
- dkist_processing_common/tests/test_input_dataset.py +1 -37
- dkist_processing_common/tests/test_parse_l0_input_data.py +36 -16
- dkist_processing_common/tests/test_publish_catalog_messages.py +0 -21
- dkist_processing_common/tests/test_quality_mixin.py +11 -3
- dkist_processing_common/tests/test_stems.py +49 -1
- dkist_processing_common/tests/test_submit_dataset_metadata.py +1 -5
- dkist_processing_common/tests/test_trial_catalog.py +72 -2
- dkist_processing_common/tests/test_trial_output_data.py +1 -2
- dkist_processing_common/tests/test_workflow_task_base.py +11 -0
- {dkist_processing_common-11.7.0rc6.dist-info → dkist_processing_common-11.9.3.dist-info}/METADATA +17 -13
- {dkist_processing_common-11.7.0rc6.dist-info → dkist_processing_common-11.9.3.dist-info}/RECORD +26 -31
- changelog/267.feature.1.rst +0 -1
- changelog/267.feature.2.rst +0 -1
- changelog/267.feature.rst +0 -1
- changelog/267.misc.rst +0 -1
- changelog/267.removal.1.rst +0 -2
- changelog/267.removal.rst +0 -1
- {dkist_processing_common-11.7.0rc6.dist-info → dkist_processing_common-11.9.3.dist-info}/WHEEL +0 -0
- {dkist_processing_common-11.7.0rc6.dist-info → dkist_processing_common-11.9.3.dist-info}/top_level.txt +0 -0
|
@@ -3,12 +3,20 @@
|
|
|
3
3
|
from dkist_processing_core.config import DKISTProcessingCoreConfiguration
|
|
4
4
|
from dkist_service_configuration.settings import DEFAULT_MESH_SERVICE
|
|
5
5
|
from dkist_service_configuration.settings import MeshService
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
from pydantic import Field
|
|
7
8
|
from talus import ConnectionRetryerFactory
|
|
8
9
|
from talus import ConsumerConnectionParameterFactory
|
|
9
10
|
from talus import ProducerConnectionParameterFactory
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
class GlobusClientCredential(BaseModel):
    """
    Globus client credential.

    A client ID / client secret pair used for Globus transfers.
    """

    client_id: str = Field(..., description="Globus client ID for transfers.")
    # repr=False keeps the secret out of repr()/str() output (e.g. log lines and tracebacks).
    client_secret: str = Field(
        ..., description="Globus client secret for transfers.", repr=False
    )
|
|
18
|
+
|
|
19
|
+
|
|
12
20
|
class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
|
|
13
21
|
"""Common configurations."""
|
|
14
22
|
|
|
@@ -40,14 +48,28 @@ class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
|
|
|
40
48
|
default=None, description="S3 download configuration for the object store."
|
|
41
49
|
)
|
|
42
50
|
# globus
|
|
43
|
-
|
|
44
|
-
|
|
51
|
+
globus_max_retries: int = Field(
|
|
52
|
+
default=5, description="Max retries for transient errors on calls to the globus api."
|
|
45
53
|
)
|
|
46
|
-
|
|
47
|
-
|
|
54
|
+
globus_inbound_client_credentials: list[GlobusClientCredential] = Field(
|
|
55
|
+
default_factory=list,
|
|
56
|
+
description="Globus client credentials for inbound transfers.",
|
|
57
|
+
examples=[
|
|
58
|
+
[
|
|
59
|
+
{"client_id": "id1", "client_secret": "secret1"},
|
|
60
|
+
{"client_id": "id2", "client_secret": "secret2"},
|
|
61
|
+
],
|
|
62
|
+
],
|
|
48
63
|
)
|
|
49
|
-
|
|
50
|
-
|
|
64
|
+
globus_outbound_client_credentials: list[GlobusClientCredential] = Field(
|
|
65
|
+
default_factory=list,
|
|
66
|
+
description="Globus client credentials for outbound transfers.",
|
|
67
|
+
examples=[
|
|
68
|
+
[
|
|
69
|
+
{"client_id": "id3", "client_secret": "secret3"},
|
|
70
|
+
{"client_id": "id4", "client_secret": "secret4"},
|
|
71
|
+
],
|
|
72
|
+
],
|
|
51
73
|
)
|
|
52
74
|
object_store_endpoint: str | None = Field(
|
|
53
75
|
default=None, description="Object store Globus Endpoint ID."
|
|
@@ -53,6 +53,7 @@ class BudName(StrEnum):
|
|
|
53
53
|
dark_date_end = "DARK_DATE_END"
|
|
54
54
|
solar_gain_date_end = "SOLAR_GAIN_DATE_END"
|
|
55
55
|
polcal_date_end = "POLCAL_DATE_END"
|
|
56
|
+
dark_num_raw_frames_per_fpa = "DARK_NUM_RAW_FRAMES_PER_FPA"
|
|
56
57
|
solar_gain_num_raw_frames_per_fpa = "SOLAR_GAIN_NUM_RAW_FRAMES_PER_FPA"
|
|
57
58
|
polcal_num_raw_frames_per_fpa = "POLCAL_NUM_RAW_FRAMES_PER_FPA"
|
|
58
59
|
solar_gain_telescope_tracking_mode = "SOLAR_GAIN_TELESCOPE_TRACKING_MODE"
|
|
@@ -328,6 +329,13 @@ class ConstantsBase:
|
|
|
328
329
|
"""Return the date end constant for the polcal task."""
|
|
329
330
|
return self._db_dict[BudName.polcal_date_end]
|
|
330
331
|
|
|
332
|
+
@property
def dark_num_raw_frames_per_fpa(self) -> dict[float, list]:
    """
    Return the dictionary of exposure times to number of raw frames per fpa.

    The constant is stored as nested (key, value) pairs; it is converted back to a dictionary here.
    """
    raw_return = self._db_dict[BudName.dark_num_raw_frames_per_fpa]
    # convert nested lists to dictionary
    return dict(raw_return)
|
|
338
|
+
|
|
331
339
|
@property
|
|
332
340
|
def solar_gain_num_raw_frames_per_fpa(self) -> int:
|
|
333
341
|
"""Return the number of raw frames per fpa constant for the solar gain task."""
|
|
@@ -176,36 +176,3 @@ class RecipeRunProvenanceMutation(GraphqlBaseModel):
|
|
|
176
176
|
libraryVersions: str
|
|
177
177
|
workflowVersion: str
|
|
178
178
|
codeVersion: str | None = None
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
class QualityCreation(GraphqlBaseModel):
|
|
182
|
-
"""Quality data creation record."""
|
|
183
|
-
|
|
184
|
-
datasetId: str
|
|
185
|
-
metricCode: str
|
|
186
|
-
facet: str | None = None
|
|
187
|
-
name: str | None = None
|
|
188
|
-
description: str | None = None
|
|
189
|
-
statement: str | None = None
|
|
190
|
-
# JSON array
|
|
191
|
-
warnings: str | None = None
|
|
192
|
-
# JSON objects
|
|
193
|
-
plotData: str | None = None
|
|
194
|
-
multiPlotData: str | None = None
|
|
195
|
-
tableData: str | None = None
|
|
196
|
-
histogramData: str | None = None
|
|
197
|
-
modmatData: str | None = None
|
|
198
|
-
raincloudData: str | None = None
|
|
199
|
-
efficiencyData: str | None = None
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
class QualitiesRequest(GraphqlBaseModel):
|
|
203
|
-
"""Query parameters for quality data."""
|
|
204
|
-
|
|
205
|
-
datasetId: str
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
class QualityResponse(GraphqlBaseModel):
|
|
209
|
-
"""Query Response for quality data."""
|
|
210
|
-
|
|
211
|
-
qualityId: int
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Simple bud that is used to set a constant to a mapping dictionary."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from enum import StrEnum
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import Callable
|
|
7
|
+
from typing import DefaultDict
|
|
8
|
+
|
|
9
|
+
from dkist_processing_common.models.flower_pot import SpilledDirt
|
|
10
|
+
from dkist_processing_common.models.flower_pot import Stem
|
|
11
|
+
from dkist_processing_common.models.tags import EXP_TIME_ROUND_DIGITS
|
|
12
|
+
from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
|
|
13
|
+
from dkist_processing_common.parsers.task import passthrough_header_ip_task
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TimeLookupBud(Stem):
    """
    Bud that maps a rounded time header value to the set of values found alongside it.

    Two metadata keys are read from each ingested file: the KEY value is rounded to
    `EXP_TIME_ROUND_DIGITS` digits and mapped to the set of all VALUE values seen with it.

    Parameters
    ----------
    constant_name
        The name for the constant to be defined

    key_metadata_key
        The time metadata key for the resulting dictionary key

    value_metadata_key
        The metadata key for the resulting dictionary value
    """

    def __init__(
        self,
        constant_name: str,
        key_metadata_key: str | StrEnum,
        value_metadata_key: str | StrEnum,
    ):
        super().__init__(stem_name=constant_name)

        # A StrEnum member is reduced to its member name; `setter` uses that name for attribute lookup
        self.key_metadata_key = (
            key_metadata_key.name if isinstance(key_metadata_key, StrEnum) else key_metadata_key
        )
        self.value_metadata_key = (
            value_metadata_key.name
            if isinstance(value_metadata_key, StrEnum)
            else value_metadata_key
        )

        self.mapping: DefaultDict[float, set[Any]] = defaultdict(set)

    def setter(self, fits_obj: L0FitsAccess):
        """
        Record the value from a single fits object under its rounded time key.

        Parameters
        ----------
        fits_obj
            The input fits object

        Returns
        -------
        None; the mapping dictionary is updated in place
        """
        time_key = round(getattr(fits_obj, self.key_metadata_key), EXP_TIME_ROUND_DIGITS)
        self.mapping[time_key].add(getattr(fits_obj, self.value_metadata_key))

    def getter(self, key):
        """
        Return the accumulated mapping as hashable nested tuples.

        Parameters
        ----------
        key
            The input key

        Returns
        -------
        A tuple of (time key, tuple of values) pairs built from the mapping dictionary
        """
        return tuple((time_key, tuple(values)) for time_key, values in self.mapping.items())
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TaskTimeLookupBud(TimeLookupBud):
    """
    `TimeLookupBud` restricted to objects whose parsed IP task type is in a given list.

    Parameters
    ----------
    constant_name
        The name for the constant to be defined

    key_metadata_key
        The time metadata key for the resulting dictionary key

    value_metadata_key
        The metadata key for the resulting dictionary value

    ip_task_types
        Only consider objects whose parsed header IP task type matches a string in this list

    task_type_parsing_function
        The function used to convert a header into an IP task type
    """

    def __init__(
        self,
        constant_name: str,
        key_metadata_key: str | StrEnum,
        value_metadata_key: str | StrEnum,
        ip_task_types: str | list[str],
        task_type_parsing_function: Callable = passthrough_header_ip_task,
    ):
        super().__init__(
            constant_name=constant_name,
            key_metadata_key=key_metadata_key,
            value_metadata_key=value_metadata_key,
        )
        # A single task type string is treated as a one-element list
        task_names = [ip_task_types] if isinstance(ip_task_types, str) else ip_task_types
        # Stored case-folded so the comparison in `setter` is case-insensitive
        self.ip_task_types = [name.casefold() for name in task_names]
        self.parsing_function = task_type_parsing_function

    def setter(self, fits_obj: L0FitsAccess):
        """Ingest an object if its parsed IP task type is one of the desired types, otherwise return `SpilledDirt`."""
        parsed_task = self.parsing_function(fits_obj)
        if parsed_task.casefold() not in self.ip_task_types:
            return SpilledDirt

        return super().setter(fits_obj)
|
|
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):
|
|
|
38
38
|
|
|
39
39
|
@property
def dataset_has_quality_data(self) -> bool:
    """Report whether at least one path is tagged as both output and quality data."""
    return self.count(tags=[Tag.output(), Tag.quality_data()]) > 0
|
|
43
44
|
|
|
44
45
|
def rollback(self):
|
|
45
46
|
"""Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
|
|
@@ -58,6 +59,9 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
|
|
|
58
59
|
# Movie needs to be transferred separately as the movie headers need to go with it
|
|
59
60
|
self.transfer_movie()
|
|
60
61
|
|
|
62
|
+
with self.telemetry_span("Upload quality data"):
|
|
63
|
+
self.transfer_quality_data()
|
|
64
|
+
|
|
61
65
|
with self.telemetry_span("Upload science frames"):
|
|
62
66
|
self.transfer_output_frames()
|
|
63
67
|
|
|
@@ -79,19 +83,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
|
|
|
79
83
|
def transfer_movie(self):
|
|
80
84
|
"""Transfer the movie to the object store."""
|
|
81
85
|
paths = list(self.read(tags=[Tag.output(), Tag.movie()]))
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
)
|
|
86
|
-
return
|
|
87
|
-
movie = paths[0]
|
|
88
|
-
if count := len(paths) > 1:
|
|
89
|
-
# note: this needs to be an error or the dataset receipt accounting will have an
|
|
90
|
-
# expected count > the eventual actual
|
|
86
|
+
|
|
87
|
+
count = len(paths)
|
|
88
|
+
if count != 1:
|
|
91
89
|
raise RuntimeError(
|
|
92
|
-
f"
|
|
93
|
-
f"
|
|
90
|
+
f"Expected exactly one movie to upload, found {count}. "
|
|
91
|
+
f"recipe_run_id={self.recipe_run_id}"
|
|
94
92
|
)
|
|
93
|
+
movie = paths[0]
|
|
95
94
|
logger.info(f"Uploading Movie: recipe_run_id={self.recipe_run_id}, {movie=}")
|
|
96
95
|
movie_object_key = self.format_object_key(movie)
|
|
97
96
|
self.object_store_upload_movie(
|
|
@@ -101,6 +100,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
|
|
|
101
100
|
content_type="video/mp4",
|
|
102
101
|
)
|
|
103
102
|
|
|
103
|
+
def transfer_quality_data(self):
    """
    Transfer quality data to the object store.

    Logs and returns without uploading when no quality data file is tagged for output.

    Raises
    ------
    RuntimeError
        If more than one quality data file is found
    """
    paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
    # Bind the count on its own line: `count := len(paths) > 1` would bind the boolean
    # result of the comparison, not the number of files.
    count = len(paths)
    if count == 0:
        logger.info(
            f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
        )
        return

    if count > 1:
        # dataset inventory does not support multiple quality data object keys
        raise RuntimeError(
            f"Found multiple quality data files to upload. Not supported. "
            f"{count=}, recipe_run_id={self.recipe_run_id}"
        )

    with self.telemetry_span("Uploading the trial quality data"):
        path = paths[0]
        logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
        quality_data_object_key = self.format_object_key(path)
        self.object_store_upload_quality_data(
            quality_data=path,
            bucket=self.destination_bucket,
            object_key=quality_data_object_key,
            content_type="application/json",
        )
|
|
129
|
+
|
|
104
130
|
|
|
105
131
|
class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
106
132
|
"""
|
|
@@ -128,7 +154,7 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
|
128
154
|
):
|
|
129
155
|
self.write(
|
|
130
156
|
quality_data,
|
|
131
|
-
tags=Tag.quality_data(),
|
|
157
|
+
tags=[Tag.output(), Tag.quality_data()],
|
|
132
158
|
encoder=quality_data_encoder,
|
|
133
159
|
relative_path=f"{self.constants.dataset_id}_quality_data.json",
|
|
134
160
|
)
|
|
@@ -136,31 +162,18 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
|
136
162
|
|
|
137
163
|
class SubmitDatasetMetadata(L1OutputDataBase):
|
|
138
164
|
"""
|
|
139
|
-
Add
|
|
165
|
+
Add receipt account to the metadata store.
|
|
140
166
|
|
|
141
|
-
Add the quality data to the Quality database.
|
|
142
167
|
Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
|
|
143
|
-
Adds the number of files created during the calibration processing to the Processing Support table
|
|
168
|
+
Adds the number of files to be created during the calibration processing to the Processing Support table
|
|
144
169
|
for use by the Dataset Catalog Locker.
|
|
145
170
|
"""
|
|
146
171
|
|
|
147
172
|
def run(self) -> None:
|
|
148
173
|
"""Run method for this task."""
|
|
149
|
-
with self.telemetry_span(f"Storing quality data to metadata store"):
|
|
150
|
-
# each quality_data file is a list - this will combine the elements of multiple lists into a single list
|
|
151
|
-
quality_data = list(
|
|
152
|
-
chain.from_iterable(
|
|
153
|
-
self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
|
|
154
|
-
)
|
|
155
|
-
)
|
|
156
|
-
self.metadata_store_add_quality_data(
|
|
157
|
-
dataset_id=self.constants.dataset_id, quality_data=quality_data
|
|
158
|
-
)
|
|
159
174
|
with self.telemetry_span("Count Expected Outputs"):
|
|
160
175
|
dataset_id = self.constants.dataset_id
|
|
161
176
|
expected_object_count = self.count(tags=Tag.output())
|
|
162
|
-
if quality_data:
|
|
163
|
-
expected_object_count += 1
|
|
164
177
|
logger.info(
|
|
165
178
|
f"Adding Dataset Receipt Account: "
|
|
166
179
|
f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
|
|
@@ -230,24 +243,12 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
|
|
|
230
243
|
messages = [CatalogObjectMessage(body=body) for body in message_bodies]
|
|
231
244
|
return messages
|
|
232
245
|
|
|
233
|
-
@property
|
|
234
|
-
def quality_report_message(self) -> CreateQualityReportMessage:
|
|
235
|
-
"""Create the Quality Report Message."""
|
|
236
|
-
file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
|
|
237
|
-
body = CreateQualityReportMessageBody(
|
|
238
|
-
bucket=self.destination_bucket,
|
|
239
|
-
objectName=self.format_object_key(file_name),
|
|
240
|
-
conversationId=str(self.recipe_run_id),
|
|
241
|
-
datasetId=self.constants.dataset_id,
|
|
242
|
-
incrementDatasetCatalogReceiptCount=True,
|
|
243
|
-
)
|
|
244
|
-
return CreateQualityReportMessage(body=body)
|
|
245
|
-
|
|
246
246
|
def run(self) -> None:
|
|
247
247
|
"""Run method for this task."""
|
|
248
248
|
with self.telemetry_span("Gather output data"):
|
|
249
249
|
frames = self.read(tags=self.output_frame_tags)
|
|
250
250
|
movies = self.read(tags=[Tag.output(), Tag.movie()])
|
|
251
|
+
quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
|
|
251
252
|
with self.telemetry_span("Create message objects"):
|
|
252
253
|
messages = []
|
|
253
254
|
messages += self.frame_messages(paths=frames)
|
|
@@ -256,7 +257,7 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
|
|
|
256
257
|
object_message_count = len(messages) - frame_message_count
|
|
257
258
|
dataset_has_quality_data = self.dataset_has_quality_data
|
|
258
259
|
if dataset_has_quality_data:
|
|
259
|
-
messages.
|
|
260
|
+
messages += self.object_messages(paths=quality_data, object_type="QDATA")
|
|
260
261
|
with self.telemetry_span(
|
|
261
262
|
f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
|
|
262
263
|
):
|
|
@@ -9,6 +9,8 @@ from globus_sdk import ConfidentialAppAuthClient
|
|
|
9
9
|
from globus_sdk import GlobusError
|
|
10
10
|
from globus_sdk import TransferClient
|
|
11
11
|
from globus_sdk import TransferData
|
|
12
|
+
from globus_sdk.scopes import TransferScopes
|
|
13
|
+
from globus_sdk.transport import RetryConfig
|
|
12
14
|
|
|
13
15
|
from dkist_processing_common.config import common_configurations
|
|
14
16
|
|
|
@@ -31,27 +33,32 @@ class GlobusTransferItem:
|
|
|
31
33
|
class GlobusMixin:
|
|
32
34
|
"""Mixin to add methods to a Task to support globus transfers."""
|
|
33
35
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
    """
    Create a globus transfer client based on the direction of transfer and round-robin the available application credentials.

    A transfer whose source endpoint is the configured object store endpoint uses the
    inbound credentials; any other transfer uses the outbound credentials. The credential
    is selected by `recipe_run_id` modulo the number of configured credentials.

    Parameters
    ----------
    transfer_data
        The transfer the returned client will be used for

    Returns
    -------
    A TransferClient authorized with the selected client credential

    Raises
    ------
    RuntimeError
        If no client credentials are configured for the direction of the transfer
    """
    if transfer_data["source_endpoint"] == common_configurations.object_store_endpoint:
        direction = "inbound"
        client_credentials = common_configurations.globus_inbound_client_credentials
    else:
        direction = "outbound"
        client_credentials = common_configurations.globus_outbound_client_credentials

    # Fail with a clear message rather than the ZeroDivisionError the modulo below
    # would raise on an empty credential list.
    if not client_credentials:
        raise RuntimeError(
            f"No globus {direction} client credentials are configured. "
            f"recipe_run_id={self.recipe_run_id}"
        )

    # Round-robin the client credentials based on the recipe run id
    index = self.recipe_run_id % len(client_credentials)
    selected_credential = client_credentials[index]

    confidential_client = ConfidentialAppAuthClient(
        client_id=selected_credential.client_id,
        client_secret=selected_credential.client_secret,
    )
    authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
    retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)

    return TransferClient(authorizer=authorizer, retry_config=retry_config)
|
|
49
57
|
|
|
50
58
|
def globus_transfer_scratch_to_object_store(
|
|
51
59
|
self,
|
|
52
60
|
transfer_items: list[GlobusTransferItem],
|
|
53
61
|
label: str = None,
|
|
54
|
-
sync_level: str = None,
|
|
55
62
|
verify_checksum: bool = True,
|
|
56
63
|
) -> None:
|
|
57
64
|
"""Transfer data from scratch to the object store."""
|
|
@@ -60,7 +67,6 @@ class GlobusMixin:
|
|
|
60
67
|
destination_endpoint=common_configurations.object_store_endpoint,
|
|
61
68
|
transfer_items=transfer_items,
|
|
62
69
|
label=label,
|
|
63
|
-
sync_level=sync_level,
|
|
64
70
|
verify_checksum=verify_checksum,
|
|
65
71
|
)
|
|
66
72
|
|
|
@@ -68,7 +74,6 @@ class GlobusMixin:
|
|
|
68
74
|
self,
|
|
69
75
|
transfer_items: list[GlobusTransferItem],
|
|
70
76
|
label: str = None,
|
|
71
|
-
sync_level: str = None,
|
|
72
77
|
verify_checksum: bool = True,
|
|
73
78
|
) -> None:
|
|
74
79
|
"""Transfer data from the object store to scratch."""
|
|
@@ -77,7 +82,6 @@ class GlobusMixin:
|
|
|
77
82
|
destination_endpoint=common_configurations.scratch_endpoint,
|
|
78
83
|
transfer_items=transfer_items,
|
|
79
84
|
label=label,
|
|
80
|
-
sync_level=sync_level,
|
|
81
85
|
verify_checksum=verify_checksum,
|
|
82
86
|
)
|
|
83
87
|
|
|
@@ -87,7 +91,6 @@ class GlobusMixin:
|
|
|
87
91
|
destination_endpoint: str,
|
|
88
92
|
transfer_items: list[GlobusTransferItem],
|
|
89
93
|
label: str = None,
|
|
90
|
-
sync_level: str = None,
|
|
91
94
|
verify_checksum: bool = True,
|
|
92
95
|
) -> TransferData:
|
|
93
96
|
"""Format a globus TransferData instance."""
|
|
@@ -95,7 +98,6 @@ class GlobusMixin:
|
|
|
95
98
|
source_endpoint=source_endpoint,
|
|
96
99
|
destination_endpoint=destination_endpoint,
|
|
97
100
|
label=label,
|
|
98
|
-
sync_level=sync_level,
|
|
99
101
|
verify_checksum=verify_checksum,
|
|
100
102
|
)
|
|
101
103
|
for item in transfer_items:
|
|
@@ -112,7 +114,6 @@ class GlobusMixin:
|
|
|
112
114
|
destination_endpoint: str,
|
|
113
115
|
transfer_items: list[GlobusTransferItem],
|
|
114
116
|
label: str = None,
|
|
115
|
-
sync_level: str = None,
|
|
116
117
|
verify_checksum: bool = True,
|
|
117
118
|
) -> None:
|
|
118
119
|
"""Perform a transfer of data using globus."""
|
|
@@ -121,7 +122,6 @@ class GlobusMixin:
|
|
|
121
122
|
destination_endpoint=destination_endpoint,
|
|
122
123
|
transfer_items=transfer_items,
|
|
123
124
|
label=label,
|
|
124
|
-
sync_level=sync_level,
|
|
125
125
|
verify_checksum=verify_checksum,
|
|
126
126
|
)
|
|
127
127
|
self._blocking_globus_transfer(transfer_data=transfer_data)
|
|
@@ -131,24 +131,21 @@ class GlobusMixin:
|
|
|
131
131
|
source_endpoint: str,
|
|
132
132
|
destination_endpoint: str,
|
|
133
133
|
label: str = None,
|
|
134
|
-
sync_level: str = None,
|
|
135
134
|
verify_checksum: bool = True,
|
|
136
135
|
) -> TransferData:
|
|
137
136
|
label = label or "Data Processing Transfer"
|
|
138
137
|
return TransferData(
|
|
139
|
-
transfer_client=self.globus_transfer_client,
|
|
140
138
|
source_endpoint=source_endpoint,
|
|
141
139
|
destination_endpoint=destination_endpoint,
|
|
142
140
|
label=label,
|
|
143
|
-
sync_level=sync_level,
|
|
144
141
|
verify_checksum=verify_checksum,
|
|
145
142
|
)
|
|
146
143
|
|
|
147
144
|
def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
|
|
148
|
-
tc = self.
|
|
149
|
-
logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
|
|
145
|
+
tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
|
|
150
146
|
transfer_result = tc.submit_transfer(transfer_data)
|
|
151
147
|
task_id = transfer_result["task_id"]
|
|
148
|
+
logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
|
|
152
149
|
polling_interval = 60
|
|
153
150
|
while not tc.task_wait(
|
|
154
151
|
task_id=task_id, timeout=polling_interval, polling_interval=polling_interval
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Mixin for a WorkflowDataTaskBase subclass which implements Metadata Store data access functionality."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
import logging
|
|
5
4
|
from functools import cached_property
|
|
6
5
|
from typing import Literal
|
|
@@ -8,15 +7,11 @@ from typing import Literal
|
|
|
8
7
|
from pydantic import validate_call
|
|
9
8
|
|
|
10
9
|
from dkist_processing_common._util.graphql import GraphQLClient
|
|
11
|
-
from dkist_processing_common.codecs.quality import QualityDataEncoder
|
|
12
10
|
from dkist_processing_common.config import common_configurations
|
|
13
11
|
from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountMutation
|
|
14
12
|
from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountResponse
|
|
15
13
|
from dkist_processing_common.models.graphql import InputDatasetPartResponse
|
|
16
14
|
from dkist_processing_common.models.graphql import InputDatasetRecipeRunResponse
|
|
17
|
-
from dkist_processing_common.models.graphql import QualitiesRequest
|
|
18
|
-
from dkist_processing_common.models.graphql import QualityCreation
|
|
19
|
-
from dkist_processing_common.models.graphql import QualityResponse
|
|
20
15
|
from dkist_processing_common.models.graphql import RecipeRunMutation
|
|
21
16
|
from dkist_processing_common.models.graphql import RecipeRunMutationResponse
|
|
22
17
|
from dkist_processing_common.models.graphql import RecipeRunProvenanceMutation
|
|
@@ -150,50 +145,6 @@ class MetadataStoreMixin:
|
|
|
150
145
|
mutation_response_cls=RecipeRunProvenanceResponse,
|
|
151
146
|
)
|
|
152
147
|
|
|
153
|
-
# QUALITY
|
|
154
|
-
|
|
155
|
-
def metadata_store_add_quality_data(self, dataset_id: str, quality_data: list[dict]):
|
|
156
|
-
"""Add the quality data to the metadata-store."""
|
|
157
|
-
if self.metadata_store_quality_data_exists(dataset_id):
|
|
158
|
-
raise RuntimeError(f"Quality data already persisted for dataset {dataset_id!r}")
|
|
159
|
-
for metric in quality_data:
|
|
160
|
-
if (metric_code := metric.get("metric_code")) is None:
|
|
161
|
-
name = metric.get("name")
|
|
162
|
-
raise ValueError(f"No metric_code for {name!r} in dataset {dataset_id!r}")
|
|
163
|
-
params = QualityCreation(
|
|
164
|
-
datasetId=dataset_id,
|
|
165
|
-
metricCode=metric_code,
|
|
166
|
-
facet=metric.get("facet"),
|
|
167
|
-
name=metric.get("name"),
|
|
168
|
-
description=metric.get("description"),
|
|
169
|
-
statement=metric.get("statement"),
|
|
170
|
-
# JSON array
|
|
171
|
-
warnings=json.dumps(metric.get("warnings")),
|
|
172
|
-
# JSON objects
|
|
173
|
-
plotData=json.dumps(metric.get("plot_data"), cls=QualityDataEncoder),
|
|
174
|
-
multiPlotData=json.dumps(metric.get("multi_plot_data"), cls=QualityDataEncoder),
|
|
175
|
-
tableData=json.dumps(metric.get("table_data"), cls=QualityDataEncoder),
|
|
176
|
-
histogramData=json.dumps(metric.get("histogram_data"), cls=QualityDataEncoder),
|
|
177
|
-
modmatData=json.dumps(metric.get("modmat_data"), cls=QualityDataEncoder),
|
|
178
|
-
raincloudData=json.dumps(metric.get("raincloud_data"), cls=QualityDataEncoder),
|
|
179
|
-
efficiencyData=json.dumps(metric.get("efficiency_data"), cls=QualityDataEncoder),
|
|
180
|
-
)
|
|
181
|
-
self.metadata_store_client.execute_gql_mutation(
|
|
182
|
-
mutation_base="createQuality",
|
|
183
|
-
mutation_parameters=params,
|
|
184
|
-
mutation_response_cls=QualityResponse,
|
|
185
|
-
)
|
|
186
|
-
|
|
187
|
-
def metadata_store_quality_data_exists(self, dataset_id: str) -> bool:
|
|
188
|
-
"""Return True if quality data exists in the metadata-store for the given dataset id."""
|
|
189
|
-
params = QualitiesRequest(datasetId=dataset_id)
|
|
190
|
-
response = self.metadata_store_client.execute_gql_query(
|
|
191
|
-
query_base="qualities",
|
|
192
|
-
query_response_cls=QualityResponse,
|
|
193
|
-
query_parameters=params,
|
|
194
|
-
)
|
|
195
|
-
return bool(response)
|
|
196
|
-
|
|
197
148
|
# INPUT DATASET RECIPE RUN
|
|
198
149
|
|
|
199
150
|
@cached_property
|
|
@@ -55,6 +55,27 @@ class ObjectStoreMixin:
|
|
|
55
55
|
},
|
|
56
56
|
)
|
|
57
57
|
|
|
58
|
+
def object_store_upload_quality_data(
    self,
    quality_data: Path | bytes,
    bucket: str,
    object_key: str,
    content_type: str = "application/json",
):
    """
    Upload quality data to the object store.

    The upload requests checksum verification and attaches metadata holding the
    dataset id under the "DATASET" group with object type "QDATA".
    """
    upload = self.object_store_client.upload_object
    object_metadata = {
        "groupname": "DATASET",
        "groupid": self.constants.dataset_id,
        "objecttype": "QDATA",
    }
    upload(
        object_data=quality_data,
        bucket=bucket,
        object_key=object_key,
        verify_checksum=True,
        content_type=content_type,
        metadata=object_metadata,
    )
|
|
78
|
+
|
|
58
79
|
def object_store_remove_folder_objects(self, bucket: str, path: Path | str) -> list[str]:
|
|
59
80
|
"""
|
|
60
81
|
Remove folder objects (end with /) in the specified bucket and path.
|
|
@@ -1356,15 +1356,13 @@ class _WavecalQualityMixin:
|
|
|
1356
1356
|
Note that the residuals are the *unweighed* residuals.
|
|
1357
1357
|
"""
|
|
1358
1358
|
weight_data = np.ones(input_wavelength.size) if weights is None else weights
|
|
1359
|
-
prepared_weights =
|
|
1359
|
+
prepared_weights = fit_result.prepared_weights
|
|
1360
1360
|
residuals = fit_result.minimizer_result.residual / prepared_weights
|
|
1361
1361
|
residuals[~np.isfinite(residuals)] = 0.0
|
|
1362
|
-
best_fit_atlas = input_spectrum - residuals
|
|
1363
1362
|
normalized_residuals = residuals / input_spectrum
|
|
1364
1363
|
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
best_fit_wavelength = wcs.spectral.pixel_to_world(np.arange(input_spectrum.size))
|
|
1364
|
+
best_fit_atlas = fit_result.best_fit_atlas
|
|
1365
|
+
best_fit_wavelength = fit_result.best_fit_wavelength_vector
|
|
1368
1366
|
|
|
1369
1367
|
finite_idx = (
|
|
1370
1368
|
np.isfinite(input_wavelength)
|
|
@@ -1378,7 +1376,7 @@ class _WavecalQualityMixin:
|
|
|
1378
1376
|
data = {
|
|
1379
1377
|
"input_wavelength_nm": input_wavelength.to_value(u.nm)[finite_idx].tolist(),
|
|
1380
1378
|
"input_spectrum": input_spectrum[finite_idx].tolist(),
|
|
1381
|
-
"best_fit_wavelength_nm": best_fit_wavelength
|
|
1379
|
+
"best_fit_wavelength_nm": best_fit_wavelength[finite_idx].tolist(),
|
|
1382
1380
|
"best_fit_atlas": best_fit_atlas[finite_idx].tolist(),
|
|
1383
1381
|
"normalized_residuals": normalized_residuals[finite_idx].tolist(),
|
|
1384
1382
|
"weights": None if weights is None else weight_data[finite_idx].tolist(),
|