dkist-processing-common 11.7.0rc3__py3-none-any.whl → 11.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dkist_processing_common/config.py +28 -6
- dkist_processing_common/models/constants.py +12 -36
- dkist_processing_common/models/graphql.py +0 -33
- dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
- dkist_processing_common/tasks/l1_output_data.py +38 -32
- dkist_processing_common/tasks/mixin/globus.py +23 -26
- dkist_processing_common/tasks/mixin/metadata_store.py +0 -49
- dkist_processing_common/tasks/mixin/object_store.py +21 -0
- dkist_processing_common/tasks/mixin/quality/_metrics.py +4 -6
- dkist_processing_common/tasks/parse_l0_input_data.py +9 -30
- dkist_processing_common/tasks/trial_catalog.py +49 -1
- dkist_processing_common/tasks/trial_output_data.py +1 -1
- dkist_processing_common/tests/mock_metadata_store.py +39 -4
- dkist_processing_common/tests/test_input_dataset.py +1 -37
- dkist_processing_common/tests/test_parse_l0_input_data.py +9 -3
- dkist_processing_common/tests/test_publish_catalog_messages.py +0 -21
- dkist_processing_common/tests/test_quality_mixin.py +11 -3
- dkist_processing_common/tests/test_stems.py +35 -0
- dkist_processing_common/tests/test_submit_dataset_metadata.py +1 -5
- dkist_processing_common/tests/test_trial_catalog.py +72 -2
- dkist_processing_common/tests/test_trial_output_data.py +1 -2
- {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/METADATA +17 -13
- {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/RECORD +25 -30
- changelog/267.feature.1.rst +0 -1
- changelog/267.feature.2.rst +0 -1
- changelog/267.feature.rst +0 -1
- changelog/267.misc.rst +0 -1
- changelog/267.removal.1.rst +0 -2
- changelog/267.removal.rst +0 -1
- {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/WHEEL +0 -0
- {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/top_level.txt +0 -0
|
@@ -3,12 +3,20 @@
|
|
|
3
3
|
from dkist_processing_core.config import DKISTProcessingCoreConfiguration
|
|
4
4
|
from dkist_service_configuration.settings import DEFAULT_MESH_SERVICE
|
|
5
5
|
from dkist_service_configuration.settings import MeshService
|
|
6
|
+
from pydantic import BaseModel
|
|
6
7
|
from pydantic import Field
|
|
7
8
|
from talus import ConnectionRetryerFactory
|
|
8
9
|
from talus import ConsumerConnectionParameterFactory
|
|
9
10
|
from talus import ProducerConnectionParameterFactory
|
|
10
11
|
|
|
11
12
|
|
|
13
|
+
class GlobusClientCredential(BaseModel):
    """
    Client id / client secret pair for one Globus application.

    Both fields are required strings; neither has a default value.
    """

    client_id: str = Field(..., description="Globus client ID for transfers.")
    # repr=False keeps the secret out of repr()/str() of this model, so logging a credential
    # (or a configuration object that holds a list of them) does not expose it.
    client_secret: str = Field(
        ..., repr=False, description="Globus client secret for transfers."
    )
|
|
18
|
+
|
|
19
|
+
|
|
12
20
|
class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
|
|
13
21
|
"""Common configurations."""
|
|
14
22
|
|
|
@@ -40,14 +48,28 @@ class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
|
|
|
40
48
|
default=None, description="S3 download configuration for the object store."
|
|
41
49
|
)
|
|
42
50
|
# globus
|
|
43
|
-
|
|
44
|
-
|
|
51
|
+
globus_max_retries: int = Field(
|
|
52
|
+
default=5, description="Max retries for transient errors on calls to the globus api."
|
|
45
53
|
)
|
|
46
|
-
|
|
47
|
-
|
|
54
|
+
globus_inbound_client_credentials: list[GlobusClientCredential] = Field(
|
|
55
|
+
default_factory=list,
|
|
56
|
+
description="Globus client credentials for inbound transfers.",
|
|
57
|
+
examples=[
|
|
58
|
+
[
|
|
59
|
+
{"client_id": "id1", "client_secret": "secret1"},
|
|
60
|
+
{"client_id": "id2", "client_secret": "secret2"},
|
|
61
|
+
],
|
|
62
|
+
],
|
|
48
63
|
)
|
|
49
|
-
|
|
50
|
-
|
|
64
|
+
globus_outbound_client_credentials: list[GlobusClientCredential] = Field(
|
|
65
|
+
default_factory=list,
|
|
66
|
+
description="Globus client credentials for outbound transfers.",
|
|
67
|
+
examples=[
|
|
68
|
+
[
|
|
69
|
+
{"client_id": "id3", "client_secret": "secret3"},
|
|
70
|
+
{"client_id": "id4", "client_secret": "secret4"},
|
|
71
|
+
],
|
|
72
|
+
],
|
|
51
73
|
)
|
|
52
74
|
object_store_endpoint: str | None = Field(
|
|
53
75
|
default=None, description="Object store Globus Endpoint ID."
|
|
@@ -44,25 +44,21 @@ class BudName(StrEnum):
|
|
|
44
44
|
software_binning_y = "SOFTWARE_BINNING_Y"
|
|
45
45
|
hls_version = "HLS_VERSION"
|
|
46
46
|
# Multi-task buds start here:
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
dark_observing_program_execution_ids = "DARK_OBSERVING_PROGRAM_EXECUTION_IDS"
|
|
48
|
+
solar_gain_observing_program_execution_ids = "SOLAR_GAIN_OBSERVING_PROGRAM_EXECUTION_IDS"
|
|
49
|
+
polcal_observing_program_execution_ids = "POLCAL_OBSERVING_PROGRAM_EXECUTION_IDS"
|
|
50
50
|
dark_date_begin = "DARK_DATE_BEGIN"
|
|
51
51
|
solar_gain_date_begin = "SOLAR_GAIN_DATE_BEGIN"
|
|
52
52
|
polcal_date_begin = "POLCAL_DATE_BEGIN"
|
|
53
53
|
dark_date_end = "DARK_DATE_END"
|
|
54
54
|
solar_gain_date_end = "SOLAR_GAIN_DATE_END"
|
|
55
55
|
polcal_date_end = "POLCAL_DATE_END"
|
|
56
|
-
dark_num_raw_frames_per_fpa = "DARK_NUM_RAW_FRAMES_PER_FPA"
|
|
57
56
|
solar_gain_num_raw_frames_per_fpa = "SOLAR_GAIN_NUM_RAW_FRAMES_PER_FPA"
|
|
58
57
|
polcal_num_raw_frames_per_fpa = "POLCAL_NUM_RAW_FRAMES_PER_FPA"
|
|
59
|
-
dark_telescope_tracking_mode = "DARK_TELESCOPE_TRACKING_MODE"
|
|
60
58
|
solar_gain_telescope_tracking_mode = "SOLAR_GAIN_TELESCOPE_TRACKING_MODE"
|
|
61
59
|
polcal_telescope_tracking_mode = "POLCAL_TELESCOPE_TRACKING_MODE"
|
|
62
|
-
dark_coude_table_tracking_mode = "DARK_COUDE_TABLE_TRACKING_MODE"
|
|
63
60
|
solar_gain_coude_table_tracking_mode = "SOLAR_GAIN_COUDE_TABLE_TRACKING_MODE"
|
|
64
61
|
polcal_coude_table_tracking_mode = "POLCAL_COUDE_TABLE_TRACKING_MODE"
|
|
65
|
-
dark_telescope_scanning_mode = "DARK_TELESCOPE_SCANNING_MODE"
|
|
66
62
|
solar_gain_telescope_scanning_mode = "SOLAR_GAIN_TELESCOPE_SCANNING_MODE"
|
|
67
63
|
polcal_telescope_scanning_mode = "POLCAL_TELESCOPE_SCANNING_MODE"
|
|
68
64
|
dark_average_light_level = "DARK_AVERAGE_LIGHT_LEVEL"
|
|
@@ -285,21 +281,21 @@ class ConstantsBase:
|
|
|
285
281
|
# Multi-task constants start here:
|
|
286
282
|
|
|
287
283
|
@property
def dark_observing_program_execution_ids(self) -> list[str]:
    """
    Return the observing program execution ids constant for the dark task.

    The value stored under `BudName.dark_observing_program_execution_ids` is copied into a new list.
    """
    return list(self._db_dict[BudName.dark_observing_program_execution_ids])
|
|
292
288
|
|
|
293
289
|
@property
def solar_gain_observing_program_execution_ids(self) -> list[str]:
    """
    Return the observing program execution ids constant for the solar gain task.

    The value stored under `BudName.solar_gain_observing_program_execution_ids` is copied into a new list.
    """
    return list(self._db_dict[BudName.solar_gain_observing_program_execution_ids])
|
|
298
294
|
|
|
299
295
|
@property
def polcal_observing_program_execution_ids(self) -> list[str]:
    """
    Return the observing program execution ids constant for the polcal task.

    The value stored under `BudName.polcal_observing_program_execution_ids` is copied into a new list.
    """
    return list(self._db_dict[BudName.polcal_observing_program_execution_ids])
|
|
304
300
|
|
|
305
301
|
@property
|
|
@@ -332,11 +328,6 @@ class ConstantsBase:
|
|
|
332
328
|
"""Return the date end constant for the polcal task."""
|
|
333
329
|
return self._db_dict[BudName.polcal_date_end]
|
|
334
330
|
|
|
335
|
-
@property
|
|
336
|
-
def dark_num_raw_frames_per_fpa(self) -> int:
|
|
337
|
-
"""Return the number of raw frames per fpa constant for the dark task."""
|
|
338
|
-
return self._db_dict[BudName.dark_num_raw_frames_per_fpa]
|
|
339
|
-
|
|
340
331
|
@property
|
|
341
332
|
def solar_gain_num_raw_frames_per_fpa(self) -> int:
|
|
342
333
|
"""Return the number of raw frames per fpa constant for the solar gain task."""
|
|
@@ -347,11 +338,6 @@ class ConstantsBase:
|
|
|
347
338
|
"""Return the num raw frames per fpa constant for the polcal task."""
|
|
348
339
|
return self._db_dict[BudName.polcal_num_raw_frames_per_fpa]
|
|
349
340
|
|
|
350
|
-
@property
|
|
351
|
-
def dark_telescope_tracking_mode(self) -> str:
|
|
352
|
-
"""Return the telescope tracking mode constant for the dark task."""
|
|
353
|
-
return self._db_dict[BudName.dark_telescope_tracking_mode]
|
|
354
|
-
|
|
355
341
|
@property
|
|
356
342
|
def solar_gain_telescope_tracking_mode(self) -> str:
|
|
357
343
|
"""Return the telescope tracking mode constant for the solar gain task."""
|
|
@@ -362,11 +348,6 @@ class ConstantsBase:
|
|
|
362
348
|
"""Return the telescope tracking mode constant for the polcal task."""
|
|
363
349
|
return self._db_dict[BudName.polcal_telescope_tracking_mode]
|
|
364
350
|
|
|
365
|
-
@property
|
|
366
|
-
def dark_coude_table_tracking_mode(self) -> str:
|
|
367
|
-
"""Return the coude table tracking mode constant for the dark task."""
|
|
368
|
-
return self._db_dict[BudName.dark_coude_table_tracking_mode]
|
|
369
|
-
|
|
370
351
|
@property
|
|
371
352
|
def solar_gain_coude_table_tracking_mode(self) -> str:
|
|
372
353
|
"""Return the coude table tracking mode constant for the solar gain task."""
|
|
@@ -377,11 +358,6 @@ class ConstantsBase:
|
|
|
377
358
|
"""Return the coude table tracking mode constant for the polcal task."""
|
|
378
359
|
return self._db_dict[BudName.polcal_coude_table_tracking_mode]
|
|
379
360
|
|
|
380
|
-
@property
|
|
381
|
-
def dark_telescope_scanning_mode(self) -> str:
|
|
382
|
-
"""Return the telescope scanning mode constant for the dark task."""
|
|
383
|
-
return self._db_dict[BudName.dark_telescope_scanning_mode]
|
|
384
|
-
|
|
385
361
|
@property
|
|
386
362
|
def solar_gain_telescope_scanning_mode(self) -> str:
|
|
387
363
|
"""Return the telescope scanning mode constant for the solar gain task."""
|
|
@@ -176,36 +176,3 @@ class RecipeRunProvenanceMutation(GraphqlBaseModel):
|
|
|
176
176
|
libraryVersions: str
|
|
177
177
|
workflowVersion: str
|
|
178
178
|
codeVersion: str | None = None
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
class QualityCreation(GraphqlBaseModel):
|
|
182
|
-
"""Quality data creation record."""
|
|
183
|
-
|
|
184
|
-
datasetId: str
|
|
185
|
-
metricCode: str
|
|
186
|
-
facet: str | None = None
|
|
187
|
-
name: str | None = None
|
|
188
|
-
description: str | None = None
|
|
189
|
-
statement: str | None = None
|
|
190
|
-
# JSON array
|
|
191
|
-
warnings: str | None = None
|
|
192
|
-
# JSON objects
|
|
193
|
-
plotData: str | None = None
|
|
194
|
-
multiPlotData: str | None = None
|
|
195
|
-
tableData: str | None = None
|
|
196
|
-
histogramData: str | None = None
|
|
197
|
-
modmatData: str | None = None
|
|
198
|
-
raincloudData: str | None = None
|
|
199
|
-
efficiencyData: str | None = None
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
class QualitiesRequest(GraphqlBaseModel):
|
|
203
|
-
"""Query parameters for quality data."""
|
|
204
|
-
|
|
205
|
-
datasetId: str
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
class QualityResponse(GraphqlBaseModel):
|
|
209
|
-
"""Query Response for quality data."""
|
|
210
|
-
|
|
211
|
-
qualityId: int
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Observing Program Id parser."""
|
|
2
|
+
|
|
3
|
+
from typing import Callable
|
|
4
|
+
|
|
5
|
+
from dkist_processing_common.models.fits_access import MetadataKey
|
|
6
|
+
from dkist_processing_common.parsers.id_bud import TaskContributingIdsBud
|
|
7
|
+
from dkist_processing_common.parsers.task import passthrough_header_ip_task
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TaskContributingObservingProgramExecutionIdsBud(TaskContributingIdsBud):
    """
    Bud for the supporting observing_program_execution_ids.

    A specialization of `TaskContributingIdsBud` whose metadata key is fixed to
    `MetadataKey.observing_program_execution_id`; every other argument is forwarded unchanged.
    """

    def __init__(
        self,
        constant_name: str,
        ip_task_types: str | list[str],
        task_type_parsing_function: Callable = passthrough_header_ip_task,
    ):
        # Collect the parent's keyword arguments in one place; only metadata_key is decided here.
        parent_kwargs = {
            "constant_name": constant_name,
            "metadata_key": MetadataKey.observing_program_execution_id,
            "ip_task_types": ip_task_types,
            "task_type_parsing_function": task_type_parsing_function,
        }
        super().__init__(**parent_kwargs)
|
|
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):
|
|
|
38
38
|
|
|
39
39
|
@property
def dataset_has_quality_data(self) -> bool:
    """Return True if at least one file is tagged as both output and quality data."""
    quality_tags = [Tag.output(), Tag.quality_data()]
    return self.count(tags=quality_tags) > 0
|
|
43
44
|
|
|
44
45
|
def rollback(self):
|
|
45
46
|
"""Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
|
|
@@ -58,6 +59,9 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
|
|
|
58
59
|
# Movie needs to be transferred separately as the movie headers need to go with it
|
|
59
60
|
self.transfer_movie()
|
|
60
61
|
|
|
62
|
+
with self.telemetry_span("Upload quality data"):
|
|
63
|
+
self.transfer_quality_data()
|
|
64
|
+
|
|
61
65
|
with self.telemetry_span("Upload science frames"):
|
|
62
66
|
self.transfer_output_frames()
|
|
63
67
|
|
|
@@ -101,6 +105,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
|
|
|
101
105
|
content_type="video/mp4",
|
|
102
106
|
)
|
|
103
107
|
|
|
108
|
+
def transfer_quality_data(self):
    """
    Transfer quality data to the object store.

    Reads the paths tagged as both output and quality data. When there are none, this is logged
    and the method returns without uploading. When there is exactly one, it is uploaded to
    `self.destination_bucket` under the key produced by `self.format_object_key` with content
    type "application/json".

    Raises
    ------
    RuntimeError
        If more than one quality data file is found.
    """
    paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
    if not paths:
        logger.info(
            f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
        )
        return

    # The walrus must be parenthesized: `count := len(paths) > 1` binds count to the boolean
    # result of the comparison, which made the error message report `count=True`.
    if (count := len(paths)) > 1:
        # dataset inventory does not support multiple quality data object keys
        raise RuntimeError(
            "Found multiple quality data files to upload. Not supported. "
            f"{count=}, recipe_run_id={self.recipe_run_id}"
        )

    # NOTE(review): the span name says "trial" although this is the L1 transfer task; kept
    # unchanged here in case telemetry consumers key on it — confirm and rename if not.
    with self.telemetry_span("Uploading the trial quality data"):
        path = paths[0]
        logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
        quality_data_object_key = self.format_object_key(path)
        self.object_store_upload_quality_data(
            quality_data=path,
            bucket=self.destination_bucket,
            object_key=quality_data_object_key,
            content_type="application/json",
        )
|
|
134
|
+
|
|
104
135
|
|
|
105
136
|
class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
106
137
|
"""
|
|
@@ -128,7 +159,7 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
|
128
159
|
):
|
|
129
160
|
self.write(
|
|
130
161
|
quality_data,
|
|
131
|
-
tags=Tag.quality_data(),
|
|
162
|
+
tags=[Tag.output(), Tag.quality_data()],
|
|
132
163
|
encoder=quality_data_encoder,
|
|
133
164
|
relative_path=f"{self.constants.dataset_id}_quality_data.json",
|
|
134
165
|
)
|
|
@@ -136,31 +167,18 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
|
|
|
136
167
|
|
|
137
168
|
class SubmitDatasetMetadata(L1OutputDataBase):
|
|
138
169
|
"""
|
|
139
|
-
Add
|
|
170
|
+
Add receipt account to the metadata store.
|
|
140
171
|
|
|
141
|
-
Add the quality data to the Quality database.
|
|
142
172
|
Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
|
|
143
|
-
Adds the number of files created during the calibration processing to the Processing Support table
|
|
173
|
+
Adds the number of files to be created during the calibration processing to the Processing Support table
|
|
144
174
|
for use by the Dataset Catalog Locker.
|
|
145
175
|
"""
|
|
146
176
|
|
|
147
177
|
def run(self) -> None:
|
|
148
178
|
"""Run method for this task."""
|
|
149
|
-
with self.telemetry_span(f"Storing quality data to metadata store"):
|
|
150
|
-
# each quality_data file is a list - this will combine the elements of multiple lists into a single list
|
|
151
|
-
quality_data = list(
|
|
152
|
-
chain.from_iterable(
|
|
153
|
-
self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
|
|
154
|
-
)
|
|
155
|
-
)
|
|
156
|
-
self.metadata_store_add_quality_data(
|
|
157
|
-
dataset_id=self.constants.dataset_id, quality_data=quality_data
|
|
158
|
-
)
|
|
159
179
|
with self.telemetry_span("Count Expected Outputs"):
|
|
160
180
|
dataset_id = self.constants.dataset_id
|
|
161
181
|
expected_object_count = self.count(tags=Tag.output())
|
|
162
|
-
if quality_data:
|
|
163
|
-
expected_object_count += 1
|
|
164
182
|
logger.info(
|
|
165
183
|
f"Adding Dataset Receipt Account: "
|
|
166
184
|
f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
|
|
@@ -230,24 +248,12 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
|
|
|
230
248
|
messages = [CatalogObjectMessage(body=body) for body in message_bodies]
|
|
231
249
|
return messages
|
|
232
250
|
|
|
233
|
-
@property
|
|
234
|
-
def quality_report_message(self) -> CreateQualityReportMessage:
|
|
235
|
-
"""Create the Quality Report Message."""
|
|
236
|
-
file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
|
|
237
|
-
body = CreateQualityReportMessageBody(
|
|
238
|
-
bucket=self.destination_bucket,
|
|
239
|
-
objectName=self.format_object_key(file_name),
|
|
240
|
-
conversationId=str(self.recipe_run_id),
|
|
241
|
-
datasetId=self.constants.dataset_id,
|
|
242
|
-
incrementDatasetCatalogReceiptCount=True,
|
|
243
|
-
)
|
|
244
|
-
return CreateQualityReportMessage(body=body)
|
|
245
|
-
|
|
246
251
|
def run(self) -> None:
|
|
247
252
|
"""Run method for this task."""
|
|
248
253
|
with self.telemetry_span("Gather output data"):
|
|
249
254
|
frames = self.read(tags=self.output_frame_tags)
|
|
250
255
|
movies = self.read(tags=[Tag.output(), Tag.movie()])
|
|
256
|
+
quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
|
|
251
257
|
with self.telemetry_span("Create message objects"):
|
|
252
258
|
messages = []
|
|
253
259
|
messages += self.frame_messages(paths=frames)
|
|
@@ -256,7 +262,7 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
|
|
|
256
262
|
object_message_count = len(messages) - frame_message_count
|
|
257
263
|
dataset_has_quality_data = self.dataset_has_quality_data
|
|
258
264
|
if dataset_has_quality_data:
|
|
259
|
-
messages.
|
|
265
|
+
messages += self.object_messages(paths=quality_data, object_type="QDATA")
|
|
260
266
|
with self.telemetry_span(
|
|
261
267
|
f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
|
|
262
268
|
):
|
|
@@ -9,6 +9,8 @@ from globus_sdk import ConfidentialAppAuthClient
|
|
|
9
9
|
from globus_sdk import GlobusError
|
|
10
10
|
from globus_sdk import TransferClient
|
|
11
11
|
from globus_sdk import TransferData
|
|
12
|
+
from globus_sdk.scopes import TransferScopes
|
|
13
|
+
from globus_sdk.transport import RetryConfig
|
|
12
14
|
|
|
13
15
|
from dkist_processing_common.config import common_configurations
|
|
14
16
|
|
|
@@ -31,27 +33,32 @@ class GlobusTransferItem:
|
|
|
31
33
|
class GlobusMixin:
|
|
32
34
|
"""Mixin to add methods to a Task to support globus transfers."""
|
|
33
35
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
    """
    Create a globus transfer client based on the direction of transfer and round-robin the available application credentials.

    A transfer whose source endpoint is the configured object store endpoint is treated as
    inbound and uses `globus_inbound_client_credentials`; any other transfer is treated as
    outbound and uses `globus_outbound_client_credentials`. The credential is chosen by
    `recipe_run_id` modulo the number of configured credentials.

    Raises
    ------
    RuntimeError
        If no client credentials are configured for the transfer direction.
    """
    if transfer_data["source_endpoint"] == common_configurations.object_store_endpoint:
        direction = "inbound"
        client_credentials = common_configurations.globus_inbound_client_credentials
    else:
        direction = "outbound"
        client_credentials = common_configurations.globus_outbound_client_credentials

    # Both credential lists default to empty, in which case the modulo below would raise an
    # unexplained ZeroDivisionError. Fail with a message that names the missing setting instead.
    if not client_credentials:
        raise RuntimeError(
            f"No globus {direction} client credentials are configured. "
            f"Set globus_{direction}_client_credentials to at least one credential."
        )

    # Round-robin the client credentials based on the recipe run id
    index = self.recipe_run_id % len(client_credentials)
    selected_credential = client_credentials[index]

    confidential_client = ConfidentialAppAuthClient(
        client_id=selected_credential.client_id,
        client_secret=selected_credential.client_secret,
    )
    authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
    retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)

    return TransferClient(authorizer=authorizer, retry_config=retry_config)
|
|
49
57
|
|
|
50
58
|
def globus_transfer_scratch_to_object_store(
|
|
51
59
|
self,
|
|
52
60
|
transfer_items: list[GlobusTransferItem],
|
|
53
61
|
label: str = None,
|
|
54
|
-
sync_level: str = None,
|
|
55
62
|
verify_checksum: bool = True,
|
|
56
63
|
) -> None:
|
|
57
64
|
"""Transfer data from scratch to the object store."""
|
|
@@ -60,7 +67,6 @@ class GlobusMixin:
|
|
|
60
67
|
destination_endpoint=common_configurations.object_store_endpoint,
|
|
61
68
|
transfer_items=transfer_items,
|
|
62
69
|
label=label,
|
|
63
|
-
sync_level=sync_level,
|
|
64
70
|
verify_checksum=verify_checksum,
|
|
65
71
|
)
|
|
66
72
|
|
|
@@ -68,7 +74,6 @@ class GlobusMixin:
|
|
|
68
74
|
self,
|
|
69
75
|
transfer_items: list[GlobusTransferItem],
|
|
70
76
|
label: str = None,
|
|
71
|
-
sync_level: str = None,
|
|
72
77
|
verify_checksum: bool = True,
|
|
73
78
|
) -> None:
|
|
74
79
|
"""Transfer data from the object store to scratch."""
|
|
@@ -77,7 +82,6 @@ class GlobusMixin:
|
|
|
77
82
|
destination_endpoint=common_configurations.scratch_endpoint,
|
|
78
83
|
transfer_items=transfer_items,
|
|
79
84
|
label=label,
|
|
80
|
-
sync_level=sync_level,
|
|
81
85
|
verify_checksum=verify_checksum,
|
|
82
86
|
)
|
|
83
87
|
|
|
@@ -87,7 +91,6 @@ class GlobusMixin:
|
|
|
87
91
|
destination_endpoint: str,
|
|
88
92
|
transfer_items: list[GlobusTransferItem],
|
|
89
93
|
label: str = None,
|
|
90
|
-
sync_level: str = None,
|
|
91
94
|
verify_checksum: bool = True,
|
|
92
95
|
) -> TransferData:
|
|
93
96
|
"""Format a globus TransferData instance."""
|
|
@@ -95,7 +98,6 @@ class GlobusMixin:
|
|
|
95
98
|
source_endpoint=source_endpoint,
|
|
96
99
|
destination_endpoint=destination_endpoint,
|
|
97
100
|
label=label,
|
|
98
|
-
sync_level=sync_level,
|
|
99
101
|
verify_checksum=verify_checksum,
|
|
100
102
|
)
|
|
101
103
|
for item in transfer_items:
|
|
@@ -112,7 +114,6 @@ class GlobusMixin:
|
|
|
112
114
|
destination_endpoint: str,
|
|
113
115
|
transfer_items: list[GlobusTransferItem],
|
|
114
116
|
label: str = None,
|
|
115
|
-
sync_level: str = None,
|
|
116
117
|
verify_checksum: bool = True,
|
|
117
118
|
) -> None:
|
|
118
119
|
"""Perform a transfer of data using globus."""
|
|
@@ -121,7 +122,6 @@ class GlobusMixin:
|
|
|
121
122
|
destination_endpoint=destination_endpoint,
|
|
122
123
|
transfer_items=transfer_items,
|
|
123
124
|
label=label,
|
|
124
|
-
sync_level=sync_level,
|
|
125
125
|
verify_checksum=verify_checksum,
|
|
126
126
|
)
|
|
127
127
|
self._blocking_globus_transfer(transfer_data=transfer_data)
|
|
@@ -131,24 +131,21 @@ class GlobusMixin:
|
|
|
131
131
|
source_endpoint: str,
|
|
132
132
|
destination_endpoint: str,
|
|
133
133
|
label: str = None,
|
|
134
|
-
sync_level: str = None,
|
|
135
134
|
verify_checksum: bool = True,
|
|
136
135
|
) -> TransferData:
|
|
137
136
|
label = label or "Data Processing Transfer"
|
|
138
137
|
return TransferData(
|
|
139
|
-
transfer_client=self.globus_transfer_client,
|
|
140
138
|
source_endpoint=source_endpoint,
|
|
141
139
|
destination_endpoint=destination_endpoint,
|
|
142
140
|
label=label,
|
|
143
|
-
sync_level=sync_level,
|
|
144
141
|
verify_checksum=verify_checksum,
|
|
145
142
|
)
|
|
146
143
|
|
|
147
144
|
def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
|
|
148
|
-
tc = self.
|
|
149
|
-
logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
|
|
145
|
+
tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
|
|
150
146
|
transfer_result = tc.submit_transfer(transfer_data)
|
|
151
147
|
task_id = transfer_result["task_id"]
|
|
148
|
+
logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
|
|
152
149
|
polling_interval = 60
|
|
153
150
|
while not tc.task_wait(
|
|
154
151
|
task_id=task_id, timeout=polling_interval, polling_interval=polling_interval
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
"""Mixin for a WorkflowDataTaskBase subclass which implements Metadata Store data access functionality."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
import logging
|
|
5
4
|
from functools import cached_property
|
|
6
5
|
from typing import Literal
|
|
@@ -8,15 +7,11 @@ from typing import Literal
|
|
|
8
7
|
from pydantic import validate_call
|
|
9
8
|
|
|
10
9
|
from dkist_processing_common._util.graphql import GraphQLClient
|
|
11
|
-
from dkist_processing_common.codecs.quality import QualityDataEncoder
|
|
12
10
|
from dkist_processing_common.config import common_configurations
|
|
13
11
|
from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountMutation
|
|
14
12
|
from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountResponse
|
|
15
13
|
from dkist_processing_common.models.graphql import InputDatasetPartResponse
|
|
16
14
|
from dkist_processing_common.models.graphql import InputDatasetRecipeRunResponse
|
|
17
|
-
from dkist_processing_common.models.graphql import QualitiesRequest
|
|
18
|
-
from dkist_processing_common.models.graphql import QualityCreation
|
|
19
|
-
from dkist_processing_common.models.graphql import QualityResponse
|
|
20
15
|
from dkist_processing_common.models.graphql import RecipeRunMutation
|
|
21
16
|
from dkist_processing_common.models.graphql import RecipeRunMutationResponse
|
|
22
17
|
from dkist_processing_common.models.graphql import RecipeRunProvenanceMutation
|
|
@@ -150,50 +145,6 @@ class MetadataStoreMixin:
|
|
|
150
145
|
mutation_response_cls=RecipeRunProvenanceResponse,
|
|
151
146
|
)
|
|
152
147
|
|
|
153
|
-
# QUALITY
|
|
154
|
-
|
|
155
|
-
def metadata_store_add_quality_data(self, dataset_id: str, quality_data: list[dict]):
|
|
156
|
-
"""Add the quality data to the metadata-store."""
|
|
157
|
-
if self.metadata_store_quality_data_exists(dataset_id):
|
|
158
|
-
raise RuntimeError(f"Quality data already persisted for dataset {dataset_id!r}")
|
|
159
|
-
for metric in quality_data:
|
|
160
|
-
if (metric_code := metric.get("metric_code")) is None:
|
|
161
|
-
name = metric.get("name")
|
|
162
|
-
raise ValueError(f"No metric_code for {name!r} in dataset {dataset_id!r}")
|
|
163
|
-
params = QualityCreation(
|
|
164
|
-
datasetId=dataset_id,
|
|
165
|
-
metricCode=metric_code,
|
|
166
|
-
facet=metric.get("facet"),
|
|
167
|
-
name=metric.get("name"),
|
|
168
|
-
description=metric.get("description"),
|
|
169
|
-
statement=metric.get("statement"),
|
|
170
|
-
# JSON array
|
|
171
|
-
warnings=json.dumps(metric.get("warnings")),
|
|
172
|
-
# JSON objects
|
|
173
|
-
plotData=json.dumps(metric.get("plot_data"), cls=QualityDataEncoder),
|
|
174
|
-
multiPlotData=json.dumps(metric.get("multi_plot_data"), cls=QualityDataEncoder),
|
|
175
|
-
tableData=json.dumps(metric.get("table_data"), cls=QualityDataEncoder),
|
|
176
|
-
histogramData=json.dumps(metric.get("histogram_data"), cls=QualityDataEncoder),
|
|
177
|
-
modmatData=json.dumps(metric.get("modmat_data"), cls=QualityDataEncoder),
|
|
178
|
-
raincloudData=json.dumps(metric.get("raincloud_data"), cls=QualityDataEncoder),
|
|
179
|
-
efficiencyData=json.dumps(metric.get("efficiency_data"), cls=QualityDataEncoder),
|
|
180
|
-
)
|
|
181
|
-
self.metadata_store_client.execute_gql_mutation(
|
|
182
|
-
mutation_base="createQuality",
|
|
183
|
-
mutation_parameters=params,
|
|
184
|
-
mutation_response_cls=QualityResponse,
|
|
185
|
-
)
|
|
186
|
-
|
|
187
|
-
def metadata_store_quality_data_exists(self, dataset_id: str) -> bool:
|
|
188
|
-
"""Return True if quality data exists in the metadata-store for the given dataset id."""
|
|
189
|
-
params = QualitiesRequest(datasetId=dataset_id)
|
|
190
|
-
response = self.metadata_store_client.execute_gql_query(
|
|
191
|
-
query_base="qualities",
|
|
192
|
-
query_response_cls=QualityResponse,
|
|
193
|
-
query_parameters=params,
|
|
194
|
-
)
|
|
195
|
-
return bool(response)
|
|
196
|
-
|
|
197
148
|
# INPUT DATASET RECIPE RUN
|
|
198
149
|
|
|
199
150
|
@cached_property
|
|
@@ -55,6 +55,27 @@ class ObjectStoreMixin:
|
|
|
55
55
|
},
|
|
56
56
|
)
|
|
57
57
|
|
|
58
|
+
def object_store_upload_quality_data(
    self,
    quality_data: Path | bytes,
    bucket: str,
    object_key: str,
    content_type: str = "application/json",
):
    """
    Upload quality data to the object store.

    The object is uploaded with checksum verification enabled and is labelled with the
    "DATASET" group name, the dataset id as group id, and the "QDATA" object type.
    """
    upload_object = self.object_store_client.upload_object
    object_metadata = {
        "groupname": "DATASET",
        "groupid": self.constants.dataset_id,
        "objecttype": "QDATA",
    }
    upload_object(
        object_data=quality_data,
        bucket=bucket,
        object_key=object_key,
        verify_checksum=True,
        content_type=content_type,
        metadata=object_metadata,
    )
|
|
78
|
+
|
|
58
79
|
def object_store_remove_folder_objects(self, bucket: str, path: Path | str) -> list[str]:
|
|
59
80
|
"""
|
|
60
81
|
Remove folder objects (end with /) in the specified bucket and path.
|
|
@@ -1356,15 +1356,13 @@ class _WavecalQualityMixin:
|
|
|
1356
1356
|
Note that the residuals are the *unweighed* residuals.
|
|
1357
1357
|
"""
|
|
1358
1358
|
weight_data = np.ones(input_wavelength.size) if weights is None else weights
|
|
1359
|
-
prepared_weights =
|
|
1359
|
+
prepared_weights = fit_result.prepared_weights
|
|
1360
1360
|
residuals = fit_result.minimizer_result.residual / prepared_weights
|
|
1361
1361
|
residuals[~np.isfinite(residuals)] = 0.0
|
|
1362
|
-
best_fit_atlas = input_spectrum - residuals
|
|
1363
1362
|
normalized_residuals = residuals / input_spectrum
|
|
1364
1363
|
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
best_fit_wavelength = wcs.spectral.pixel_to_world(np.arange(input_spectrum.size))
|
|
1364
|
+
best_fit_atlas = fit_result.best_fit_atlas
|
|
1365
|
+
best_fit_wavelength = fit_result.best_fit_wavelength_vector
|
|
1368
1366
|
|
|
1369
1367
|
finite_idx = (
|
|
1370
1368
|
np.isfinite(input_wavelength)
|
|
@@ -1378,7 +1376,7 @@ class _WavecalQualityMixin:
|
|
|
1378
1376
|
data = {
|
|
1379
1377
|
"input_wavelength_nm": input_wavelength.to_value(u.nm)[finite_idx].tolist(),
|
|
1380
1378
|
"input_spectrum": input_spectrum[finite_idx].tolist(),
|
|
1381
|
-
"best_fit_wavelength_nm": best_fit_wavelength
|
|
1379
|
+
"best_fit_wavelength_nm": best_fit_wavelength[finite_idx].tolist(),
|
|
1382
1380
|
"best_fit_atlas": best_fit_atlas[finite_idx].tolist(),
|
|
1383
1381
|
"normalized_residuals": normalized_residuals[finite_idx].tolist(),
|
|
1384
1382
|
"weights": None if weights is None else weight_data[finite_idx].tolist(),
|