dkist-processing-common 10.5.4__py3-none-any.whl → 12.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/280.misc.rst +1 -0
- changelog/282.feature.2.rst +2 -0
- changelog/282.feature.rst +2 -0
- changelog/284.feature.rst +1 -0
- changelog/285.feature.rst +2 -0
- changelog/285.misc.rst +2 -0
- changelog/286.feature.rst +2 -0
- changelog/287.misc.rst +1 -0
- dkist_processing_common/__init__.py +1 -0
- dkist_processing_common/_util/constants.py +1 -0
- dkist_processing_common/_util/graphql.py +1 -0
- dkist_processing_common/_util/scratch.py +9 -9
- dkist_processing_common/_util/tags.py +1 -0
- dkist_processing_common/codecs/array.py +20 -0
- dkist_processing_common/codecs/asdf.py +9 -3
- dkist_processing_common/codecs/basemodel.py +22 -0
- dkist_processing_common/codecs/bytes.py +1 -0
- dkist_processing_common/codecs/fits.py +37 -9
- dkist_processing_common/codecs/iobase.py +1 -0
- dkist_processing_common/codecs/json.py +1 -0
- dkist_processing_common/codecs/path.py +1 -0
- dkist_processing_common/codecs/quality.py +1 -1
- dkist_processing_common/codecs/str.py +1 -0
- dkist_processing_common/config.py +64 -25
- dkist_processing_common/manual.py +6 -8
- dkist_processing_common/models/constants.py +373 -37
- dkist_processing_common/models/dkist_location.py +27 -0
- dkist_processing_common/models/fits_access.py +48 -0
- dkist_processing_common/models/flower_pot.py +231 -9
- dkist_processing_common/models/fried_parameter.py +41 -0
- dkist_processing_common/models/graphql.py +66 -75
- dkist_processing_common/models/input_dataset.py +117 -0
- dkist_processing_common/models/message.py +1 -1
- dkist_processing_common/models/message_queue_binding.py +1 -1
- dkist_processing_common/models/metric_code.py +2 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/models/quality.py +50 -5
- dkist_processing_common/models/tags.py +23 -21
- dkist_processing_common/models/task_name.py +3 -2
- dkist_processing_common/models/telemetry.py +28 -0
- dkist_processing_common/models/wavelength.py +3 -1
- dkist_processing_common/parsers/average_bud.py +46 -0
- dkist_processing_common/parsers/cs_step.py +13 -12
- dkist_processing_common/parsers/dsps_repeat.py +6 -4
- dkist_processing_common/parsers/experiment_id_bud.py +12 -4
- dkist_processing_common/parsers/id_bud.py +42 -27
- dkist_processing_common/parsers/l0_fits_access.py +5 -3
- dkist_processing_common/parsers/l1_fits_access.py +51 -23
- dkist_processing_common/parsers/lookup_bud.py +125 -0
- dkist_processing_common/parsers/near_bud.py +21 -20
- dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
- dkist_processing_common/parsers/proposal_id_bud.py +13 -5
- dkist_processing_common/parsers/quality.py +2 -0
- dkist_processing_common/parsers/retarder.py +32 -0
- dkist_processing_common/parsers/single_value_single_key_flower.py +6 -1
- dkist_processing_common/parsers/task.py +8 -6
- dkist_processing_common/parsers/time.py +178 -72
- dkist_processing_common/parsers/unique_bud.py +21 -22
- dkist_processing_common/parsers/wavelength.py +5 -3
- dkist_processing_common/tasks/__init__.py +3 -2
- dkist_processing_common/tasks/assemble_movie.py +4 -3
- dkist_processing_common/tasks/base.py +59 -60
- dkist_processing_common/tasks/l1_output_data.py +54 -53
- dkist_processing_common/tasks/mixin/globus.py +24 -27
- dkist_processing_common/tasks/mixin/interservice_bus.py +1 -0
- dkist_processing_common/tasks/mixin/metadata_store.py +108 -243
- dkist_processing_common/tasks/mixin/object_store.py +22 -0
- dkist_processing_common/tasks/mixin/quality/__init__.py +1 -0
- dkist_processing_common/tasks/mixin/quality/_base.py +8 -1
- dkist_processing_common/tasks/mixin/quality/_metrics.py +166 -14
- dkist_processing_common/tasks/output_data_base.py +4 -3
- dkist_processing_common/tasks/parse_l0_input_data.py +277 -15
- dkist_processing_common/tasks/quality_metrics.py +9 -9
- dkist_processing_common/tasks/teardown.py +7 -7
- dkist_processing_common/tasks/transfer_input_data.py +67 -69
- dkist_processing_common/tasks/trial_catalog.py +77 -17
- dkist_processing_common/tasks/trial_output_data.py +16 -17
- dkist_processing_common/tasks/write_l1.py +102 -72
- dkist_processing_common/tests/conftest.py +32 -173
- dkist_processing_common/tests/mock_metadata_store.py +271 -0
- dkist_processing_common/tests/test_assemble_movie.py +4 -4
- dkist_processing_common/tests/test_assemble_quality.py +32 -4
- dkist_processing_common/tests/test_base.py +5 -19
- dkist_processing_common/tests/test_codecs.py +103 -12
- dkist_processing_common/tests/test_constants.py +15 -0
- dkist_processing_common/tests/test_dkist_location.py +15 -0
- dkist_processing_common/tests/test_fits_access.py +56 -19
- dkist_processing_common/tests/test_flower_pot.py +147 -5
- dkist_processing_common/tests/test_fried_parameter.py +27 -0
- dkist_processing_common/tests/test_input_dataset.py +78 -361
- dkist_processing_common/tests/test_interservice_bus.py +1 -0
- dkist_processing_common/tests/test_interservice_bus_mixin.py +1 -1
- dkist_processing_common/tests/test_manual_processing.py +33 -0
- dkist_processing_common/tests/test_output_data_base.py +5 -7
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_parse_l0_input_data.py +115 -32
- dkist_processing_common/tests/test_publish_catalog_messages.py +2 -24
- dkist_processing_common/tests/test_quality.py +1 -0
- dkist_processing_common/tests/test_quality_mixin.py +255 -23
- dkist_processing_common/tests/test_scratch.py +2 -1
- dkist_processing_common/tests/test_stems.py +511 -168
- dkist_processing_common/tests/test_submit_dataset_metadata.py +3 -7
- dkist_processing_common/tests/test_tags.py +1 -0
- dkist_processing_common/tests/test_task_name.py +1 -1
- dkist_processing_common/tests/test_task_parsing.py +17 -7
- dkist_processing_common/tests/test_teardown.py +28 -24
- dkist_processing_common/tests/test_transfer_input_data.py +270 -125
- dkist_processing_common/tests/test_transfer_l1_output_data.py +2 -3
- dkist_processing_common/tests/test_trial_catalog.py +83 -8
- dkist_processing_common/tests/test_trial_output_data.py +46 -73
- dkist_processing_common/tests/test_workflow_task_base.py +8 -10
- dkist_processing_common/tests/test_write_l1.py +298 -76
- dkist_processing_common-12.1.0rc1.dist-info/METADATA +265 -0
- dkist_processing_common-12.1.0rc1.dist-info/RECORD +134 -0
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/WHEEL +1 -1
- docs/conf.py +1 -0
- docs/index.rst +1 -1
- docs/landing_page.rst +13 -0
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- dkist_processing_common-10.5.4.dist-info/METADATA +0 -175
- dkist_processing_common-10.5.4.dist-info/RECORD +0 -112
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/base.py

@@ -1,17 +1,21 @@
 """Wrappers for all workflow tasks."""
+
 import json
 import logging
 import re
 from abc import ABC
+from importlib import metadata
 from pathlib import Path
-from types import NoneType
 from typing import Any
 from typing import Generator
 from typing import Iterable
 from typing import Type
 
-import pkg_resources
 from dkist_processing_core import TaskBase
+from opentelemetry.metrics import CallbackOptions
+from opentelemetry.metrics import Counter
+from opentelemetry.metrics import ObservableGauge
+from opentelemetry.metrics import Observation
 
 from dkist_processing_common._util.scratch import WorkflowFileSystem
 from dkist_processing_common._util.tags import TagDB
@@ -21,6 +25,7 @@ from dkist_processing_common.config import common_configurations
 from dkist_processing_common.models.constants import ConstantsBase
 from dkist_processing_common.models.tags import StemName
 from dkist_processing_common.models.tags import Tag
+from dkist_processing_common.models.telemetry import ObservableProgress
 from dkist_processing_common.tasks.mixin.metadata_store import MetadataStoreMixin
 
 __all__ = ["WorkflowTaskBase", "tag_type_hint"]
@@ -66,7 +71,6 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             workflow_name=workflow_name,
             workflow_version=workflow_version,
         )
-        self.task_name = self.__class__.__name__
         self.scratch = WorkflowFileSystem(recipe_run_id=recipe_run_id, task_name=self.task_name)
         self.constants = self.constants_model_class(
             recipe_run_id=recipe_run_id, task_name=self.task_name
@@ -76,50 +80,30 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             recipe_run_id=recipe_run_id, task_name=self.task_name, namespace="counter"
         )
 
-
-
-
-
-
-        arg_span_type: str = None,
-        arg_labels: dict[str, str] = None,
-        **kwargs,
-    ):
-        """Groom inputs to apm_step to handle various kwarg collisions."""
-        if "span_type" in kwargs:
-            raise RuntimeError(
-                f"Cannot specify 'span_type' {kwargs['span_type']} in step that forces is it to be {arg_span_type}"
-            )
-
-        if "labels" in kwargs:
-            arg_labels.update(kwargs["labels"])
-            del kwargs["labels"]
-        logger.info(
-            f"Recording APM span: {name = }, {arg_span_type = }, {arg_labels = }, "
-            f"recipe_run_id = {self.recipe_run_id}"
+        # meter instruments
+        self.read_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.reads"),
+            unit="1",
+            description="The number of reads executed in the processing stack.",
         )
-
-
-
-
-        return self.apm_type_base(
-            name, *args, arg_span_type="code.task", arg_labels={"type": "task"}, **kwargs
+        self.write_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.writes"),
+            unit="1",
+            description="The number of writes executed in the processing stack.",
         )
-
-
-
-
-
-            *args,
-            arg_span_type="code.processing",
-            arg_labels={"type": "processing"},
-            **kwargs,
+        self.outer_loop_progress = ObservableProgress()
+        self.outer_loop_progress_gauge: ObservableGauge = self.meter.create_observable_gauge(
+            name=self.format_metric_name("tasks.outer.loop.progress"),
+            description="The progress of a task through the main processing loop.",
+            callbacks=[lambda options: self.outer_loop_run_progress(options)],
         )
 
-    def
-
-
-
+    def outer_loop_run_progress(
+        self, options: CallbackOptions
+    ) -> Generator[Observation, None, None]:
+        """Observe the progress of the current task as a percentage."""
+        yield Observation(
+            self.outer_loop_progress.percent_complete, attributes=self.base_telemetry_attributes
         )
 
     @property
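The instrumentation added above follows the stock OpenTelemetry metrics pattern: synchronous counters updated at the call site, plus an observable gauge whose callback is polled on each collection cycle. Below is a minimal self-contained sketch of that pattern, assuming only the `opentelemetry-api` package; the meter name, attributes, and the plain dict standing in for `ObservableProgress` are illustrative, not the package's real objects.

```python
# Sketch of the counter + observable-gauge pattern used in WorkflowTaskBase.
# Assumes a configured MeterProvider; all names here are illustrative.
from opentelemetry import metrics
from opentelemetry.metrics import CallbackOptions, Observation

meter = metrics.get_meter("example.tasks")

read_counter = meter.create_counter(
    name="tasks.reads",
    unit="1",
    description="Number of reads executed.",
)

progress = {"percent_complete": 0.0}  # stand-in for ObservableProgress

def observe_progress(options: CallbackOptions):
    # Polled by the SDK on each metric collection cycle.
    yield Observation(progress["percent_complete"], attributes={"task": "Example"})

progress_gauge = meter.create_observable_gauge(
    name="tasks.outer.loop.progress",
    description="Progress of a task through its main loop.",
    callbacks=[observe_progress],
)

# Synchronous instruments are updated where the work happens;
# the gauge is read automatically on the next collection cycle.
read_counter.add(1, attributes={"task": "Example"})
progress["percent_complete"] = 50.0
```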
@@ -130,13 +114,20 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
     @property
     def library_versions(self) -> str:
         """Harvest the dependency names and versions from the environment for all packages beginning with 'dkist' or are a requirement for a package beginning with 'dkist'."""
-        distributions = {
+        distributions = {
+            d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
+        }
         libraries = {}
-        for pkg in
-            if pkg.
-                libraries[pkg.
-                for req in
-
+        for pkg in metadata.distributions():
+            if pkg.name.startswith("dkist"):
+                libraries[pkg.name.lower().replace("_", "-")] = pkg.version
+                for req in metadata.requires(pkg.name):
+                    is_extra_requirement = "extra" in req
+                    if not is_extra_requirement:
+                        key = re.split(r"[ \[=<>~!]", req.lower())[
+                            0
+                        ]  # get the raw name of the package
+                        libraries[key] = distributions[key]
         return json.dumps(libraries)
 
     def _record_provenance(self):
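The `library_versions` rewrite above replaces the deprecated `pkg_resources` API with the standard library's `importlib.metadata`. A condensed sketch of the same harvesting logic follows; the `or []` guard and the `dict.get` fallback are defensive additions not present in the diff.

```python
# Harvest versions of installed "dkist*" packages and their declared
# requirements using only the standard library.
import re
from importlib import metadata

distributions = {
    d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
}

libraries = {}
for pkg in metadata.distributions():
    if pkg.name.startswith("dkist"):
        libraries[pkg.name.lower().replace("_", "-")] = pkg.version
        for req in metadata.requires(pkg.name) or []:  # requires() may return None
            if "extra" not in req:
                # Strip version specifiers and extras to get the bare project name.
                key = re.split(r"[ \[=<>~!]", req.lower())[0]
                libraries[key] = distributions.get(key)
```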
@@ -154,9 +145,14 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         """Execute any pre-task setup required."""
         super().pre_run()
         if self.record_provenance or self.is_task_manual:
-            with self.
+            with self.telemetry_span("Record Provenance"):
                 self._record_provenance()
 
+    def post_run(self) -> None:
+        """Execute and post-task bookkeeping required."""
+        super().post_run()
+        self.outer_loop_progress.set_complete()
+
     def read(
         self, tags: tag_type_hint, decoder: callable = path_decoder, **decoder_kwargs
     ) -> Generator[Any, None, None]:
@@ -176,7 +172,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         **decoder_kwargs
             Additional arguments to pass to the `decoder` function.
         """
-
+        for p in self.scratch.find_all(tags=tags):
+            self.read_counter.add(amount=1, attributes=self.base_telemetry_attributes)
+            yield decoder(p, **decoder_kwargs)
 
     def write(
         self,
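Note the design of the rewritten `read`: because the method is a generator, the counter increments as each file is actually consumed, not once when `read` is called. A stripped-down sketch of that pattern, with illustrative names:

```python
# Per-item counting inside a generator: the increment happens on consumption.
from typing import Any, Callable, Generator, Iterable

def counted_read(
    paths: Iterable[str], count: Callable[[], None], decoder: Callable[[str], Any]
) -> Generator[Any, None, None]:
    for p in paths:
        count()           # one increment per file actually read
        yield decoder(p)
```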
@@ -214,6 +212,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         -------
         The path for the written file
         """
+        self.write_counter.add(amount=1, attributes=self.base_telemetry_attributes)
         file_obj = encoder(data, **encoder_kwargs)
         if isinstance(tags, str):
             tags = [tags]
@@ -248,7 +247,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             StemName.modstate.value,
         ]
 
-    def build_generic_tag_filename(self, tags:
+    def build_generic_tag_filename(self, tags: list) -> str:
         """
         Build a filename from a set of tags.
 
@@ -264,9 +263,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
 
         4. You can have any extension you want so long as it's ".dat".
         """
-        # This call
-        #
-        copied_tags =
+        # This call copies the input list so it doesn't get modified in place and flattens the list to allow
+        # arbitrarily nested lists.
+        copied_tags = self.scratch.parse_tags(tags)
         try:
             copied_tags.remove(StemName.frame.value)
         except ValueError:
@@ -283,8 +282,8 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         sorted_remaining_tags = sorted(copied_tags)
         filename_parts += sorted_remaining_tags
 
-        # replace spaces and
-        dash_separated_parts = [re.sub("[ _]", "-", t) for t in filename_parts]
+        # replace spaces, underscores, and colons with dashes - dynamic part (e.g. polcal `Beam 1` label) may include spaces
+        dash_separated_parts = [re.sub("[ _:]", "-", t) for t in filename_parts]
 
         base_filename = "_".join(dash_separated_parts)
         base_filename_counter = str(self.filename_counter.increment(base_filename))
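To make the sanitization change above concrete, here is a small worked example with hypothetical tag values; colons (e.g. from time strings) are now mapped to dashes along with spaces and underscores.

```python
import re

# Hypothetical tag-derived filename parts.
filename_parts = ["TASK_polcal", "Beam 1", "TIME_12:30:45"]

dash_separated_parts = [re.sub("[ _:]", "-", t) for t in filename_parts]
base_filename = "_".join(dash_separated_parts)
print(base_filename)  # -> TASK-polcal_Beam-1_TIME-12-30-45
```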
@@ -365,11 +364,11 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         Filename Counter: not rolled back but its purpose of preventing file name collisions is not impacted
         """
         super().rollback()
-        with self.
+        with self.telemetry_span("Rollback Scratch"):
             self.scratch.rollback()
-        with self.
+        with self.telemetry_span("Rollback Constants"):
             self.constants._rollback()
-        with self.
+        with self.telemetry_span("Change Recipe Run to Inprogress"):
             self.metadata_store_change_recipe_run_to_inprogress()
 
     def __exit__(self, exc_type, exc_val, exc_tb):
dkist_processing_common/tasks/l1_output_data.py

@@ -1,4 +1,5 @@
 """Task(s) for the transfer and publishing of L1 data from a production run of a processing pipeline."""
+
 import logging
 from abc import ABC
 from itertools import chain
@@ -20,7 +21,6 @@ from dkist_processing_common.tasks.mixin.quality import QualityMixin
 from dkist_processing_common.tasks.output_data_base import OutputDataBase
 from dkist_processing_common.tasks.output_data_base import TransferDataBase
 
-
 __all__ = [
     "L1OutputDataBase",
     "TransferL1Data",
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):
 
     @property
     def dataset_has_quality_data(self) -> bool:
-        """Return True if
-
+        """Return True if the dataset has quality data."""
+        path_count = self.count(tags=[Tag.output(), Tag.quality_data()])
+        return path_count > 0
 
     def rollback(self):
         """Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
@@ -54,11 +55,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
 
     def transfer_objects(self):
         """Transfer movie and L1 output frames."""
-        with self.
+        with self.telemetry_span("Upload movie"):
             # Movie needs to be transferred separately as the movie headers need to go with it
             self.transfer_movie()
 
-        with self.
+        with self.telemetry_span("Upload quality data"):
+            self.transfer_quality_data()
+
+        with self.telemetry_span("Upload science frames"):
             self.transfer_output_frames()
 
     def transfer_output_frames(self):
@@ -79,19 +83,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
     def transfer_movie(self):
         """Transfer the movie to the object store."""
         paths = list(self.read(tags=[Tag.output(), Tag.movie()]))
-
-
-
-            )
-            return
-        movie = paths[0]
-        if count := len(paths) > 1:
-            # note: this needs to be an error or the dataset receipt accounting will have an
-            # expected count > the eventual actual
+
+        count = len(paths)
+        if count != 1:
             raise RuntimeError(
-                f"
-                f"
+                f"Expected exactly one movie to upload, found {count}. "
+                f"recipe_run_id={self.recipe_run_id}"
             )
+        movie = paths[0]
         logger.info(f"Uploading Movie: recipe_run_id={self.recipe_run_id}, {movie=}")
         movie_object_key = self.format_object_key(movie)
         self.object_store_upload_movie(
@@ -101,6 +100,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
             content_type="video/mp4",
         )
 
+    def transfer_quality_data(self):
+        """Transfer quality data to the object store."""
+        paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
+        if len(paths) == 0:
+            logger.info(
+                f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
+            )
+            return
+
+        if count := len(paths) > 1:
+            # dataset inventory does not support multiple quality data object keys
+            raise RuntimeError(
+                f"Found multiple quality data files to upload. Not supported."
+                f"{count=}, recipe_run_id={self.recipe_run_id}"
+            )
+
+        with self.telemetry_span(f"Uploading the trial quality data"):
+            path = paths[0]
+            logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
+            quality_data_object_key = self.format_object_key(path)
+            self.object_store_upload_quality_data(
+                quality_data=path,
+                bucket=self.destination_bucket,
+                object_key=quality_data_object_key,
+                content_type="application/json",
+            )
+
 
 class AssembleQualityData(L1OutputDataBase, QualityMixin):
     """
@@ -120,15 +146,15 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
 
     def run(self):
         """Run method for the task."""
-        with self.
+        with self.telemetry_span("Assembling quality data"):
             quality_data = self.quality_assemble_data(polcal_label_list=self.polcal_label_list)
 
-        with self.
+        with self.telemetry_span(
             f"Saving quality data with {len(quality_data)} metrics to the file system"
         ):
             self.write(
                 quality_data,
-                tags=Tag.quality_data(),
+                tags=[Tag.output(), Tag.quality_data()],
                 encoder=quality_data_encoder,
                 relative_path=f"{self.constants.dataset_id}_quality_data.json",
             )
@@ -136,36 +162,23 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
 
 class SubmitDatasetMetadata(L1OutputDataBase):
     """
-    Add
+    Add receipt account to the metadata store.
 
-    Add the quality data to the Quality database.
     Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
-    Adds the number of files created during the calibration processing to the Processing Support table
+    Adds the number of files to be created during the calibration processing to the Processing Support table
     for use by the Dataset Catalog Locker.
     """
 
     def run(self) -> None:
        """Run method for this task."""
-        with self.
-            # each quality_data file is a list - this will combine the elements of multiple lists into a single list
-            quality_data = list(
-                chain.from_iterable(
-                    self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
-                )
-            )
-            self.metadata_store_add_quality_data(
-                dataset_id=self.constants.dataset_id, quality_data=quality_data
-            )
-        with self.apm_processing_step("Count Expected Outputs"):
+        with self.telemetry_span("Count Expected Outputs"):
             dataset_id = self.constants.dataset_id
             expected_object_count = self.count(tags=Tag.output())
-            if quality_data:
-                expected_object_count += 1
             logger.info(
                 f"Adding Dataset Receipt Account: "
                 f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
             )
-        with self.
+        with self.telemetry_span(
             f"Add Dataset Receipt Account: {dataset_id = }, {expected_object_count = }"
         ):
             self.metadata_store_add_dataset_receipt_account(
@@ -230,25 +243,13 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         messages = [CatalogObjectMessage(body=body) for body in message_bodies]
         return messages
 
-    @property
-    def quality_report_message(self) -> CreateQualityReportMessage:
-        """Create the Quality Report Message."""
-        file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
-        body = CreateQualityReportMessageBody(
-            bucket=self.destination_bucket,
-            objectName=self.format_object_key(file_name),
-            conversationId=str(self.recipe_run_id),
-            datasetId=self.constants.dataset_id,
-            incrementDatasetCatalogReceiptCount=True,
-        )
-        return CreateQualityReportMessage(body=body)
-
     def run(self) -> None:
         """Run method for this task."""
-        with self.
+        with self.telemetry_span("Gather output data"):
             frames = self.read(tags=self.output_frame_tags)
             movies = self.read(tags=[Tag.output(), Tag.movie()])
-
+            quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
+        with self.telemetry_span("Create message objects"):
             messages = []
             messages += self.frame_messages(paths=frames)
             frame_message_count = len(messages)
@@ -256,8 +257,8 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         object_message_count = len(messages) - frame_message_count
         dataset_has_quality_data = self.dataset_has_quality_data
         if dataset_has_quality_data:
-            messages.
-        with self.
+            messages += self.object_messages(paths=quality_data, object_type="QDATA")
+        with self.telemetry_span(
             f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
         ):
             self.interservice_bus_publish(messages=messages)
dkist_processing_common/tasks/mixin/globus.py

@@ -1,4 +1,5 @@
 """Mixin to add methods to a Task to support globus transfers."""
+
 import logging
 from dataclasses import dataclass
 from pathlib import Path
@@ -8,10 +9,11 @@ from globus_sdk import ConfidentialAppAuthClient
 from globus_sdk import GlobusError
 from globus_sdk import TransferClient
 from globus_sdk import TransferData
+from globus_sdk.scopes import TransferScopes
+from globus_sdk.transport import RetryConfig
 
 from dkist_processing_common.config import common_configurations
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -31,27 +33,32 @@ class GlobusTransferItem:
 class GlobusMixin:
     """Mixin to add methods to a Task to support globus transfers."""
 
-
-
-
-
-
+    def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
+        """Create a globus transfer client based on the direction of transfer and round-robin the available application credentials."""
+        if (
+            transfer_data["source_endpoint"] == common_configurations.object_store_endpoint
+        ):  # inbound
+            client_credentials = common_configurations.globus_inbound_client_credentials
+        else:  # outbound
+            client_credentials = common_configurations.globus_outbound_client_credentials
+
+        # Round-robin the client credentials based on the recipe run id
+        index = self.recipe_run_id % len(client_credentials)
+        selected_credential = client_credentials[index]
+
         confidential_client = ConfidentialAppAuthClient(
-            client_id=
-            client_secret=
-            transport_params=common_configurations.globus_transport_params,
-        )
-        authorizer = ClientCredentialsAuthorizer(
-            confidential_client, scopes="urn:globus:auth:scope:transfer.api.globus.org:all"
+            client_id=selected_credential.client_id,
+            client_secret=selected_credential.client_secret,
         )
-
-
+        authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
+        retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)
+
+        return TransferClient(authorizer=authorizer, retry_config=retry_config)
 
     def globus_transfer_scratch_to_object_store(
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from scratch to the object store."""
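The factory above picks credentials deterministically: a given recipe run always maps to the same application credential, while consecutive run ids spread load evenly across the pool. A minimal sketch of that round-robin selection, with an illustrative credential type standing in for the configuration objects:

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class ClientCredential:
    """Illustrative stand-in for the configured globus application credentials."""
    client_id: str
    client_secret: str

credential_pool = [
    ClientCredential("id-a", "secret-a"),
    ClientCredential("id-b", "secret-b"),
    ClientCredential("id-c", "secret-c"),
]

def select_credential(recipe_run_id: int) -> ClientCredential:
    # Deterministic round-robin: same run id -> same credential.
    return credential_pool[recipe_run_id % len(credential_pool)]

assert select_credential(7) == credential_pool[1]  # 7 % 3 == 1
```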
@@ -60,7 +67,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.object_store_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
@@ -68,7 +74,6 @@ class GlobusMixin:
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from the object store to scratch."""
@@ -77,7 +82,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.scratch_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
@@ -87,7 +91,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         """Format a globus TransferData instance."""
@@ -95,7 +98,6 @@ class GlobusMixin:
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         for item in transfer_items:
@@ -112,7 +114,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Perform a transfer of data using globus."""
@@ -121,7 +122,6 @@ class GlobusMixin:
             destination_endpoint=destination_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         self._blocking_globus_transfer(transfer_data=transfer_data)
@@ -131,24 +131,21 @@ class GlobusMixin:
         source_endpoint: str,
         destination_endpoint: str,
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         label = label or "Data Processing Transfer"
         return TransferData(
-            transfer_client=self.globus_transfer_client,
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
     def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
-        tc = self.
-        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
+        tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
         transfer_result = tc.submit_transfer(transfer_data)
         task_id = transfer_result["task_id"]
+        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
         polling_interval = 60
         while not tc.task_wait(
             task_id=task_id, timeout=polling_interval, polling_interval=polling_interval