dkist-processing-common 10.5.4__py3-none-any.whl → 12.1.0rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as published to a supported registry. It is provided for informational purposes only.
- changelog/280.misc.rst +1 -0
- changelog/282.feature.2.rst +2 -0
- changelog/282.feature.rst +2 -0
- changelog/284.feature.rst +1 -0
- changelog/285.feature.rst +2 -0
- changelog/285.misc.rst +2 -0
- changelog/286.feature.rst +2 -0
- changelog/287.misc.rst +1 -0
- dkist_processing_common/__init__.py +1 -0
- dkist_processing_common/_util/constants.py +1 -0
- dkist_processing_common/_util/graphql.py +1 -0
- dkist_processing_common/_util/scratch.py +9 -9
- dkist_processing_common/_util/tags.py +1 -0
- dkist_processing_common/codecs/array.py +20 -0
- dkist_processing_common/codecs/asdf.py +9 -3
- dkist_processing_common/codecs/basemodel.py +22 -0
- dkist_processing_common/codecs/bytes.py +1 -0
- dkist_processing_common/codecs/fits.py +37 -9
- dkist_processing_common/codecs/iobase.py +1 -0
- dkist_processing_common/codecs/json.py +1 -0
- dkist_processing_common/codecs/path.py +1 -0
- dkist_processing_common/codecs/quality.py +1 -1
- dkist_processing_common/codecs/str.py +1 -0
- dkist_processing_common/config.py +64 -25
- dkist_processing_common/manual.py +6 -8
- dkist_processing_common/models/constants.py +373 -37
- dkist_processing_common/models/dkist_location.py +27 -0
- dkist_processing_common/models/fits_access.py +48 -0
- dkist_processing_common/models/flower_pot.py +231 -9
- dkist_processing_common/models/fried_parameter.py +41 -0
- dkist_processing_common/models/graphql.py +66 -75
- dkist_processing_common/models/input_dataset.py +117 -0
- dkist_processing_common/models/message.py +1 -1
- dkist_processing_common/models/message_queue_binding.py +1 -1
- dkist_processing_common/models/metric_code.py +2 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/models/quality.py +50 -5
- dkist_processing_common/models/tags.py +23 -21
- dkist_processing_common/models/task_name.py +3 -2
- dkist_processing_common/models/telemetry.py +28 -0
- dkist_processing_common/models/wavelength.py +3 -1
- dkist_processing_common/parsers/average_bud.py +46 -0
- dkist_processing_common/parsers/cs_step.py +13 -12
- dkist_processing_common/parsers/dsps_repeat.py +6 -4
- dkist_processing_common/parsers/experiment_id_bud.py +12 -4
- dkist_processing_common/parsers/id_bud.py +42 -27
- dkist_processing_common/parsers/l0_fits_access.py +5 -3
- dkist_processing_common/parsers/l1_fits_access.py +51 -23
- dkist_processing_common/parsers/lookup_bud.py +125 -0
- dkist_processing_common/parsers/near_bud.py +21 -20
- dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
- dkist_processing_common/parsers/proposal_id_bud.py +13 -5
- dkist_processing_common/parsers/quality.py +2 -0
- dkist_processing_common/parsers/retarder.py +32 -0
- dkist_processing_common/parsers/single_value_single_key_flower.py +6 -1
- dkist_processing_common/parsers/task.py +8 -6
- dkist_processing_common/parsers/time.py +178 -72
- dkist_processing_common/parsers/unique_bud.py +21 -22
- dkist_processing_common/parsers/wavelength.py +5 -3
- dkist_processing_common/tasks/__init__.py +3 -2
- dkist_processing_common/tasks/assemble_movie.py +4 -3
- dkist_processing_common/tasks/base.py +59 -60
- dkist_processing_common/tasks/l1_output_data.py +54 -53
- dkist_processing_common/tasks/mixin/globus.py +24 -27
- dkist_processing_common/tasks/mixin/interservice_bus.py +1 -0
- dkist_processing_common/tasks/mixin/metadata_store.py +108 -243
- dkist_processing_common/tasks/mixin/object_store.py +22 -0
- dkist_processing_common/tasks/mixin/quality/__init__.py +1 -0
- dkist_processing_common/tasks/mixin/quality/_base.py +8 -1
- dkist_processing_common/tasks/mixin/quality/_metrics.py +166 -14
- dkist_processing_common/tasks/output_data_base.py +4 -3
- dkist_processing_common/tasks/parse_l0_input_data.py +277 -15
- dkist_processing_common/tasks/quality_metrics.py +9 -9
- dkist_processing_common/tasks/teardown.py +7 -7
- dkist_processing_common/tasks/transfer_input_data.py +67 -69
- dkist_processing_common/tasks/trial_catalog.py +77 -17
- dkist_processing_common/tasks/trial_output_data.py +16 -17
- dkist_processing_common/tasks/write_l1.py +102 -72
- dkist_processing_common/tests/conftest.py +32 -173
- dkist_processing_common/tests/mock_metadata_store.py +271 -0
- dkist_processing_common/tests/test_assemble_movie.py +4 -4
- dkist_processing_common/tests/test_assemble_quality.py +32 -4
- dkist_processing_common/tests/test_base.py +5 -19
- dkist_processing_common/tests/test_codecs.py +103 -12
- dkist_processing_common/tests/test_constants.py +15 -0
- dkist_processing_common/tests/test_dkist_location.py +15 -0
- dkist_processing_common/tests/test_fits_access.py +56 -19
- dkist_processing_common/tests/test_flower_pot.py +147 -5
- dkist_processing_common/tests/test_fried_parameter.py +27 -0
- dkist_processing_common/tests/test_input_dataset.py +78 -361
- dkist_processing_common/tests/test_interservice_bus.py +1 -0
- dkist_processing_common/tests/test_interservice_bus_mixin.py +1 -1
- dkist_processing_common/tests/test_manual_processing.py +33 -0
- dkist_processing_common/tests/test_output_data_base.py +5 -7
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_parse_l0_input_data.py +115 -32
- dkist_processing_common/tests/test_publish_catalog_messages.py +2 -24
- dkist_processing_common/tests/test_quality.py +1 -0
- dkist_processing_common/tests/test_quality_mixin.py +255 -23
- dkist_processing_common/tests/test_scratch.py +2 -1
- dkist_processing_common/tests/test_stems.py +511 -168
- dkist_processing_common/tests/test_submit_dataset_metadata.py +3 -7
- dkist_processing_common/tests/test_tags.py +1 -0
- dkist_processing_common/tests/test_task_name.py +1 -1
- dkist_processing_common/tests/test_task_parsing.py +17 -7
- dkist_processing_common/tests/test_teardown.py +28 -24
- dkist_processing_common/tests/test_transfer_input_data.py +270 -125
- dkist_processing_common/tests/test_transfer_l1_output_data.py +2 -3
- dkist_processing_common/tests/test_trial_catalog.py +83 -8
- dkist_processing_common/tests/test_trial_output_data.py +46 -73
- dkist_processing_common/tests/test_workflow_task_base.py +8 -10
- dkist_processing_common/tests/test_write_l1.py +298 -76
- dkist_processing_common-12.1.0rc1.dist-info/METADATA +265 -0
- dkist_processing_common-12.1.0rc1.dist-info/RECORD +134 -0
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/WHEEL +1 -1
- docs/conf.py +1 -0
- docs/index.rst +1 -1
- docs/landing_page.rst +13 -0
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- dkist_processing_common-10.5.4.dist-info/METADATA +0 -175
- dkist_processing_common-10.5.4.dist-info/RECORD +0 -112
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/quality_metrics.py

@@ -1,4 +1,5 @@
 """Classes to support the generation of quality metrics for the calibrated data."""
+
 import logging
 from dataclasses import dataclass
 from dataclasses import field
@@ -18,7 +19,6 @@ from dkist_processing_common.parsers.quality import L1QualityFitsAccess
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 from dkist_processing_common.tasks.mixin.quality import QualityMixin
 
-
 __all__ = ["QualityL1Metrics", "QualityL0Metrics"]
 
 
@@ -100,10 +100,10 @@ class QualityL0Metrics(WorkflowTaskBase, QualityMixin):
         `quality_task_types` properties, respectively.
         """
         modstate_list = self.modstate_list if self.modstate_list is not None else [None]
-        with self.
+        with self.telemetry_span("Computing L0 Quality Metrics"):
             quality_data_list = []
             for task_type in self.quality_task_types:
-                with self.
+                with self.telemetry_span(f"Working on {task_type = }"):
                     for modstate in modstate_list:
                         paths = self.get_paths_for_modstate_and_task(modstate, task_type)
                         quality_data = self.calculate_l0_metrics(
@@ -112,7 +112,7 @@ class QualityL0Metrics(WorkflowTaskBase, QualityMixin):
                         quality_data.modstate = modstate
                         quality_data_list.append(quality_data)
 
-        with self.
+        with self.telemetry_span("Saving metrics to disk"):
             for quality_data in quality_data_list:
                 if quality_data.has_values:
                     self.save_quality_data(quality_data, modstate=quality_data.modstate)
@@ -296,21 +296,21 @@ class QualityL1Metrics(WorkflowTaskBase, QualityMixin):
             L1Metric(storage_method=self.quality_store_health_status, value_source="health_status"),
             L1Metric(
                 storage_method=self.quality_store_ao_status_and_fried_parameter,
-                value_source=["ao_status", "fried_parameter"],
+                value_source=["ao_status", "fried_parameter", "num_out_of_bounds_ao_values"],
             ),
         ]
 
-        with self.
+        with self.telemetry_span("Reading L1 frames"):
             paths = list(self.read(tags=[Tag.calibrated(), Tag.frame()]))
 
-        with self.
+        with self.telemetry_span("Calculating L1 quality metrics"):
             for metric in metrics:
-                with self.
+                with self.telemetry_span(f"Calculating L1 metric {metric.value_source}"):
                     for path in paths:
                         frame = L1QualityFitsAccess.from_path(path)
                         metric.append_value(frame=frame)
 
-        with self.
+        with self.telemetry_span("Sending lists for storage"):
             for metric in metrics:
                 if metric.has_values:
                     metric.store_metric()
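
The dominant change across these task modules is that every unit of work is now wrapped in a `self.telemetry_span(...)` context manager. A minimal sketch of that pattern, using a hypothetical standalone `telemetry_span` in place of the task method (its real signature is not shown in this diff):

    import logging
    import time
    from contextlib import contextmanager

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    @contextmanager
    def telemetry_span(name: str):
        # Hypothetical stand-in for the task's telemetry_span: emit a named
        # span around a block of work and record how long the block took.
        start = time.perf_counter()
        logger.info(f"span start: {name}")
        try:
            yield
        finally:
            logger.info(f"span end: {name} ({time.perf_counter() - start:.3f}s)")

    with telemetry_span("Computing L0 Quality Metrics"):
        for task_type in ["dark", "gain"]:  # illustrative task types
            with telemetry_span(f"Working on {task_type = }"):
                pass  # per-task-type metric calculation goes here

Because the span is a context manager, the end-of-span record is emitted even when the wrapped block raises.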

dkist_processing_common/tasks/teardown.py

@@ -1,11 +1,11 @@
 """Task(s) for the clean up tasks at the conclusion of a processing pipeline."""
+
 import logging
 from abc import ABC
 from abc import abstractmethod
 
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 
-
 __all__ = ["Teardown", "TrialTeardown"]
 
 
@@ -22,15 +22,15 @@ class TeardownBase(WorkflowTaskBase, ABC):
     @property
     def teardown_enabled(self) -> bool:
         """Recipe run configuration indicating if data should be removed at the end of a run."""
-        return self.
+        return self.metadata_store_recipe_run.configuration.teardown_enabled
 
     def run(self) -> None:
         """Run method for Teardown class."""
-        with self.
+        with self.telemetry_span("Change recipe run status"):
             self.change_recipe_run_status_to_success()
 
         if not self.teardown_enabled:
-            with self.
+            with self.telemetry_span(f"Skip Teardown"):
                 return
 
         logger.info(f"Removing data and tags for recipe run {self.recipe_run_id}")
@@ -43,13 +43,13 @@ class TeardownBase(WorkflowTaskBase, ABC):
 
     def teardown(self):
         """Purge all constants and files/tags in scratch."""
-        with self.
+        with self.telemetry_span("Remove Data and Tags"):
             self.scratch.purge()
 
-        with self.
+        with self.telemetry_span("Remove File Counters"):
             self.filename_counter.purge()
 
-        with self.
+        with self.telemetry_span("Remove Constants"):
             self.constants._purge()
 
 
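
The `teardown_enabled` property now reads an attribute of `self.metadata_store_recipe_run.configuration` instead of going through a mixin helper, i.e. the recipe run configuration behaves as a typed model rather than a raw dict. A sketch of what such a configuration model might look like in pydantic; the extra fields and defaults here are assumptions for illustration:

    from pydantic import BaseModel

    class RecipeRunConfiguration(BaseModel):
        # Hypothetical slice of the recipe run configuration; attribute access
        # like configuration.teardown_enabled replaces dict-style lookups.
        teardown_enabled: bool = True
        destination_bucket: str = "data"  # illustrative default
        trial_root_directory_name: str | None = None

    config = RecipeRunConfiguration.model_validate({"teardown_enabled": False})
    assert config.teardown_enabled is False
    assert config.trial_root_directory_name is None  # unset fields fall back to defaults

Typed access moves validation to the moment the configuration is parsed rather than the moment a key is read.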

dkist_processing_common/tasks/transfer_input_data.py

@@ -1,30 +1,45 @@
 """Task(s) for the transfer in of data sources for a processing pipeline."""
+
 import logging
 from pathlib import Path
 
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
+from dkist_processing_common.codecs.basemodel import basemodel_encoder
+from dkist_processing_common.models.input_dataset import InputDatasetObject
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 from dkist_processing_common.tasks.mixin.globus import GlobusMixin
 from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
-from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin
-from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
 
 __all__ = ["TransferL0Data"]
 
 logger = logging.getLogger(__name__)
 
 
-class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
+class TransferL0Data(WorkflowTaskBase, GlobusMixin):
     """Transfers Level 0 data and required parameter files to the scratch store."""
 
     def download_input_dataset(self):
-        """
-        if
-
-
-
-
-        self.write(
+        """Write the input dataset part documents to scratch with appropriate tags."""
+        if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
+            doc = observe_frames_part.inputDatasetPartDocument
+            self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
+        if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
+            doc = calibration_frames_part.inputDatasetPartDocument
+            self.write(
+                data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
+            )
+        if parameters_part := self.metadata_store_input_dataset_parameters:
+            doc = parameters_part.inputDatasetPartDocument
+            self.add_file_tags_to_parameters_doc(param_doc=doc)
+            self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
+
+    def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
+        """Update the input dataset document with the location of the file parameters."""
+        for doc_item in param_doc.doc_list:
+            for obj in doc_item.input_dataset_objects:
+                obj.tag = Tag.parameter(Path(obj.object_key).name)
 
     def format_transfer_items(
         self, input_dataset_objects: list[InputDatasetObject]
@@ -43,77 +58,60 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
         )
         return transfer_items
 
-    def
+    def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
         """Format the list of frames as transfer items to be used by globus."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        ""
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.input(), Tag.frame()])
-
-    def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
-        """
-        Tag all the parameter files with 'parameter'.
-
-        Parameters
-        ----------
-        transfer_items
-            List of items to be tagged
-
-        Returns
-        -------
-        None
-        """
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.parameter(si.name)])
+        doc = next(
+            self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
+            None,
+        )
+        doc_list = doc.doc_list if doc else []
+        input_dataset_objects = []
+        for doc_item in doc_list:
+            input_dataset_objects += doc_item.input_dataset_objects
+        return input_dataset_objects
+
+    def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
+        """Tag all the transferred input files."""
+        for obj in input_dataset_objects:
+            obj_path = self.scratch.absolute_path(obj.object_key)
+            if obj.tag:
+                self.tag(obj_path, tags=obj.tag)
+            else:
+                self.tag(obj_path, tags=[Tag.input(), Tag.frame()])
+        logger.info(f"Tagged {len(input_dataset_objects)} input dataset objects in scratch")
 
     def run(self) -> None:
         """Execute the data transfer."""
-        with self.
+        with self.telemetry_span("Change Status to InProgress"):
             self.metadata_store_change_recipe_run_to_inprogress()
 
-        with self.
+        with self.telemetry_span("Download Input Dataset Documents"):
             self.download_input_dataset()
 
-        with self.
-
-
-
-
-
-
+        with self.telemetry_span("Build Input Dataset Transfer List"):
+            observe_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_observe_frames()
+            )
+            calibration_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_calibration_frames()
+            )
+            parameter_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_parameters()
+            )
+            transfer_objects = (
+                observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
+            )
+            if len(observe_transfer_objects + calibration_transfer_objects) == 0:
+                raise ValueError("No input dataset frames found to transfer")
 
-        with self.
+        with self.telemetry_span("Transfer Input Frames and Parameter Files via Globus"):
             self.globus_transfer_object_store_to_scratch(
-                transfer_items=
-                label=f"Transfer
+                transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
+                label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
             )
 
-        with self.
-            self.
-            self.tag_parameter_objects(transfer_items=parameter_transfer_items)
+        with self.telemetry_span("Tag Input Frames and Parameter Files"):
+            self.tag_transfer_objects(input_dataset_objects=transfer_objects)
 
     def rollback(self):
         """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
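
`TransferL0Data` drops `InputDatasetMixin` in favor of pydantic models (`models/input_dataset.py`, new above) that are written to and read from scratch with the new basemodel codec (`codecs/basemodel.py`, also new). A sketch of such a codec round trip; the model fields and codec signatures here are assumptions, not the package's actual definitions:

    from pathlib import Path
    from pydantic import BaseModel

    class InputDatasetObject(BaseModel):
        # Hypothetical minimal shape inferred from the diff: each object names
        # a file in the object store, plus an optional scratch tag set by
        # add_file_tags_to_parameters_doc for parameter files.
        bucket: str
        object_key: str
        tag: str | None = None

    def basemodel_encoder(model: BaseModel) -> bytes:
        # Sketch of a pydantic-model codec: serialize the model to JSON bytes.
        return model.model_dump_json(by_alias=True).encode()

    def basemodel_decoder(path: Path, model: type[BaseModel]) -> BaseModel:
        # Sketch of the matching decoder: re-validate JSON loaded from disk.
        return model.model_validate_json(path.read_bytes())

    obj = InputDatasetObject(bucket="data", object_key="VISP/frame_0001.fits")
    path = Path("input_dataset_object.json")
    path.write_bytes(basemodel_encoder(obj))
    assert basemodel_decoder(path, model=InputDatasetObject) == obj

Round-tripping through a validating model means a malformed document fails at read time rather than deep inside the transfer logic.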

dkist_processing_common/tasks/trial_catalog.py

@@ -1,18 +1,22 @@
 """Tasks to support the generation of downstream artifacts in a trial workflow that wouldn't otherwise produce them."""
-
+
+import importlib
 import logging
 from datetime import datetime
 from itertools import chain
 from pathlib import Path
+from typing import Any
 from typing import Generator
 from uuid import uuid4
 
-from dkist_processing_common.codecs.asdf import
+from dkist_processing_common.codecs.asdf import asdf_fileobj_encoder
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
 from dkist_processing_common.codecs.fits import fits_access_decoder
 from dkist_processing_common.codecs.json import json_encoder
 from dkist_processing_common.codecs.path import path_decoder
 from dkist_processing_common.codecs.quality import quality_data_decoder
 from dkist_processing_common.models.fits_access import FitsAccessBase
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.output_data_base import OutputDataBase
 
@@ -24,8 +28,9 @@ __all__ = ["CreateTrialDatasetInventory", "CreateTrialAsdf", "CreateTrialQuality
 # Capture condition of dkist-processing-common[inventory] install
 INVENTORY_EXTRA_INSTALLED = False
 try:
-    from dkist_inventory.inventory import generate_inventory_from_frame_inventory
     from dkist_inventory.inventory import generate_asdf_filename
+    from dkist_inventory.inventory import generate_inventory_from_frame_inventory
+    from dkist_inventory.inventory import generate_quality_report_filename
 
     INVENTORY_EXTRA_INSTALLED = True
 except ModuleNotFoundError:
@@ -36,6 +41,7 @@ ASDF_EXTRA_INSTALLED = False
 try:
     import asdf
     from dkist_inventory.asdf_generator import asdf_tree_from_filenames
+    from dkist_inventory.asdf_generator import make_asdf_file_object
 
     ASDF_EXTRA_INSTALLED = True
 except ModuleNotFoundError:
@@ -44,8 +50,8 @@ except ModuleNotFoundError:
 # Verify dkist-quality is installed
 QUALITY_EXTRA_INSTALLED = False
 try:
-    from dkist_quality.report import format_report
     from dkist_quality.report import ReportFormattingException
+    from dkist_quality.report import format_report
 
     QUALITY_EXTRA_INSTALLED = True
 except ModuleNotFoundError:
@@ -95,13 +101,13 @@ class CreateTrialDatasetInventory(OutputDataBase):
 
     def run(self) -> None:
         """Generate a json file simulating the dataset inventory record that would be produced when cataloging the dataset."""
-        with self.
+        with self.telemetry_span("Retrieve output frame headers"):
             json_headers = list(self.frame_inventories)
-        with self.
+        with self.telemetry_span("Generate dataset inventory"):
             inventory: dict = generate_inventory_from_frame_inventory(
                 bucket=self.destination_bucket, json_headers=json_headers
             )
-        with self.
+        with self.telemetry_span("Save dataset inventory file"):
             self.write(
                 inventory,
                 tags=[Tag.output(), Tag.dataset_inventory()],
@@ -136,27 +142,72 @@ class CreateTrialAsdf(OutputDataBase):
 
     def run(self) -> None:
         """Generate an ASDF file simulating the ASDF file that would be produced when cataloging the dataset."""
-        with self.
+        with self.telemetry_span("Collate input dataset parameters"):
+            parameters = self.parse_input_dataset_parameters()
+
+        with self.telemetry_span("Generate ASDF tree"):
             tree = asdf_tree_from_filenames(
                 filenames=self.absolute_output_frame_paths,
                 hdu=1,  # compressed
                 relative_to=self.scratch.workflow_base_path,
+                parameters=parameters,
             )
-
-
-
+
+        trial_history = [
+            (
+                "Written with dkist-processing-common trial ASDF writer",
+                {
+                    "name": "dkist-processing-common",
+                    "author": "DKIST Data Center",
+                    "homepage": "https://bitbucket.org/dkistdc/dkist-processing-common",
+                    "version": importlib.metadata.distribution("dkist-processing-common").version,
+                },
+            )
+        ]
+        with self.telemetry_span("Save ASDF file"):
+            with make_asdf_file_object(tree, extra_history=trial_history) as asdf_obj:
                 self.write(
-
+                    asdf_obj,
                     tags=[Tag.output(), Tag.asdf()],
-                    encoder=
+                    encoder=asdf_fileobj_encoder,
                     relative_path=generate_asdf_filename(
                         instrument=self.constants.instrument,
                         start_time=datetime.fromisoformat(self.constants.obs_ip_start_time),
                         dataset_id=self.constants.dataset_id,
                     ),
-                    custom_schema=schema_path.as_posix(),
                 )
 
+    def parse_input_dataset_parameters(self) -> list[dict[str, Any]]:
+        """
+        Return the parameters associated with the dataset.
+
+        Returns
+        -------
+        list[dict[str, Any]]
+            A list of dictionaries, each containing a parameter name and its values.
+
+        Raises
+        ------
+        ValueError
+            If there is not exactly one ``InputDatasetPartDocumentList`` found.
+        """
+        part_docs_iter = self.read(
+            tags=Tag.input_dataset_parameters(),
+            decoder=basemodel_decoder,
+            model=InputDatasetPartDocumentList,
+        )
+        docs = list(part_docs_iter)
+
+        if not docs:
+            logger.warning("No parameter list decoded from files")
+            return []
+
+        if len(docs) > 1:
+            raise ValueError(f"Expected 1 parameter list, found {len(docs)}")
+
+        parameters = docs[0].model_dump(by_alias=True).get("doc_list", [])
+        return parameters
+
 
 class CreateTrialQualityReport(OutputDataBase):
     """
@@ -174,13 +225,20 @@ class CreateTrialQualityReport(OutputDataBase):
                 f" but the required dependencies were not found."
             )
 
+        if not INVENTORY_EXTRA_INSTALLED:
+            raise ModuleNotFoundError(
+                f"{self.__class__.__name__} Task requires the dkist-inventory package "
+                f"(e.g. via an 'inventory' pip_extra on dkist_processing_core.Workflow().add_node())"
+                f" but the required dependencies were not found."
+            )
+
     def run(self) -> None:
         """Generate the quality report for the dataset."""
        self.create_trial_quality_report()
 
     def create_trial_quality_report(self) -> None:
         """Generate a trial quality report in pdf format and save to the file system for future upload."""
-        with self.
+        with self.telemetry_span(f"Building the trial quality report"):
             # each quality_data file is a list - this will combine the elements of multiple lists into a single list
             quality_data = list(
                 chain.from_iterable(
@@ -191,9 +249,11 @@
                 report_data=quality_data, dataset_id=self.constants.dataset_id
             )
 
-        with self.
+        with self.telemetry_span(f"Saving the trial quality report to the file system"):
             self.write(
                 quality_report,
                 tags=[Tag.output(), Tag.quality_report()],
-                relative_path=
+                relative_path=generate_quality_report_filename(
+                    dataset_id=self.constants.dataset_id
+                ),
             )
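
The new `parse_input_dataset_parameters` above tolerates a missing parameters document but rejects duplicates before handing the dumped `doc_list` to the ASDF tree builder. The same validation in isolation, using a hypothetical minimal `InputDatasetPartDocumentList` (the real model lives in `models/input_dataset.py`, which this diff does not show):

    from typing import Any
    from pydantic import BaseModel

    class InputDatasetPartDocumentList(BaseModel):
        # Hypothetical minimal shape: the code above only relies on `doc_list`.
        doc_list: list[dict[str, Any]]

    def parameters_from_docs(docs: list[InputDatasetPartDocumentList]) -> list[dict[str, Any]]:
        # Mirrors the guard rails in parse_input_dataset_parameters: zero
        # documents degrade to an empty list, more than one is an error.
        if not docs:
            return []
        if len(docs) > 1:
            raise ValueError(f"Expected 1 parameter list, found {len(docs)}")
        return docs[0].model_dump(by_alias=True).get("doc_list", [])

    docs = [InputDatasetPartDocumentList(doc_list=[{"parameterName": "solar_radius"}])]
    print(parameters_from_docs(docs))  # [{'parameterName': 'solar_radius'}]
    print(parameters_from_docs([]))    # []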
@@ -1,4 +1,5 @@
|
|
|
1
1
|
"""Tasks to support transferring an arbitrary collection of files to a customizable post-run location."""
|
|
2
|
+
|
|
2
3
|
import logging
|
|
3
4
|
from functools import cached_property
|
|
4
5
|
from pathlib import Path
|
|
@@ -35,33 +36,31 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
|
|
|
35
36
|
|
|
36
37
|
def transfer_objects(self) -> None:
|
|
37
38
|
"""Collect transfer items and send them to Globus for transfer."""
|
|
38
|
-
with self.
|
|
39
|
+
with self.telemetry_span("Build transfer list"):
|
|
39
40
|
transfer_manifest = self.build_transfer_list()
|
|
40
41
|
|
|
41
|
-
with self.
|
|
42
|
+
with self.telemetry_span("Send transfer manifest to globus"):
|
|
42
43
|
self.transfer_all_trial_frames(transfer_manifest)
|
|
43
44
|
|
|
44
45
|
@cached_property
|
|
45
46
|
def destination_bucket(self) -> str:
|
|
46
|
-
"""Get the destination bucket
|
|
47
|
-
return self.
|
|
47
|
+
"""Get the destination bucket."""
|
|
48
|
+
return self.metadata_store_recipe_run.configuration.destination_bucket
|
|
48
49
|
|
|
49
50
|
@property
|
|
50
51
|
def destination_root_folder(self) -> Path:
|
|
51
52
|
"""Format the destination root folder with a value that can be set in the recipe run configuration."""
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
root_name_from_config = (
|
|
54
|
+
self.metadata_store_recipe_run.configuration.trial_root_directory_name
|
|
54
55
|
)
|
|
55
|
-
root_name = Path(
|
|
56
|
-
|
|
56
|
+
root_name = Path(root_name_from_config or super().destination_root_folder)
|
|
57
57
|
return root_name
|
|
58
58
|
|
|
59
59
|
@property
|
|
60
60
|
def destination_folder(self) -> Path:
|
|
61
61
|
"""Format the destination folder with a parent that can be set by the recipe run configuration."""
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
) or Path(self.constants.dataset_id)
|
|
62
|
+
dir_name_from_config = self.metadata_store_recipe_run.configuration.trial_directory_name
|
|
63
|
+
dir_name = dir_name_from_config or Path(self.constants.dataset_id)
|
|
65
64
|
return self.destination_root_folder / dir_name
|
|
66
65
|
|
|
67
66
|
@property
|
|
@@ -71,9 +70,12 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
|
|
|
71
70
|
Defaults to transferring all product files. Setting `trial_exclusive_transfer_tag_lists` in the
|
|
72
71
|
recipe run configuration to a list of tag lists will override the default.
|
|
73
72
|
"""
|
|
74
|
-
|
|
75
|
-
|
|
73
|
+
tag_list_from_config = (
|
|
74
|
+
self.metadata_store_recipe_run.configuration.trial_exclusive_transfer_tag_lists
|
|
76
75
|
)
|
|
76
|
+
if tag_list_from_config is not None:
|
|
77
|
+
return tag_list_from_config
|
|
78
|
+
return self.default_transfer_tag_lists
|
|
77
79
|
|
|
78
80
|
@property
|
|
79
81
|
def output_frame_tag_list(self) -> list[list[str]]:
|
|
@@ -98,7 +100,7 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
|
|
|
98
100
|
tag_list = []
|
|
99
101
|
tag_list += [[Tag.output(), Tag.dataset_inventory()]]
|
|
100
102
|
tag_list += [[Tag.output(), Tag.asdf()]]
|
|
101
|
-
tag_list += [[Tag.quality_data()]]
|
|
103
|
+
tag_list += [[Tag.output(), Tag.quality_data()]]
|
|
102
104
|
tag_list += [[Tag.output(), Tag.quality_report()]]
|
|
103
105
|
tag_list += [[Tag.output(), Tag.movie()]]
|
|
104
106
|
return tag_list
|
|
@@ -128,9 +130,6 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
|
|
|
128
130
|
"""
|
|
129
131
|
tag_lists = self.transfer_tag_lists
|
|
130
132
|
|
|
131
|
-
if not isinstance(tag_lists[0], list):
|
|
132
|
-
raise ValueError(f"{tag_lists=} must be a list of tag set lists")
|
|
133
|
-
|
|
134
133
|
transfer_items = []
|
|
135
134
|
for tag_set in tag_lists:
|
|
136
135
|
|