dkist-processing-common 10.8.1__py3-none-any.whl → 10.8.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/235.feature.rst +3 -0
- changelog/235.misc.1.rst +2 -0
- changelog/235.misc.rst +1 -0
- dkist_processing_common/codecs/array.py +19 -0
- dkist_processing_common/codecs/basemodel.py +21 -0
- dkist_processing_common/codecs/fits.py +12 -6
- dkist_processing_common/manual.py +3 -5
- dkist_processing_common/models/graphql.py +13 -3
- dkist_processing_common/models/input_dataset.py +113 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/tasks/mixin/metadata_store.py +7 -4
- dkist_processing_common/tasks/transfer_input_data.py +61 -70
- dkist_processing_common/tests/conftest.py +24 -7
- dkist_processing_common/tests/test_codecs.py +38 -0
- dkist_processing_common/tests/test_input_dataset.py +79 -308
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_transfer_input_data.py +131 -45
- dkist_processing_common/tests/test_write_l1.py +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/METADATA +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/RECORD +22 -17
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/WHEEL +1 -1
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/top_level.txt +0 -0
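The contents of the two new codec modules (`codecs/array.py`, `codecs/basemodel.py`) are not shown in this diff. As a rough sketch only, assuming the package's existing convention of paired encoder/decoder functions (encoders turn an object into bytes, decoders turn a file path back into an object), a Pydantic-model codec could look something like the following; the names and signatures here are illustrative assumptions, not the actual file contents:

```python
# Hypothetical sketch -- the real codecs/basemodel.py is not shown in this diff.
# Assumes the package convention of paired encoder/decoder functions.
from pathlib import Path
from typing import TypeVar

from pydantic import BaseModel

T = TypeVar("T", bound=BaseModel)


def basemodel_encoder(data: BaseModel) -> bytes:
    """Encode a pydantic BaseModel as JSON bytes suitable for writing to disk."""
    return data.model_dump_json().encode()


def basemodel_decoder(path: Path, model: type[T]) -> T:
    """Decode a file written by basemodel_encoder back into the given model type."""
    return model.model_validate_json(path.read_text())
```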
dkist_processing_common/tasks/mixin/input_dataset.py
@@ -1,166 +0,0 @@
-"""Mixin for a WorkflowDataTaskBase subclass which implements input data set access functionality."""
-import json
-from dataclasses import dataclass
-from datetime import datetime
-from itertools import chain
-from pathlib import Path
-from typing import Any
-
-from dkist_processing_common.models.tags import Tag
-from dkist_processing_common.tasks.base import tag_type_hint
-
-
-frames_part_type_hint = list[dict[str, str | list[str]]] | None
-
-
-@dataclass
-class InputDatasetParameterValue:
-    """Data structure for a de-serialized input dataset parameter value."""
-
-    parameter_value_id: int
-    parameter_value: Any = None
-    parameter_value_start_date: datetime | None = None
-
-
-@dataclass
-class InputDatasetObject:
-    """Data structure for a de-serialized input dataset frame."""
-
-    bucket: str
-    object_key: str
-
-
-class InputDatasetMixin:
-    """Mixin for WorkflowDataTaskBase that accesses downloaded input dataset part documents."""
-
-    def _input_dataset_part_document(self, tags: tag_type_hint):
-        """Get the input dataset document part and deserialize it."""
-        paths: list[Path] = list(self.read(tags=tags))
-        if not paths:
-            return
-        if len(paths) > 1:
-            raise ValueError(
-                f"There are more than one input dataset part documents to parse for {tags=}"
-            )
-        p = paths[0]
-        with p.open(mode="rb") as f:
-            return json.load(f)
-
-    @property
-    def input_dataset_observe_frames_part_document(self) -> frames_part_type_hint:
-        """Get the 'observe frames' part of the input dataset."""
-        return self._input_dataset_part_document(tags=Tag.input_dataset_observe_frames())
-
-    @property
-    def input_dataset_calibration_frames_part_document(self) -> frames_part_type_hint:
-        """Get the 'calibration frames' part of the input dataset."""
-        return self._input_dataset_part_document(tags=Tag.input_dataset_calibration_frames())
-
-    @property
-    def input_dataset_parameters_part_document(
-        self,
-    ) -> list[dict[str, str | list[dict[str, int | str]]]] | None:
-        """Get the 'parameters' part of the input dataset."""
-        return self._input_dataset_part_document(tags=Tag.input_dataset_parameters())
-
-    @property
-    def input_dataset_frames(self) -> list[InputDatasetObject]:
-        """Get the list of frames for this input dataset."""
-        result = []
-        observe_frames = self.input_dataset_observe_frames_part_document or []
-        calibration_frames = self.input_dataset_calibration_frames_part_document or []
-        for frame_set in chain(observe_frames, calibration_frames):
-            for key in frame_set.get("object_keys", list()):
-                result.append(InputDatasetObject(bucket=frame_set["bucket"], object_key=key))
-        return result
-
-    @property
-    def input_dataset_parameters(self) -> dict[str, list[InputDatasetParameterValue]]:
-        """Get the input dataset parameters."""
-        parameters = self.input_dataset_parameters_part_document or []
-        result = dict()
-        for p in parameters:
-            result.update(self._input_dataset_parse_parameter(p))
-        return result
-
-    @property
-    def input_dataset_parameter_objects(self) -> list[InputDatasetObject]:
-        """Parse the parameter object locations out of the set of all parameters."""
-        result = []
-        for value_list in self.input_dataset_parameters.values():
-            for value in value_list:
-                param_value = value.parameter_value
-                if isinstance(param_value, dict) and param_value.get("is_file", False):
-                    result.append(
-                        InputDatasetObject(
-                            bucket=param_value["bucket"], object_key=param_value["objectKey"]
-                        )
-                    )
-        return result
-
-    def _input_dataset_parse_parameter(
-        self, parameter: dict
-    ) -> dict[str, list[InputDatasetParameterValue]]:
-        name: str = parameter["parameterName"]
-        raw_values: list[dict] = parameter["parameterValues"]
-        values = self._input_dataset_parse_parameter_values(raw_values=raw_values)
-        return {name: values}
-
-    def _input_dataset_parse_parameter_values(
-        self, raw_values: list[dict[str, Any]]
-    ) -> list[InputDatasetParameterValue]:
-        values = list()
-        for v in raw_values:
-            parsed_value = InputDatasetParameterValue(parameter_value_id=v["parameterValueId"])
-            parsed_value.parameter_value = self._input_dataset_parse_parameter_value(
-                raw_parameter_value=v["parameterValue"]
-            )
-            if d := v.get("parameterValueStartDate"):
-                parsed_value.parameter_value_start_date = datetime.fromisoformat(d)
-            else:
-                parsed_value.parameter_value_start_date = datetime(1, 1, 1)
-            values.append(parsed_value)
-        return values
-
-    def _input_dataset_parse_parameter_value(self, raw_parameter_value: str) -> Any:
-        """Return the json decoding of the parameter value."""
-        return json.loads(raw_parameter_value, object_hook=self._decode_parameter_value)
-
-    def _decode_parameter_value(self, param_dict: dict):
-        """Decode a parameter value."""
-        if "__file__" in param_dict:
-            return self._convert_parameter_file_value_to_path(param_dict)
-        # Nothing to do here, so return control back to json.loads()
-        return param_dict
-
-    def _convert_parameter_file_value_to_path(self, param_dict: dict):
-        """
-        Decode a parameter file value.
-
-        Note: for parameters that are files, the string passed to json.loads() looks like this:
-        '{ "__file__":
-            {
-                "bucket": "data",
-                "objectKey": "parameters/parameter_name/uuid.dat"
-            }
-        }'
-
-        In this hook, we remove the outer __file__ dict and return the inner dict with the addition of
-        a key and value for the file path. Because the file path is dependent on the existence of a tag,
-        if no tag is found, None is returned for the path. This use case will occur when we construct
-        the list of files to be transferred and tagged by the TransferL0Data task.
-        """
-        file_dict = param_dict["__file__"]
-        object_key = file_dict["objectKey"]
-        parameter_tag = Tag.parameter(Path(object_key).name)
-        paths = list(self.read(tags=parameter_tag))
-        num_paths = len(paths)
-        if num_paths == 1:
-            param_path = paths[0]
-        elif num_paths == 0:
-            param_path = None
-        else:
-            raise ValueError(f"Found multiple paths for {parameter_tag = }.")
-        file_dict["param_path"] = param_path
-        file_dict["is_file"] = True
-        return file_dict
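The core mechanism of the removed mixin is the `json.loads` `object_hook` pattern: every decoded JSON object is passed through `_decode_parameter_value`, and any dict carrying a `__file__` key is rewritten into a file descriptor. A minimal, self-contained sketch of that mechanism follows; it replaces the mixin's tag-based path lookup with a `None` stand-in, which is also what the mixin produces when no tag exists yet:

```python
import json


# Minimal sketch of the object_hook pattern from the removed mixin.
def decode_parameter_value(param_dict: dict) -> dict:
    if "__file__" in param_dict:
        file_dict = param_dict["__file__"]
        file_dict["param_path"] = None  # stand-in for the mixin's tag-based lookup
        file_dict["is_file"] = True
        return file_dict
    # Not a file parameter: hand the dict back to json.loads() unchanged.
    return param_dict


raw = '{"__file__": {"bucket": "data", "objectKey": "parameters/parameter_name/uuid.dat"}}'
decoded = json.loads(raw, object_hook=decode_parameter_value)
# decoded == {"bucket": "data", "objectKey": "parameters/parameter_name/uuid.dat",
#             "param_path": None, "is_file": True}
```

Because `json.loads` applies the hook to the innermost objects first, the inner bucket/objectKey dict passes through untouched, and the outer `{"__file__": ...}` wrapper is then collapsed into the annotated inner dict.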