dkist-processing-common 10.8.3__py3-none-any.whl → 10.8.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,166 +0,0 @@
- """Mixin for a WorkflowDataTaskBase subclass which implements input data set access functionality."""
- import json
- from dataclasses import dataclass
- from datetime import datetime
- from itertools import chain
- from pathlib import Path
- from typing import Any
-
- from dkist_processing_common.models.tags import Tag
- from dkist_processing_common.tasks.base import tag_type_hint
-
-
- frames_part_type_hint = list[dict[str, str | list[str]]] | None
-
-
- @dataclass
- class InputDatasetParameterValue:
-     """Data structure for a de-serialized input dataset parameter value."""
-
-     parameter_value_id: int
-     parameter_value: Any = None
-     parameter_value_start_date: datetime | None = None
-
-
- @dataclass
- class InputDatasetObject:
-     """Data structure for a de-serialized input dataset frame."""
-
-     bucket: str
-     object_key: str
-
-
- class InputDatasetMixin:
-     """Mixin for WorkflowDataTaskBase that accesses downloaded input dataset part documents."""
-
-     def _input_dataset_part_document(self, tags: tag_type_hint):
-         """Get the input dataset document part and deserialize it."""
-         paths: list[Path] = list(self.read(tags=tags))
-         if not paths:
-             return
-         if len(paths) > 1:
-             raise ValueError(
-                 f"There are more than one input dataset part documents to parse for {tags=}"
-             )
-         p = paths[0]
-         with p.open(mode="rb") as f:
-             return json.load(f)
-
-     @property
-     def input_dataset_observe_frames_part_document(self) -> frames_part_type_hint:
-         """Get the 'observe frames' part of the input dataset."""
-         return self._input_dataset_part_document(tags=Tag.input_dataset_observe_frames())
-
-     @property
-     def input_dataset_calibration_frames_part_document(self) -> frames_part_type_hint:
-         """Get the 'calibration frames' part of the input dataset."""
-         return self._input_dataset_part_document(tags=Tag.input_dataset_calibration_frames())
-
-     @property
-     def input_dataset_parameters_part_document(
-         self,
-     ) -> list[dict[str, str | list[dict[str, int | str]]]] | None:
-         """Get the 'parameters' part of the input dataset."""
-         return self._input_dataset_part_document(tags=Tag.input_dataset_parameters())
-
-     @property
-     def input_dataset_frames(self) -> list[InputDatasetObject]:
-         """Get the list of frames for this input dataset."""
-         result = []
-         observe_frames = self.input_dataset_observe_frames_part_document or []
-         calibration_frames = self.input_dataset_calibration_frames_part_document or []
-         for frame_set in chain(observe_frames, calibration_frames):
-             for key in frame_set.get("object_keys", list()):
-                 result.append(InputDatasetObject(bucket=frame_set["bucket"], object_key=key))
-         return result
-
-     @property
-     def input_dataset_parameters(self) -> dict[str, list[InputDatasetParameterValue]]:
-         """Get the input dataset parameters."""
-         parameters = self.input_dataset_parameters_part_document or []
-         result = dict()
-         for p in parameters:
-             result.update(self._input_dataset_parse_parameter(p))
-         return result
-
-     @property
-     def input_dataset_parameter_objects(self) -> list[InputDatasetObject]:
-         """Parse the parameter object locations out of the set of all parameters."""
-         result = []
-         for value_list in self.input_dataset_parameters.values():
-             for value in value_list:
-                 param_value = value.parameter_value
-                 if isinstance(param_value, dict) and param_value.get("is_file", False):
-                     result.append(
-                         InputDatasetObject(
-                             bucket=param_value["bucket"], object_key=param_value["objectKey"]
-                         )
-                     )
-         return result
-
-     def _input_dataset_parse_parameter(
-         self, parameter: dict
-     ) -> dict[str, list[InputDatasetParameterValue]]:
-         name: str = parameter["parameterName"]
-         raw_values: list[dict] = parameter["parameterValues"]
-         values = self._input_dataset_parse_parameter_values(raw_values=raw_values)
-         return {name: values}
-
-     def _input_dataset_parse_parameter_values(
-         self, raw_values: list[dict[str, Any]]
-     ) -> list[InputDatasetParameterValue]:
-         values = list()
-         for v in raw_values:
-             parsed_value = InputDatasetParameterValue(parameter_value_id=v["parameterValueId"])
-             parsed_value.parameter_value = self._input_dataset_parse_parameter_value(
-                 raw_parameter_value=v["parameterValue"]
-             )
-             if d := v.get("parameterValueStartDate"):
-                 parsed_value.parameter_value_start_date = datetime.fromisoformat(d)
-             else:
-                 parsed_value.parameter_value_start_date = datetime(1, 1, 1)
-             values.append(parsed_value)
-         return values
-
-     def _input_dataset_parse_parameter_value(self, raw_parameter_value: str) -> Any:
-         """Return the json decoding of the parameter value."""
-         return json.loads(raw_parameter_value, object_hook=self._decode_parameter_value)
-
-     def _decode_parameter_value(self, param_dict: dict):
-         """Decode a parameter value."""
-         if "__file__" in param_dict:
-             return self._convert_parameter_file_value_to_path(param_dict)
-         # Nothing to do here, so return control back to json.loads()
-         return param_dict
-
-     def _convert_parameter_file_value_to_path(self, param_dict: dict):
-         """
-         Decode a parameter file value.
-
-         Note: for parameters that are files, the string passed to json.loads() looks like this:
-         '{ "__file__":
-             {
-               "bucket": "data",
-               "objectKey": "parameters/parameter_name/uuid.dat"
-             }
-         }'
-
-         In this hook, we remove the outer __file__ dict and return the inner dict with the addition of
-         a key and value for the file path. Because the file path is dependent on the existence of a tag,
-         if no tag is found, None is returned for the path. This use case will occur when we construct
-         the list of files to be transferred and tagged by the TransferL0Data task.
-         """
-         file_dict = param_dict["__file__"]
-         object_key = file_dict["objectKey"]
-         parameter_tag = Tag.parameter(Path(object_key).name)
-         paths = list(self.read(tags=parameter_tag))
-         num_paths = len(paths)
-         if num_paths == 1:
-             param_path = paths[0]
-         elif num_paths == 0:
-             param_path = None
-         else:
-             raise ValueError(f"Found multiple paths for {parameter_tag = }.")
-         file_dict["param_path"] = param_path
-         file_dict["is_file"] = True
-         return file_dict
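
For context on what the removed module did, the sketch below illustrates the `object_hook` pattern used by `_input_dataset_parse_parameter_value` and `_decode_parameter_value` to unwrap file-type parameter values, following the payload format documented in the `_convert_parameter_file_value_to_path` docstring above. It is a minimal, self-contained approximation, not part of the package: the standalone `decode_parameter_value` function is hypothetical, and where the real mixin resolves `param_path` via `self.read(tags=Tag.parameter(...))`, this sketch simply leaves it as `None` (the "no tag found" case).

```python
import json


def decode_parameter_value(param_dict: dict) -> dict:
    """Hypothetical standalone stand-in for InputDatasetMixin._decode_parameter_value."""
    if "__file__" in param_dict:
        # Unwrap the outer "__file__" dict and flag the inner dict as a file.
        file_dict = param_dict["__file__"]
        # The real mixin looks the local path up by tag; None mimics the "no tag yet" case.
        file_dict["param_path"] = None
        file_dict["is_file"] = True
        return file_dict
    # Not a file parameter: hand the dict back to json.loads() unchanged.
    return param_dict


raw = '{"__file__": {"bucket": "data", "objectKey": "parameters/parameter_name/uuid.dat"}}'
decoded = json.loads(raw, object_hook=decode_parameter_value)
# decoded == {"bucket": "data", "objectKey": "parameters/parameter_name/uuid.dat",
#             "param_path": None, "is_file": True}
```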