dkist-processing-common 10.8.1__py3-none-any.whl → 10.8.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/235.feature.rst +3 -0
- changelog/235.misc.1.rst +2 -0
- changelog/235.misc.rst +1 -0
- dkist_processing_common/codecs/array.py +19 -0
- dkist_processing_common/codecs/basemodel.py +21 -0
- dkist_processing_common/codecs/fits.py +12 -6
- dkist_processing_common/manual.py +3 -5
- dkist_processing_common/models/graphql.py +13 -3
- dkist_processing_common/models/input_dataset.py +113 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/tasks/mixin/metadata_store.py +7 -4
- dkist_processing_common/tasks/transfer_input_data.py +61 -70
- dkist_processing_common/tests/conftest.py +24 -7
- dkist_processing_common/tests/test_codecs.py +38 -0
- dkist_processing_common/tests/test_input_dataset.py +79 -308
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_transfer_input_data.py +131 -45
- dkist_processing_common/tests/test_write_l1.py +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/METADATA +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/RECORD +22 -17
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/WHEEL +1 -1
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/top_level.txt +0 -0
changelog/235.feature.rst
ADDED
@@ -0,0 +1,3 @@
+Add two new codecs: Basemodel codecs are used for encoding and decoding Pydantic BaseModel objects. For decoding, the intended model
+is passed to the decoder through a keyword argument in the task read method. Array codecs are used for encoding and decoding numpy
+arrays similar to the standard np.load() and np.save(), but with the task tag-based write method.
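As a rough illustration of that workflow (a sketch, not taken from the package's tests), the snippet below assumes a WorkflowTaskBase subclass whose read()/write() methods accept decoder=/encoder= callables, as used elsewhere in this diff; the tag names and DemoModel are invented.

# Illustrative only: assumes task.read()/task.write() accept decoder=/encoder= callables.
import numpy as np
from pydantic import BaseModel

from dkist_processing_common.codecs.array import array_decoder
from dkist_processing_common.codecs.array import array_encoder
from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.codecs.basemodel import basemodel_encoder


class DemoModel(BaseModel):  # hypothetical model
    name: str
    repeats: int


def demo_round_trip(task):  # task: a WorkflowTaskBase instance
    # Pydantic model via the basemodel codec; the intended model is passed to read()
    task.write(data=DemoModel(name="x", repeats=2), tags=["DEMO_MODEL"], encoder=basemodel_encoder)
    model = next(task.read(tags=["DEMO_MODEL"], decoder=basemodel_decoder, model=DemoModel))

    # numpy array via the array codec
    task.write(data=np.arange(10), tags=["DEMO_ARRAY"], encoder=array_encoder)
    array = next(task.read(tags=["DEMO_ARRAY"], decoder=array_decoder))
    return model, array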
changelog/235.misc.1.rst
ADDED
changelog/235.misc.rst
ADDED
@@ -0,0 +1 @@
+Remove the input_dataset mixin and replace it with input_dataset Pydantic BaseModel models.
dkist_processing_common/codecs/array.py
ADDED
@@ -0,0 +1,19 @@
+"""Encoder/decoder for writing/reading numpy arrays."""
+import io
+from pathlib import Path
+
+import numpy as np
+
+from dkist_processing_common.codecs.iobase import iobase_encoder
+
+
+def array_encoder(data: np.ndarray, **np_kwargs) -> bytes:
+    """Convert a numpy array to bytes compatible with np.load()."""
+    buffer = io.BytesIO()
+    np.save(buffer, data, **np_kwargs)
+    return iobase_encoder(buffer)
+
+
+def array_decoder(path: Path, **np_kwargs) -> np.ndarray:
+    """Return the data in the file as a numpy array using np.load()."""
+    return np.load(path, **np_kwargs)
dkist_processing_common/codecs/basemodel.py
ADDED
@@ -0,0 +1,21 @@
+"""Encoder/decoder for writing and reading Pydantic BaseModel objects."""
+from pathlib import Path
+from typing import Type
+
+from pydantic import BaseModel
+
+from dkist_processing_common.codecs.bytes import bytes_decoder
+from dkist_processing_common.codecs.str import str_encoder
+
+
+def basemodel_encoder(data: BaseModel, **basemodel_kwargs) -> bytes:
+    """Convert a Pydantic BaseModel object into bytes for writing to file."""
+    data_dump = data.model_dump_json(**basemodel_kwargs)
+    return str_encoder(data_dump)
+
+
+def basemodel_decoder(path: Path, model: Type[BaseModel], **basemodel_kwargs) -> BaseModel:
+    """Return the data in the file as a Pydantic BaseModel object."""
+    data = bytes_decoder(path)
+    model_validated = model.model_validate_json(data, **basemodel_kwargs)
+    return model_validated
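A minimal, standalone round trip through these two functions, assuming only what is shown above; DemoSettings and the file name are invented, and the array codec round-trips the same way with array_encoder/array_decoder.

# Standalone round trip for the BaseModel codec; DemoSettings and the path are illustrative.
import tempfile
from pathlib import Path

from pydantic import BaseModel

from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.codecs.basemodel import basemodel_encoder


class DemoSettings(BaseModel):  # hypothetical model
    name: str
    repeats: int = 1


with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "settings.json"
    path.write_bytes(basemodel_encoder(DemoSettings(name="dark", repeats=3)))
    restored = basemodel_decoder(path, model=DemoSettings)
    assert restored.repeats == 3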
dkist_processing_common/codecs/fits.py
@@ -30,15 +30,15 @@ def fits_hdulist_encoder(hdu_list: fits.HDUList) -> bytes:
     return iobase_encoder(file_obj)


-def fits_hdu_decoder(path: Path) -> fits.PrimaryHDU | fits.CompImageHDU:
+def fits_hdu_decoder(path: Path, hdu: int | None = None) -> fits.PrimaryHDU | fits.CompImageHDU:
     """Read a Path with `fits` to produce an `HDUList`."""
     hdu_list = fits.open(path, checksum=True)
-    return _extract_hdu(hdu_list)
+    return _extract_hdu(hdu_list, hdu)


-def fits_array_decoder(path: Path, auto_squeeze: bool = True) -> np.ndarray:
+def fits_array_decoder(path: Path, hdu: int | None = None, auto_squeeze: bool = True) -> np.ndarray:
     """Read a Path with `fits` and return the `.data` property."""
-    hdu = fits_hdu_decoder(path)
+    hdu = fits_hdu_decoder(path, hdu=hdu)
     data = hdu.data

     # This conditional is explicitly to catch summit data with a dummy first axis for WCS
@@ -56,8 +56,14 @@ def fits_access_decoder(
     return fits_access_class(hdu=hdu, name=str(path), **fits_access_kwargs)


-def _extract_hdu(hdul: fits.HDUList) -> fits.PrimaryHDU | fits.CompImageHDU:
-    """
+def _extract_hdu(hdul: fits.HDUList, hdu: int | None = None) -> fits.PrimaryHDU | fits.CompImageHDU:
+    """
+    Return the fits hdu associated with the data in the hdu list.
+
+    Only search down the hdu index for the data if the hdu index is not explicitly provided.
+    """
+    if hdu is not None:
+        return hdul[hdu]
     if hdul[0].data is not None:
         return hdul[0]
     return hdul[1]
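A hedged example of the new hdu argument; the file name is hypothetical, and passing hdu=1 bypasses the automatic search in _extract_hdu.

# Hypothetical file; hdu=1 reads the first extension directly instead of letting
# _extract_hdu search for the HDU that carries data.
from pathlib import Path

from dkist_processing_common.codecs.fits import fits_array_decoder

data = fits_array_decoder(Path("compressed_frame.fits"), hdu=1)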
dkist_processing_common/manual.py
@@ -2,15 +2,13 @@
 import json
 import logging
 import shutil
-from dataclasses import asdict
-from io import BytesIO
 from pathlib import Path
 from typing import Callable
 from unittest.mock import patch

 from dkist_processing_core.task import TaskBase

-from dkist_processing_common.codecs.
+from dkist_processing_common.codecs.basemodel import basemodel_encoder
 from dkist_processing_common.models.graphql import RecipeRunProvenanceMutation
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
@@ -182,8 +180,8 @@ def writing_metadata_store_record_provenance(self, is_task_manual: bool, library
             workflowVersion=self.workflow_version,
         )
         self.write(
-            data=params
-            encoder=
+            data=params,
+            encoder=basemodel_encoder,
             tags=["PROVENANCE_RECORD"],
             relative_path=f"{self.task_name}_provenance.json",
             overwrite=True,
dkist_processing_common/models/graphql.py
@@ -3,6 +3,9 @@ from pydantic import BaseModel
 from pydantic import field_validator
 from pydantic import Json

+from dkist_processing_common.models.input_dataset import InputDatasetBaseModel
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
+

 class RecipeRunMutation(BaseModel):
     """Recipe run mutation record."""
@@ -37,13 +40,19 @@ class InputDatasetPartTypeResponse(BaseModel):
     inputDatasetPartTypeName: str


-class InputDatasetPartResponse(
+class InputDatasetPartResponse(InputDatasetBaseModel):
     """Response class for the input dataset part entity."""

     inputDatasetPartId: int
-    inputDatasetPartDocument: Json[
+    # inputDatasetPartDocument : Json[InputDatasetPartDocumentList] # will work in gqlclient v2
+    inputDatasetPartDocument: Json[list]
     inputDatasetPartType: InputDatasetPartTypeResponse

+    @field_validator("inputDatasetPartDocument", mode="after")
+    @classmethod
+    def _use_frame_or_parameter_model(cls, value_list): # not needed for gqlclient v2
+        return InputDatasetPartDocumentList(doc_list=value_list)
+

 class InputDatasetInputDatasetPartResponse(BaseModel):
     """Response class for the join entity between input datasets and input dataset parts."""
@@ -103,11 +112,12 @@ class RecipeRunResponse(BaseModel):
     recipeInstance: RecipeInstanceResponse
     recipeInstanceId: int
     recipeRunProvenances: list[RecipeRunProvenanceResponse]
+    # configuration: Json[RecipeRunConfiguration] | None # will work in gqlclient v2
     configuration: Json[dict] | None

     @field_validator("configuration", mode="after")
     @classmethod
-    def _use_recipe_run_configuration_model(cls, value):
+    def _use_recipe_run_configuration_model(cls, value): # not needed for gqlclient v2
         if value is None:
             return RecipeRunConfiguration()
         return RecipeRunConfiguration.model_validate(value)
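A sketch of what the new validator does to a raw part document; in the pipeline gqlclient builds this response, so the hand-constructed values below are purely illustrative (the document models are defined in the new input_dataset module shown next).

# Hand-built response for illustration; normally gqlclient constructs this object.
from dkist_processing_common.models.graphql import InputDatasetPartResponse
from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList

part = InputDatasetPartResponse(
    inputDatasetPartId=1,
    inputDatasetPartDocument='[{"bucket": "data", "object_keys": ["frames/frame_0001.fits"]}]',
    inputDatasetPartType={"inputDatasetPartTypeName": "observe_frames"},
)
# The after-validator wraps the parsed Json[list] in the typed document model.
assert isinstance(part.inputDatasetPartDocument, InputDatasetPartDocumentList)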
dkist_processing_common/models/input_dataset.py
ADDED
@@ -0,0 +1,113 @@
+"""Input dataset models for the inputDatasetPartDocument from the metadata store api."""
+import json
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+from pydantic import field_serializer
+from pydantic import field_validator
+from pydantic import Json
+from pydantic import PlainSerializer
+from pydantic.alias_generators import to_camel
+from typing_extensions import Annotated
+
+
+class InputDatasetBaseModel(BaseModel):
+    """Custom BaseModel for input datasets."""
+
+    model_config = ConfigDict(
+        alias_generator=to_camel, validate_by_name=True, validate_by_alias=True
+    )
+
+    def model_dump(self, **kwargs) -> dict:
+        """Dump models as they were in the metadata store."""
+        kwargs.setdefault("exclude_defaults", True)
+        kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
+        return super().model_dump(**kwargs)
+
+    def model_dump_json(self, **kwargs) -> str:
+        """Dump models as they were in the metadata store."""
+        kwargs.setdefault("exclude_defaults", True)
+        kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
+        return super().model_dump_json(**kwargs)
+
+
+class InputDatasetObject(InputDatasetBaseModel):
+    """Input dataset object validator for a single file."""
+
+    bucket: str
+    object_key: str
+    tag: str | None = None
+
+
+class InputDatasetFilePointer(InputDatasetBaseModel):
+    """Wrapper for InputDatasetObject files."""
+
+    file_pointer: InputDatasetObject = Field(alias="__file__")
+
+
+class InputDatasetParameterValue(InputDatasetBaseModel):
+    """Input dataset parameter value validator."""
+
+    parameter_value_id: int
+    # parameter_value: Json[InputDatasetFilePointer] | Json[Any] # will work in gqlclient v2
+    parameter_value: Json[Any]
+    parameter_value_start_date: Annotated[
+        datetime, Field(default=datetime(1, 1, 1)), PlainSerializer(lambda x: x.isoformat())
+    ]
+
+    @field_validator("parameter_value", mode="after")
+    @classmethod
+    def validate_parameter_value(cls, param_val):
+        """Decode and provide additional validation for parameter_value types."""
+        match param_val:
+            case {"__file__": _}:
+                return InputDatasetFilePointer.model_validate(param_val)
+            case _:
+                return param_val
+
+    @field_serializer("parameter_value")
+    def serialize_parameter_value(self, param_val):
+        """Serialize the parameter_value types."""
+        if isinstance(param_val, InputDatasetBaseModel):
+            return json.dumps(param_val.model_dump())
+        return json.dumps(param_val)
+
+
+class InputDatasetParameter(InputDatasetBaseModel):
+    """Parsing of the inputDatasetPartDocument that is relevant for parameters."""
+
+    parameter_name: str
+    parameter_values: list[InputDatasetParameterValue]
+
+    @property
+    def input_dataset_objects(self) -> list[InputDatasetObject]:
+        """Find and return list of InputDatasetObjects."""
+        object_list = []
+        for param in self.parameter_values:
+            if isinstance(param.parameter_value, InputDatasetFilePointer):
+                object_list.append(param.parameter_value.file_pointer)
+        return object_list
+
+
+class InputDatasetFrames(InputDatasetBaseModel):
+    """Parsing of the inputDatasetPartDocument that is relevant for frames."""
+
+    bucket: str
+    object_keys: list[str] = Field(alias="object_keys") # not camel case in metadata store
+
+    @property
+    def input_dataset_objects(self) -> list[InputDatasetObject]:
+        """Convert a single bucket and a list of object_keys list into a list of InputDatasetObjects."""
+        object_list = []
+        for frame in self.object_keys:
+            object_list.append(InputDatasetObject(bucket=self.bucket, object_key=frame))
+        return object_list
+
+
+class InputDatasetPartDocumentList(InputDatasetBaseModel):
+    """List of either InputDatasetFrames or InputDatasetParameter objects."""
+
+    doc_list: list[InputDatasetFrames] | list[InputDatasetParameter] = Field(alias="doc_list")
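A short, hedged example of these models validating a parameters-style part document; the parameter name and object key are invented, and the file-pointer value arrives as a JSON string exactly as Json[Any] expects.

# Hypothetical parameters part document validated with the new models.
from dkist_processing_common.models.input_dataset import InputDatasetFilePointer
from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList

raw_doc = [
    {
        "parameterName": "inst_demo_dark_array",
        "parameterValues": [
            {
                "parameterValueId": 1,
                "parameterValue": '{"__file__": {"bucket": "data", "objectKey": "parameters/dark.npy"}}',
                "parameterValueStartDate": "2024-01-01T00:00:00",
            }
        ],
    }
]

doc = InputDatasetPartDocumentList(doc_list=raw_doc)
value = doc.doc_list[0].parameter_values[0].parameter_value
assert isinstance(value, InputDatasetFilePointer)
# input_dataset_objects collects the InputDatasetObject behind each file pointer.
print(doc.doc_list[0].input_dataset_objects)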
dkist_processing_common/models/parameters.py
@@ -1,14 +1,23 @@
 """Base class for parameter-parsing object."""
 import logging
+from contextlib import contextmanager
 from datetime import datetime
+from pathlib import Path
 from typing import Any
+from typing import Callable
 from typing import Literal

 import numpy as np
 import scipy.interpolate as spi
-from astropy.io import fits

-from dkist_processing_common.
+from dkist_processing_common._util.scratch import WorkflowFileSystem
+from dkist_processing_common.codecs.array import array_decoder
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
+from dkist_processing_common.codecs.fits import fits_array_decoder
+from dkist_processing_common.models.input_dataset import InputDatasetFilePointer
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
+from dkist_processing_common.models.tags import Tag
+

 logger = logging.getLogger(__name__)

@@ -24,9 +33,9 @@ class ParameterBase:

     To use in an instrument pipeline a subclass is required. Here's a simple, but complete example::

-        class InstParameters(ParameterBase)
-            def __init__(self,
-                super().__init__(
+        class InstParameters(ParameterBase):
+            def __init__(self, scratch, some_other_parameters):
+                super().__init__(scratch=scratch)
                 self._thing = self._some_function(some_other_parameters)

             @property
@@ -34,7 +43,7 @@ class ParameterBase:
                 return self._find_most_recent_past_value("some_parameter_name")

             @property
-            def
+            def complicated_parameter(self):
                 return self._some_complicated_parsing_function("complicated_parameter_name", another_argument)


@@ -55,15 +64,16 @@ class ParameterBase:
                     workflow_version=workflow_version,
                 )

-                self.parameters = InstParameters(self.
+                self.parameters = InstParameters(scratch=self.scratch) #<------ This is the important line

-
-
+    ParameterBase needs the task scratch in order to read the parameters document written at input dataset
+    transfer. Note that the first argument to the ConstantsSubclass will *always* be scratch, but additional
+    arguments can be passed if the subclass requires them.

     Parameters
     ----------
-
-    The
+    scratch
+        The task scratch WorkflowFileSystem instance

     obs_ip_start_time
         A string containing the start date of the Observe IP task type frames. Must be in isoformat.
@@ -74,25 +84,53 @@ class ParameterBase:

     def __init__(
         self,
-
+        scratch: WorkflowFileSystem,
         obs_ip_start_time: str | None = None,
         **kwargs,
     ):
+        self.scratch = scratch
+        input_dataset_parameter_model = self._get_parameters_doc_from_file()
+        input_dataset_parameters = {}
+        if input_dataset_parameter_model is not None:
+            input_dataset_parameters = {
+                p.parameter_name: p.parameter_values for p in input_dataset_parameter_model.doc_list
+            }
         self.input_dataset_parameters = input_dataset_parameters
+
         if obs_ip_start_time is not None:
             # Specifically `not None` because we want to error normally on badly formatted strings (including "").
             self._obs_ip_start_datetime = datetime.fromisoformat(obs_ip_start_time)
         else:
             logger.info(
                 "WARNING: "
-                "The task containing this parameters object did not provide an obs ip start time
-                "
+                "The task containing this parameters object did not provide an obs ip start time, "
+                "which really only makes sense for Parsing tasks."
             )

         for parent_class in self.__class__.__bases__:
             if hasattr(parent_class, "is_param_mixin"):
                 parent_class.__init__(self, **kwargs)

+    def _read_parameter_file(
+        self, tag: str, decoder: Callable[[Path], Any], **decoder_kwargs
+    ) -> Any:
+        """Read any file in the task scratch instance."""
+        paths = list(self.scratch.find_all(tags=tag))
+        if len(paths) == 0:
+            logger.info(f"WARNING: There is no parameter file for {tag = }")
+        if len(paths) == 1:
+            return decoder(paths[0], **decoder_kwargs)
+        if len(paths) > 1:
+            raise ValueError(f"There is more than one parameter file for {tag = }: {paths}")
+
+    def _get_parameters_doc_from_file(self) -> InputDatasetPartDocumentList:
+        """Get parameters doc saved at the TransferL0Data task."""
+        tag = Tag.input_dataset_parameters()
+        parameters_from_file = self._read_parameter_file(
+            tag=tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList
+        )
+        return parameters_from_file
+
     def _find_most_recent_past_value(
         self,
         parameter_name: str,
@@ -113,20 +151,19 @@ class ParameterBase:
         )
         return result

-
-
-
-
-
-
-        return
-
-
-
-
-
-
-        return result
+    def _load_param_value_from_fits(
+        self, param_obj: InputDatasetFilePointer, hdu: int = 0
+    ) -> np.ndarray:
+        """Return the data associated with a tagged parameter file saved in FITS format."""
+        tag = param_obj.file_pointer.tag
+        param_value = self._read_parameter_file(tag=tag, decoder=fits_array_decoder, hdu=hdu)
+        return param_value
+
+    def _load_param_value_from_numpy_save(self, param_obj: InputDatasetFilePointer) -> np.ndarray:
+        """Return the data associated with a tagged parameter file saved in numpy format."""
+        tag = param_obj.file_pointer.tag
+        param_value = self._read_parameter_file(tag=tag, decoder=array_decoder)
+        return param_value


 class _ParamMixinBase:
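A hedged sketch of an instrument parameters subclass built on the new scratch-backed loading; the parameter names are invented, and it assumes _find_most_recent_past_value returns the stored value, i.e. an InputDatasetFilePointer for file-backed parameters.

# Sketch only: hypothetical parameter names; file-backed values are assumed to arrive
# as the InputDatasetFilePointer objects produced by the models above.
from dkist_processing_common.models.parameters import ParameterBase


class InstParameters(ParameterBase):
    @property
    def inst_demo_dark_array(self):
        file_pointer = self._find_most_recent_past_value("inst_demo_dark_array")
        return self._load_param_value_from_numpy_save(file_pointer)

    @property
    def inst_demo_gain_array(self):
        file_pointer = self._find_most_recent_past_value("inst_demo_gain_array")
        return self._load_param_value_from_fits(file_pointer, hdu=0)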
dkist_processing_common/tasks/mixin/metadata_store.py
@@ -210,16 +210,19 @@ class MetadataStoreMixin:
         self, part_type: Literal["observe_frames", "calibration_frames", "parameters"]
     ) -> InputDatasetPartResponse:
         """Get the input dataset part by input dataset part type name."""
-
+        part_types_found = set()
+        input_dataset_part = None
         parts = (
             self.metadata_store_input_dataset_recipe_run.recipeInstance.inputDataset.inputDatasetInputDatasetParts
         )
         for part in parts:
             part_type_name = part.inputDatasetPart.inputDatasetPartType.inputDatasetPartTypeName
-            if part_type_name in
+            if part_type_name in part_types_found:
                 raise ValueError(f"Multiple input dataset parts found for {part_type_name=}.")
-
-
+            part_types_found.add(part_type_name)
+            if part_type_name == part_type:
+                input_dataset_part = part.inputDatasetPart
+        return input_dataset_part

     @property
     def metadata_store_input_dataset_observe_frames(self) -> InputDatasetPartResponse:
dkist_processing_common/tasks/transfer_input_data.py
@@ -2,35 +2,44 @@
 import logging
 from pathlib import Path

-from dkist_processing_common.codecs.
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
+from dkist_processing_common.codecs.basemodel import basemodel_encoder
+from dkist_processing_common.models.input_dataset import InputDatasetObject
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 from dkist_processing_common.tasks.mixin.globus import GlobusMixin
 from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
-
-from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
+

 __all__ = ["TransferL0Data"]

 logger = logging.getLogger(__name__)


-class TransferL0Data(WorkflowTaskBase, GlobusMixin
+class TransferL0Data(WorkflowTaskBase, GlobusMixin):
     """Transfers Level 0 data and required parameter files to the scratch store."""

     def download_input_dataset(self):
-        """
-        if
-
-        self.write(
-        if
-
+        """Write the input dataset part documents to scratch with appropriate tags."""
+        if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
+            doc = observe_frames_part.inputDatasetPartDocument
+            self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
+        if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
+            doc = calibration_frames_part.inputDatasetPartDocument
             self.write(
-
+                data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
             )
-        if
-
-        self.
+        if parameters_part := self.metadata_store_input_dataset_parameters:
+            doc = parameters_part.inputDatasetPartDocument
+            self.add_file_tags_to_parameters_doc(param_doc=doc)
+            self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
+
+    def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
+        """Update the input dataset document with the location of the file parameters."""
+        for doc_item in param_doc.doc_list:
+            for obj in doc_item.input_dataset_objects:
+                obj.tag = Tag.parameter(Path(obj.object_key).name)

     def format_transfer_items(
         self, input_dataset_objects: list[InputDatasetObject]
@@ -49,77 +58,59 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
         )
         return transfer_items

-    def
+    def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
         """Format the list of frames as transfer items to be used by globus."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        """
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.input(), Tag.frame()])
-
-    def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
-        """
-        Tag all the parameter files with 'parameter'.
-
-        Parameters
-        ----------
-        transfer_items
-            List of items to be tagged
-
-        Returns
-        -------
-        None
-        """
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.parameter(si.name)])
+        doc = next(
+            self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
+            None,
+        )
+        doc_list = doc.doc_list if doc else []
+        input_dataset_objects = []
+        for doc_item in doc_list:
+            input_dataset_objects += doc_item.input_dataset_objects
+        return input_dataset_objects
+
+    def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
+        """Tag all the transferred input files."""
+        for obj in input_dataset_objects:
+            obj_path = self.scratch.absolute_path(obj.object_key)
+            if obj.tag:
+                self.tag(obj_path, tags=obj.tag)
+            else:
+                self.tag(obj_path, tags=[Tag.input(), Tag.frame()])

     def run(self) -> None:
         """Execute the data transfer."""
         with self.apm_task_step("Change Status to InProgress"):
             self.metadata_store_change_recipe_run_to_inprogress()

-        with self.apm_task_step("Download Input Dataset"):
+        with self.apm_task_step("Download Input Dataset Documents"):
             self.download_input_dataset()

-        with self.apm_task_step("
-
-
-
-
-
-
+        with self.apm_task_step("Build Input Dataset Transfer List"):
+            observe_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_observe_frames()
+            )
+            calibration_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_calibration_frames()
+            )
+            parameter_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_parameters()
+            )
+            transfer_objects = (
+                observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
+            )
+            if len(observe_transfer_objects + calibration_transfer_objects) == 0:
+                raise ValueError("No input dataset frames found to transfer")

         with self.apm_task_step("Transfer Input Frames and Parameter Files via Globus"):
             self.globus_transfer_object_store_to_scratch(
-                transfer_items=
-                label=f"Transfer
+                transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
+                label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
             )

         with self.apm_processing_step("Tag Input Frames and Parameter Files"):
-            self.
-            self.tag_parameter_objects(transfer_items=parameter_transfer_items)
+            self.tag_transfer_objects(input_dataset_objects=transfer_objects)

     def rollback(self):
         """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
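A standalone sketch of the tagging performed by add_file_tags_to_parameters_doc, using only the models shown above; the parameter name and object key are hypothetical.

# Standalone illustration of the parameter-file tagging step.
from pathlib import Path

from dkist_processing_common.models.input_dataset import InputDatasetParameter
from dkist_processing_common.models.tags import Tag

param = InputDatasetParameter(
    parameter_name="inst_demo_dark_array",
    parameter_values=[
        {
            "parameterValueId": 1,
            "parameterValue": '{"__file__": {"bucket": "data", "objectKey": "parameters/dark.npy"}}',
        }
    ],
)
for obj in param.input_dataset_objects:
    # The tag recorded here is what tag_transfer_objects later applies to the transferred file.
    obj.tag = Tag.parameter(Path(obj.object_key).name)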