dkist-processing-common 10.8.2__py3-none-any.whl → 10.8.4rc1__py3-none-any.whl

This diff reflects the changes between package versions as published to a supported public registry and is provided for informational purposes only.
Files changed (30)
  1. changelog/235.feature.rst +3 -0
  2. changelog/235.misc.1.rst +2 -0
  3. changelog/235.misc.rst +1 -0
  4. dkist_processing_common/codecs/array.py +19 -0
  5. dkist_processing_common/codecs/basemodel.py +21 -0
  6. dkist_processing_common/codecs/fits.py +12 -6
  7. dkist_processing_common/manual.py +3 -5
  8. dkist_processing_common/models/fried_parameter.py +41 -0
  9. dkist_processing_common/models/graphql.py +13 -3
  10. dkist_processing_common/models/input_dataset.py +113 -0
  11. dkist_processing_common/models/parameters.py +65 -28
  12. dkist_processing_common/parsers/quality.py +1 -0
  13. dkist_processing_common/tasks/mixin/metadata_store.py +7 -4
  14. dkist_processing_common/tasks/mixin/quality/_metrics.py +19 -14
  15. dkist_processing_common/tasks/quality_metrics.py +1 -1
  16. dkist_processing_common/tasks/transfer_input_data.py +61 -70
  17. dkist_processing_common/tasks/write_l1.py +9 -2
  18. dkist_processing_common/tests/conftest.py +24 -7
  19. dkist_processing_common/tests/test_codecs.py +38 -0
  20. dkist_processing_common/tests/test_fried_parameter.py +27 -0
  21. dkist_processing_common/tests/test_input_dataset.py +79 -308
  22. dkist_processing_common/tests/test_parameters.py +71 -22
  23. dkist_processing_common/tests/test_quality_mixin.py +32 -22
  24. dkist_processing_common/tests/test_transfer_input_data.py +131 -45
  25. dkist_processing_common/tests/test_write_l1.py +35 -10
  26. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/METADATA +1 -1
  27. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/RECORD +29 -22
  28. dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
  29. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/WHEEL +0 -0
  30. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/transfer_input_data.py

@@ -2,35 +2,44 @@
  import logging
  from pathlib import Path

- from dkist_processing_common.codecs.json import json_encoder
+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.codecs.basemodel import basemodel_encoder
+ from dkist_processing_common.models.input_dataset import InputDatasetObject
+ from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.tasks.base import WorkflowTaskBase
  from dkist_processing_common.tasks.mixin.globus import GlobusMixin
  from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
+

  __all__ = ["TransferL0Data"]

  logger = logging.getLogger(__name__)


- class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
+ class TransferL0Data(WorkflowTaskBase, GlobusMixin):
      """Transfers Level 0 data and required parameter files to the scratch store."""

      def download_input_dataset(self):
-         """Get the input dataset document parts and save it to scratch with the appropriate tags."""
-         if observe_frames := self.metadata_store_input_dataset_observe_frames:
-             observe_doc = observe_frames.inputDatasetPartDocument
-             self.write(observe_doc, tags=Tag.input_dataset_observe_frames(), encoder=json_encoder)
-         if calibration_frames := self.metadata_store_input_dataset_calibration_frames:
-             calibration_doc = calibration_frames.inputDatasetPartDocument
+         """Write the input dataset part documents to scratch with appropriate tags."""
+         if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
+             doc = observe_frames_part.inputDatasetPartDocument
+             self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
+         if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
+             doc = calibration_frames_part.inputDatasetPartDocument
              self.write(
-                 calibration_doc, tags=Tag.input_dataset_calibration_frames(), encoder=json_encoder
+                 data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
              )
-         if parameters := self.metadata_store_input_dataset_parameters:
-             parameters_doc = parameters.inputDatasetPartDocument
-             self.write(parameters_doc, tags=Tag.input_dataset_parameters(), encoder=json_encoder)
+         if parameters_part := self.metadata_store_input_dataset_parameters:
+             doc = parameters_part.inputDatasetPartDocument
+             self.add_file_tags_to_parameters_doc(param_doc=doc)
+             self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
+
+     def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
+         """Update the input dataset document with the location of the file parameters."""
+         for doc_item in param_doc.doc_list:
+             for obj in doc_item.input_dataset_objects:
+                 obj.tag = Tag.parameter(Path(obj.object_key).name)

      def format_transfer_items(
          self, input_dataset_objects: list[InputDatasetObject]
@@ -49,77 +58,59 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
              )
          return transfer_items

-     def format_frame_transfer_items(self) -> list[GlobusTransferItem]:
+     def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
          """Format the list of frames as transfer items to be used by globus."""
-         return self.format_transfer_items(self.input_dataset_frames)
-
-     def format_parameter_transfer_items(self) -> list[GlobusTransferItem]:
-         """Format the list of parameter objects as transfer items to be used by globus."""
-         return self.format_transfer_items(self.input_dataset_parameter_objects)
-
-     def tag_input_frames(self, transfer_items: list[GlobusTransferItem]) -> None:
-         """
-         Tag all the input files with 'frame' and 'input' tags.
-
-         Parameters
-         ----------
-         transfer_items
-             List of items to be tagged
-
-         Returns
-         -------
-         None
-         """
-         scratch_items = [
-             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-         ]
-         for si in scratch_items:
-             self.tag(si, tags=[Tag.input(), Tag.frame()])
-
-     def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
-         """
-         Tag all the parameter files with 'parameter'.
-
-         Parameters
-         ----------
-         transfer_items
-             List of items to be tagged
-
-         Returns
-         -------
-         None
-         """
-         scratch_items = [
-             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-         ]
-         for si in scratch_items:
-             self.tag(si, tags=[Tag.parameter(si.name)])
+         doc = next(
+             self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
+             None,
+         )
+         doc_list = doc.doc_list if doc else []
+         input_dataset_objects = []
+         for doc_item in doc_list:
+             input_dataset_objects += doc_item.input_dataset_objects
+         return input_dataset_objects
+
+     def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
+         """Tag all the transferred input files."""
+         for obj in input_dataset_objects:
+             obj_path = self.scratch.absolute_path(obj.object_key)
+             if obj.tag:
+                 self.tag(obj_path, tags=obj.tag)
+             else:
+                 self.tag(obj_path, tags=[Tag.input(), Tag.frame()])

      def run(self) -> None:
          """Execute the data transfer."""
          with self.apm_task_step("Change Status to InProgress"):
              self.metadata_store_change_recipe_run_to_inprogress()

-         with self.apm_task_step("Download Input Dataset"):
+         with self.apm_task_step("Download Input Dataset Documents"):
              self.download_input_dataset()

-         with self.apm_task_step("Format Frame Transfer Items"):
-             frame_transfer_items = self.format_frame_transfer_items()
-             if not frame_transfer_items:
-                 raise ValueError("No input dataset frames found")
-
-         with self.apm_task_step("Format Parameter Transfer Items"):
-             parameter_transfer_items = self.format_parameter_transfer_items()
+         with self.apm_task_step("Build Input Dataset Transfer List"):
+             observe_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_observe_frames()
+             )
+             calibration_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_calibration_frames()
+             )
+             parameter_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_parameters()
+             )
+             transfer_objects = (
+                 observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
+             )
+             if len(observe_transfer_objects + calibration_transfer_objects) == 0:
+                 raise ValueError("No input dataset frames found to transfer")

          with self.apm_task_step("Transfer Input Frames and Parameter Files via Globus"):
              self.globus_transfer_object_store_to_scratch(
-                 transfer_items=frame_transfer_items + parameter_transfer_items,
-                 label=f"Transfer Inputs for Recipe Run {self.recipe_run_id}",
+                 transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
+                 label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
              )

          with self.apm_processing_step("Tag Input Frames and Parameter Files"):
-             self.tag_input_frames(transfer_items=frame_transfer_items)
-             self.tag_parameter_objects(transfer_items=parameter_transfer_items)
+             self.tag_transfer_objects(input_dataset_objects=transfer_objects)

      def rollback(self):
          """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
dkist_processing_common/tasks/write_l1.py

@@ -29,6 +29,7 @@ from sunpy.coordinates import Helioprojective

  from dkist_processing_common.codecs.fits import fits_access_decoder
  from dkist_processing_common.codecs.fits import fits_hdulist_encoder
+ from dkist_processing_common.models.fried_parameter import r0_valid
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.models.wavelength import WavelengthRange
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
@@ -373,8 +374,14 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
          """
          # Replace header values in place
          header = self.replace_header_values(header=header, data=data)
-         # Remove r0 value if AO not locked
-         header = self.remove_invalid_r0_values(header=header)
+         # Remove r0 value if r0 conditions are not met
+         r0_is_valid = r0_valid(
+             r0=header["ATMOS_R0"],
+             ao_lock=header.get("AO_LOCK", None),
+             num_out_of_bounds_ao_values=header.get("OOBSHIFT", None),
+         )
+         if not r0_is_valid:
+             header.pop("ATMOS_R0", None)
          # Add the stats table
          header = self.add_stats_headers(header=header, data=data)
          # Add the datacenter table
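r0_valid comes from the new dkist_processing_common/models/fried_parameter.py (+41 lines), which this diff does not show. The call above and the parametrized cases in dkist_processing_common/tests/test_fried_parameter.py (end of this diff) imply that r0 is kept only when AO is locked, the r0 value itself is plausible, and the out-of-bounds AO shift count, when present, is small. A hedged sketch of that logic follows; the threshold constants are placeholders chosen only to satisfy the test values (0.2 vs 1 for r0, 17 vs 150 for OOBSHIFT), not the released cutoffs.

# Sketch only: thresholds are placeholders, not the values shipped in fried_parameter.py.
MAX_PLAUSIBLE_R0 = 0.3  # 0.2 passes, 1 fails in the tests
MAX_OUT_OF_BOUNDS_AO_VALUES = 100  # 17 passes, 150 fails in the tests


def r0_valid(
    r0: float,
    ao_lock: bool | None,
    num_out_of_bounds_ao_values: int | None,
) -> bool:
    """Return True only when the Fried parameter (ATMOS_R0) can be trusted."""
    if not ao_lock:  # missing (None) or False: AO was not locked, so r0 is meaningless
        return False
    if r0 >= MAX_PLAUSIBLE_R0:  # implausibly large r0
        return False
    if (
        num_out_of_bounds_ao_values is not None
        and num_out_of_bounds_ao_values >= MAX_OUT_OF_BOUNDS_AO_VALUES
    ):
        return False
    return True

Whatever the real thresholds are, WriteL1Frame simply pops ATMOS_R0 from the header whenever this check fails, so downstream users never see an untrustworthy Fried parameter.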
dkist_processing_common/tests/conftest.py

@@ -45,7 +45,6 @@ from dkist_processing_common.models.graphql import RecipeRunStatusResponse
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
  from dkist_processing_common.tasks import WorkflowTaskBase
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin

  TILE_SIZE = 64

@@ -359,7 +358,7 @@ class FakeGQLClient:
          {
              "parameterValueId": 1,
              "parameterValue": json.dumps([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          }
      ],
  },

@@ -376,7 +375,7 @@
                      }
                  }
              ),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          },
          {
              "parameterValueId": 3,

@@ -388,7 +387,7 @@
                      }
                  }
              ),
-             "parameterValueStartDate": "2000-01-02",
+             "parameterValueStartDate": datetime(2000, 1, 2).isoformat(),
          },
      ],
  },

@@ -400,7 +399,7 @@
              "parameterValue": json.dumps(
                  {"a": 1, "b": 3.14159, "c": "foo", "d": [1, 2, 3]}
              ),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          }
      ],
  },

@@ -796,7 +795,7 @@ def post_fit_polcal_fitter(
      return fitter


- class InputDatasetTask(WorkflowTaskBase, InputDatasetMixin):
+ class InputDatasetTask(WorkflowTaskBase):
      def run(self):
          pass

@@ -824,7 +823,7 @@ def task_with_input_dataset(
      task.scratch.workflow_base_path = tmp_path / str(recipe_run_id)
      for part, tag in input_dataset_parts:
          file_path = task.scratch.workflow_base_path / Path(f"{uuid4().hex[:6]}.ext")
-         file_path.write_text(data=json.dumps(part))
+         file_path.write_text(data=json.dumps({"doc_list": part}))
          task.tag(path=file_path, tags=tag)
      yield task

@@ -851,6 +850,24 @@ def create_parameter_files(
          task.tag(path=file_path, tags=Tag.parameter(param_path))


+ def create_input_frames(
+     task: WorkflowTaskBase,
+     input_frame_docs: list[dict] = FakeGQLClient.observe_frames_doc_object
+     + FakeGQLClient.calibration_frames_doc_object,
+ ):
+     """
+     Create the observe and calibration frame files specified in the input dataset documents
+     returned by the metadata store.
+     """
+     for frame in input_frame_docs:
+         for object_key in frame["object_keys"]:
+             file_path = task.scratch.workflow_base_path / Path(object_key)
+             if not file_path.parent.exists():
+                 file_path.parent.mkdir(parents=True, exist_ok=True)
+             file_path.write_text(data="")
+             task.tag(path=file_path, tags=[Tag.frame(), Tag.input()])
+
+
  @pytest.fixture()
  def fake_constants_db() -> dict:
      """
dkist_processing_common/tests/test_codecs.py

@@ -19,10 +19,15 @@ from astropy.io.fits import CompImageHDU
  from astropy.io.fits import HDUList
  from astropy.io.fits import Header
  from astropy.io.fits import PrimaryHDU
+ from pydantic import BaseModel
+ from pydantic import create_model
+ from pydantic import Field

  from dkist_processing_common.codecs.asdf import asdf_decoder
  from dkist_processing_common.codecs.asdf import asdf_encoder
  from dkist_processing_common.codecs.asdf import asdf_fileobj_encoder
+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.codecs.basemodel import basemodel_encoder
  from dkist_processing_common.codecs.bytes import bytes_decoder
  from dkist_processing_common.codecs.bytes import bytes_encoder
  from dkist_processing_common.codecs.fits import fits_access_decoder

@@ -100,6 +105,14 @@ def path_to_json(dictionary, tmp_file) -> Path:
      return tmp_file


+ @pytest.fixture
+ def pydantic_basemodel() -> BaseModel:
+     class Foo(BaseModel):
+         bar: int
+
+     return Foo(bar=123)
+
+
  @pytest.fixture
  def string() -> str:
      return "string"

@@ -356,6 +369,7 @@ class DummyFitsAccess(FitsAccessBase):
          pytest.param("primary_hdu_list", fits_hdulist_encoder, id="fits uncompressed HDUList"),
          pytest.param("compressed_hdu_list", fits_hdulist_encoder, id="fits compressed HDUList"),
          pytest.param("dictionary", json_encoder, id="json"),
+         pytest.param("pydantic_basemodel", basemodel_encoder, id="pydantic basemodel"),
          pytest.param("string", str_encoder, id="str"),
          pytest.param("asdf_tree", asdf_encoder, id="asdf"),
          pytest.param("asdf_obj", asdf_fileobj_encoder, id="asdf_obj"),

@@ -600,6 +614,30 @@ def test_json_encoder_invalid(python_object: Any, expected_exception_type: type[
          json_encoder(python_object)


+ def test_basemodel_decoder(valid_json_codec, path_to_text_file):
+     """
+     Given: a python object that can be validated to a Pydantic BaseModel object is written to file as json
+     When: basemodel decoding is applied to the json file
+     Then: the string gets decoded to the correct Pydantic BaseModel object
+     """
+     # write python object to file as json string
+     python_object = valid_json_codec["python_object"]
+     path = path_to_text_file(json.dumps({"foo": python_object}))
+
+     # create basemodel on the fly
+     DynamicBaseModel = create_model(
+         "DynamicBaseModel", foo=(Any, Field(default_factory=type(python_object)))
+     )
+
+     # get the same object via the basemodel decoder
+     decoded_obj = basemodel_decoder(path, model=DynamicBaseModel)
+     if python_object is nan:
+         # By definition, nan != nan
+         assert isnan(decoded_obj.foo)
+     else:
+         assert decoded_obj.foo == python_object
+
+
  def test_quality_data_encoder_valid(valid_quality_codec):
      """
      Given: a python object that can be encoded as a json string
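The basemodel_encoder and basemodel_decoder exercised by this test live in the new dkist_processing_common/codecs/basemodel.py (+21 lines), which is not included in the diff. By analogy with the package's other codecs (encoders turn an object into bytes for self.write, decoders turn a file path back into an object for self.read), a plausible Pydantic v2 sketch is shown below; the exact signatures and keyword handling of the released codec are assumptions.

# Hypothetical sketch; the released codecs/basemodel.py may differ in signature details.
from pathlib import Path
from typing import TypeVar

from pydantic import BaseModel

ModelT = TypeVar("ModelT", bound=BaseModel)


def basemodel_encoder(data: BaseModel, **dump_kwargs) -> bytes:
    """Serialize a Pydantic model to UTF-8 JSON bytes for writing to scratch."""
    return data.model_dump_json(**dump_kwargs).encode("utf-8")


def basemodel_decoder(path: Path, model: type[ModelT], **validate_kwargs) -> ModelT:
    """Read a JSON file from scratch and validate it into the requested model."""
    return model.model_validate_json(path.read_text(), **validate_kwargs)

With this pairing, TransferL0Data can round-trip the part documents: download_input_dataset writes them with basemodel_encoder, and build_transfer_list later reads them back as InputDatasetPartDocumentList instances via basemodel_decoder.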
dkist_processing_common/tests/test_fried_parameter.py (new file)

@@ -0,0 +1,27 @@
+ import pytest
+
+ from dkist_processing_common.models.fried_parameter import r0_valid
+
+
+ @pytest.mark.parametrize(
+     "r0, ao_lock, oob_shift, should_r0_exist",
+     [
+         pytest.param(0.2, True, 17, True, id="AO_LOCK_True_good_R0_good_oob"),
+         pytest.param(1, True, 17, False, id="AO_LOCK_True_bad_R0_good_oob"),
+         pytest.param(0.2, False, 17, False, id="AO_LOCK_False_good_R0_good_oob"),
+         pytest.param(1, False, 17, False, id="AO_LOCK_False_bad_R0_good_oob"),
+         pytest.param(0.2, True, 150, False, id="AO_LOCK_True_good_R0_bad_oob"),
+         pytest.param(1, True, 150, False, id="AO_LOCK_True_bad_R0_bad_oob"),
+         pytest.param(0.2, False, 150, False, id="AO_LOCK_False_good_R0_bad_oob"),
+         pytest.param(1, False, 150, False, id="AO_LOCK_False_bad_R0_bad_oob"),
+         pytest.param(0.2, None, 17, False, id="AO_LOCK_missing"),
+         pytest.param(0.2, True, None, True, id="OOBSHIFT_missing"),
+     ],
+ )
+ def test_check_r0_valid(r0, ao_lock, oob_shift, should_r0_exist):
+     """
+     :Given: values for r0, the ao_lock status, and the ao out of bound shift value
+     :When: checking for a valid state to use r0
+     :Then: valid conditions are marked True, invalid conditions marked False
+     """
+     assert r0_valid(r0, ao_lock, oob_shift) == should_r0_exist