dkist-processing-common 10.6.1rc3__py3-none-any.whl → 10.6.1rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
changelog/236.misc.1.rst CHANGED
@@ -1 +1,2 @@
-Change returns from the metadata store queries into Pydantic BaseModel instances. Remove unnecessary parsing and error checking.
+Change returns from the metadata store queries into Pydantic BaseModel instances. Remove unnecessary parsing
+and error checking in the metadata store mixin.
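The metadata store hunks further down show the pattern behind this entry: hand-written isinstance checks are replaced by pydantic's @validate_call decorator, which validates arguments against the method's type hints. A minimal, self-contained sketch of that pattern (the function name and body below are illustrative stand-ins, not the package's actual mixin method):

from pydantic import ValidationError
from pydantic import validate_call


@validate_call
def create_recipe_run_status(status: str, is_complete: bool) -> dict:
    """Illustrative stand-in; argument types are enforced by pydantic before the body runs."""
    return {"recipeRunStatusName": status, "isComplete": is_complete}


create_recipe_run_status("INPROGRESS", False)  # passes validation
try:
    create_recipe_run_status("INPROGRESS", is_complete="not-a-bool")
except ValidationError as error:
    # Replaces the manual isinstance()/TypeError checks removed in this release.
    print(error)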
changelog/236.misc.rst CHANGED
@@ -1 +1,3 @@
-Convert dataclasses in the graphql model to Pydantic BaseModels for additional validation. In the RecipeRunResponse class, configuration is now returned as a dictionary. In the InputDatasetPartResponse class, inputDatasetPartDocument is now returned as a list of dictionaries.
+Convert dataclasses in the graphql model to Pydantic BaseModels for additional validation. In the
+RecipeRunResponse class, configuration is converted from a JSON dictionary to its own Pydantic BaseModel.
+In the InputDatasetPartResponse class, the inputDatasetPartDocument is now returned as a list of dictionaries.
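The graphql model hunks that follow add the RecipeRunConfiguration model behind this entry. A condensed sketch of how the new model behaves (the field subset is trimmed from the full model in the diff; the example configuration string is hypothetical):

from pydantic import BaseModel
from pydantic import Json
from pydantic import field_validator


class RecipeRunConfiguration(BaseModel):
    # Trimmed copy of the model added in this release; defaults match the diff below.
    validate_l1_on_write: bool = True
    destination_bucket: str = "data"
    teardown_enabled: bool = True
    tile_size: int | None = None


class RecipeRunResponse(BaseModel):
    configuration: Json[RecipeRunConfiguration] | None

    @field_validator("configuration", mode="after")
    @classmethod
    def _use_default_configuration_model(cls, value):
        # A null configuration falls back to the defaults instead of an empty dict.
        return value if value is not None else RecipeRunConfiguration()


# The metadata store returns configuration as a JSON string; Json[...] parses and validates it.
response = RecipeRunResponse(configuration='{"destination_bucket": "etc", "tile_size": 64}')
assert response.configuration.destination_bucket == "etc"
assert response.configuration.teardown_enabled is True  # default replaces .get("teardown_enabled", True)
assert RecipeRunResponse(configuration=None).configuration.validate_l1_on_write is True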
@@ -1,6 +1,6 @@
 """GraphQL Data models for the metadata store api."""
 from pydantic import BaseModel
-from pydantic import Field
+from pydantic import field_validator
 from pydantic import Json
 
 
@@ -85,13 +85,32 @@ class RecipeRunProvenanceResponse(BaseModel):
     isTaskManual: bool
 
 
+class RecipeRunConfiguration(BaseModel):
+    """Response class for a recipe run configuration dictionary."""
+
+    validate_l1_on_write: bool = True
+    destination_bucket: str = "data"
+    tile_size: int | None = None
+    trial_directory_name: str | None = None
+    trial_root_directory_name: str | None = None
+    teardown_enabled: bool = True
+    trial_exclusive_transfer_tag_lists: list[str] | None = None
+
+
 class RecipeRunResponse(BaseModel):
     """Recipe run query response."""
 
     recipeInstance: RecipeInstanceResponse
     recipeInstanceId: int
     recipeRunProvenances: list[RecipeRunProvenanceResponse]
-    configuration: Json[dict] | None = Field(default_factory=dict)
+    configuration: Json[RecipeRunConfiguration] | None
+
+    @field_validator("configuration", mode="after")
+    @classmethod
+    def _use_default_configuration_model(cls, value):
+        if value is None:
+            return RecipeRunConfiguration()
+        return value
 
 
 class RecipeRunMutationResponse(BaseModel):
@@ -4,6 +4,8 @@ import logging
 from functools import cached_property
 from typing import Literal
 
+from pydantic import validate_call
+
 from dkist_processing_common._util.graphql import GraphQLClient
 from dkist_processing_common.codecs.quality import QualityDataEncoder
 from dkist_processing_common.config import common_configurations
@@ -61,6 +63,7 @@ class MetadataStoreMixin:
         if len(response) > 0:
             return response[0].recipeRunStatusId
 
+    @validate_call
    def _metadata_store_create_recipe_run_status(self, status: str, is_complete: bool) -> int:
         """
         Add a new recipe run status to the db.
@@ -75,10 +78,6 @@ class MetadataStoreMixin:
             "marked complete.",
         }
 
-        if not isinstance(status, str):
-            raise TypeError(f"status must be of type str: {status}")
-        if not isinstance(is_complete, bool):
-            raise TypeError(f"is_complete must be of type bool: {is_complete}")
         params = RecipeRunStatusMutation(
             recipeRunStatusName=status,
             isComplete=is_complete,
@@ -197,7 +196,7 @@ class MetadataStoreMixin:
     # INPUT DATASET RECIPE RUN
 
     @cached_property
-    def metadata_store_input_dataset_recipe_run_response(self) -> InputDatasetRecipeRunResponse:
+    def metadata_store_input_dataset_recipe_run(self) -> InputDatasetRecipeRunResponse:
         """Get the input dataset recipe run response from the metadata store."""
         params = RecipeRunQuery(recipeRunId=self.recipe_run_id)
         response = self.metadata_store_client.execute_gql_query(
@@ -213,7 +212,7 @@
         """Get the input dataset part by input dataset part type name."""
         part_type_dict = {}
         parts = (
-            self.metadata_store_input_dataset_recipe_run_response.recipeInstance.inputDataset.inputDatasetInputDatasetParts
+            self.metadata_store_input_dataset_recipe_run.recipeInstance.inputDataset.inputDatasetInputDatasetParts
         )
         for part in parts:
             part_type_name = part.inputDatasetPart.inputDatasetPartType.inputDatasetPartTypeName
@@ -19,7 +19,7 @@ class OutputDataBase(WorkflowTaskBase, ABC):
     @cached_property
     def destination_bucket(self) -> str:
         """Get the destination bucket."""
-        return self.metadata_store_recipe_run.configuration.get("destination_bucket", "data")
+        return self.metadata_store_recipe_run.configuration.destination_bucket
 
     def format_object_key(self, path: Path) -> str:
         """
@@ -22,7 +22,7 @@ class TeardownBase(WorkflowTaskBase, ABC):
     @property
     def teardown_enabled(self) -> bool:
         """Recipe run configuration indicating if data should be removed at the end of a run."""
-        return self.metadata_store_recipe_run.configuration.get("teardown_enabled", True)
+        return self.metadata_store_recipe_run.configuration.teardown_enabled
 
     def run(self) -> None:
         """Run method for Teardown class."""
@@ -2,6 +2,7 @@
 import logging
 from pathlib import Path
 
+from dkist_processing_common.codecs.json import json_encoder
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 from dkist_processing_common.tasks.mixin.globus import GlobusMixin
@@ -20,11 +21,11 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
     def download_input_dataset(self):
         """Get the input dataset document parts and save it to scratch with the appropriate tags."""
         if doc := self.metadata_store_input_dataset_observe_frames.inputDatasetPartDocument:
-            self.write(doc.encode("utf-8"), tags=Tag.input_dataset_observe_frames())
+            self.write(doc, tags=Tag.input_dataset_observe_frames(), encoder=json_encoder)
         if doc := self.metadata_store_input_dataset_calibration_frames.inputDatasetPartDocument:
-            self.write(doc.encode("utf-8"), tags=Tag.input_dataset_calibration_frames())
+            self.write(doc, tags=Tag.input_dataset_calibration_frames(), encoder=json_encoder)
         if doc := self.metadata_store_input_dataset_parameters.inputDatasetPartDocument:
-            self.write(doc.encode("utf-8"), tags=Tag.input_dataset_parameters())
+            self.write(doc, tags=Tag.input_dataset_parameters(), encoder=json_encoder)
 
     def format_transfer_items(
         self, input_dataset_objects: list[InputDatasetObject]
@@ -43,25 +43,23 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
 
     @cached_property
     def destination_bucket(self) -> str:
-        """Get the destination bucket with a trial default."""
-        return self.metadata_store_recipe_run.configuration.get("destination_bucket", "etc")
+        """Get the destination bucket."""
+        return self.metadata_store_recipe_run.configuration.destination_bucket
 
     @property
     def destination_root_folder(self) -> Path:
         """Format the destination root folder with a value that can be set in the recipe run configuration."""
-        root_name_from_configuration = self.metadata_store_recipe_run.configuration.get(
-            "trial_root_directory_name"
+        root_name_from_config = (
+            self.metadata_store_recipe_run.configuration.trial_root_directory_name
         )
-        root_name = Path(root_name_from_configuration or super().destination_root_folder)
-
+        root_name = Path(root_name_from_config or super().destination_root_folder)
         return root_name
 
     @property
     def destination_folder(self) -> Path:
         """Format the destination folder with a parent that can be set by the recipe run configuration."""
-        dir_name = self.metadata_store_recipe_run.configuration.get("trial_directory_name") or Path(
-            self.constants.dataset_id
-        )
+        dir_name_from_config = self.metadata_store_recipe_run.configuration.trial_directory_name
+        dir_name = dir_name_from_config or Path(self.constants.dataset_id)
         return self.destination_root_folder / dir_name
 
     @property
@@ -71,9 +69,12 @@ class TransferTrialData(TransferDataBase, GlobusMixin):
         Defaults to transferring all product files. Setting `trial_exclusive_transfer_tag_lists` in the
         recipe run configuration to a list of tag lists will override the default.
         """
-        return self.metadata_store_recipe_run.configuration.get(
-            "trial_exclusive_transfer_tag_lists", self.default_transfer_tag_lists
+        tag_list_from_config = (
+            self.metadata_store_recipe_run.configuration.trial_exclusive_transfer_tag_lists
         )
+        if tag_list_from_config is not None:
+            return tag_list_from_config
+        return self.default_transfer_tag_lists
 
     @property
     def output_frame_tag_list(self) -> list[list[str]]:
@@ -105,14 +105,14 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
             spec214_validator.validate(self.scratch.absolute_path(relative_path))
 
     @cached_property
-    def tile_size_param(self) -> int:
+    def tile_size_param(self) -> int | None:
         """Get the tile size parameter for compression."""
-        return self.metadata_store_recipe_run.configuration.get("tile_size", None)
+        return self.metadata_store_recipe_run.configuration.tile_size
 
     @cached_property
     def validate_l1_on_write(self) -> bool:
         """Check for validate on write."""
-        return self.metadata_store_recipe_run.configuration.get("validate_l1_on_write", True)
+        return self.metadata_store_recipe_run.configuration.validate_l1_on_write
 
     @cached_property
     def workflow_had_manual_intervention(self):
@@ -333,6 +333,79 @@ def max_cs_step_time_sec() -> float:
 
 
 class FakeGQLClient:
+
+    observe_frames_doc_object = [
+        {
+            "bucket": uuid4().hex[:6],
+            "object_keys": [Path(uuid4().hex[:6]).as_posix() for _ in range(3)],
+        }
+    ]
+
+    calibration_frames_doc_object = [
+        {
+            "bucket": uuid4().hex[:6],
+            "object_keys": [Path(uuid4().hex[:6]).as_posix() for _ in range(3)],
+        },
+        {
+            "bucket": uuid4().hex[:6],
+            "object_keys": [Path(uuid4().hex[:6]).as_posix() for _ in range(3)],
+        },
+    ]
+
+    parameters_doc_object = [
+        {
+            "parameterName": "param_name_1",
+            "parameterValues": [
+                {
+                    "parameterValueId": 1,
+                    "parameterValue": json.dumps([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+                    "parameterValueStartDate": "2000-01-01",
+                }
+            ],
+        },
+        {
+            "parameterName": "param_name_2",
+            "parameterValues": [
+                {
+                    "parameterValueId": 2,
+                    "parameterValue": json.dumps(
+                        {
+                            "__file__": {
+                                "bucket": "data",
+                                "objectKey": f"parameters/param_name/{uuid4().hex}.dat",
+                            }
+                        }
+                    ),
+                    "parameterValueStartDate": "2000-01-01",
+                },
+                {
+                    "parameterValueId": 3,
+                    "parameterValue": json.dumps(
+                        {
+                            "__file__": {
+                                "bucket": "data",
+                                "objectKey": f"parameters/param_name/{uuid4().hex}.dat",
+                            }
+                        }
+                    ),
+                    "parameterValueStartDate": "2000-01-02",
+                },
+            ],
+        },
+        {
+            "parameterName": "param_name_4",
+            "parameterValues": [
+                {
+                    "parameterValueId": 4,
+                    "parameterValue": json.dumps(
+                        {"a": 1, "b": 3.14159, "c": "foo", "d": [1, 2, 3]}
+                    ),
+                    "parameterValueStartDate": "2000-01-01",
+                }
+            ],
+        },
+    ]
+
     def __init__(self, *args, **kwargs):
         pass
 
@@ -352,7 +425,9 @@ class FakeGQLClient:
                 InputDatasetInputDatasetPartResponse(
                     inputDatasetPart=InputDatasetPartResponse(
                         inputDatasetPartId=1,
-                        inputDatasetPartDocument='[{"parameterName": "", "parameterValues": [{"parameterValueId": 1, "parameterValue": "[[1,2,3],[4,5,6],[7,8,9]]", "parameterValueStartDate": "1/1/2000"}]}]',
+                        inputDatasetPartDocument=json.dumps(
+                            self.parameters_doc_object
+                        ),
                         inputDatasetPartType=InputDatasetPartTypeResponse(
                             inputDatasetPartTypeName="parameters"
                         ),
@@ -361,15 +436,9 @@ class FakeGQLClient:
                 InputDatasetInputDatasetPartResponse(
                     inputDatasetPart=InputDatasetPartResponse(
                         inputDatasetPartId=2,
-                        inputDatasetPartDocument="""[
-                            {
-                                "bucket": "bucket_name",
-                                "object_keys": [
-                                    "key1",
-                                    "key2"
-                                ]
-                            }
-                        ]""",
+                        inputDatasetPartDocument=json.dumps(
+                            self.observe_frames_doc_object
+                        ),
                         inputDatasetPartType=InputDatasetPartTypeResponse(
                             inputDatasetPartTypeName="observe_frames"
                         ),
@@ -378,15 +447,9 @@ class FakeGQLClient:
                 InputDatasetInputDatasetPartResponse(
                     inputDatasetPart=InputDatasetPartResponse(
                         inputDatasetPartId=3,
-                        inputDatasetPartDocument="""[
-                            {
-                                "bucket": "bucket_name",
-                                "object_keys": [
-                                    "key3",
-                                    "key4"
-                                ]
-                            }
-                        ]""",
+                        inputDatasetPartDocument=json.dumps(
+                            self.calibration_frames_doc_object
+                        ),
                         inputDatasetPartType=InputDatasetPartTypeResponse(
                             inputDatasetPartTypeName="calibration_frames"
                         ),
@@ -417,14 +480,6 @@ class FakeGQLClient:
         ...
 
 
-class FakeGQLClientNoRecipeConfiguration(FakeGQLClient):
-    def execute_gql_query(self, **kwargs):
-        response = super().execute_gql_query(**kwargs)
-        if type(response[0]) == RecipeRunResponse:
-            response[0].configuration = {}
-        return response
-
-
 # All the following stuff is copied from dkist-processing-pac
 def compute_telgeom(time_hst: Time):
     dkist_lon = (156 + 15 / 60.0 + 21.7 / 3600.0) * (-1)
@@ -774,43 +829,21 @@ def task_with_input_dataset(
     yield task
 
 
-def create_parameter_files(task: WorkflowTaskBase, expected_parameters: dict):
+def create_parameter_files(
+    task: WorkflowTaskBase, parameters_doc: list[dict] = FakeGQLClient.parameters_doc_object
+):
     """
-    Create the parameter files required by the task.
-
-    Parameters
-    ----------
-    task
-        The task associated with these parameters
-
-    expected_parameters
-        A dict of parameters with the format shown below
-
-    Returns
-    -------
-    None
-
-    expected_parameters is a dict with the parameter names as the keys
-    and the values are a list of value dicts for each parameter:
-    expected_parameters =
-        { 'parameter_name_1': [param_dict_1, param_dict_2, ...],
-          'parameter_name_2': [param_dict_1, param_dict_2, ...],
-          ...
-        }
-    where the param_dicts have the following format:
-    sample_param_dict =
-        { "parameterValueId": <param_id>,
-          "parameterValue": <param_value>,
-          "parameterValueStartDate": <start_date>
-        }
+    Create the parameter files specified in the parameters document returned by the metadata store.
+
+    This fixture assumes that the JSON parameters document has already been loaded into a python
+    structure, but the parameter values themselves are still JSON.
     """
-    # Loop over all the parameter values. Each value is a list of parameterValue dicts
-    for expected_parameter_values in expected_parameters.values():
-        for value_dict in expected_parameter_values:
-            if "__file__" not in value_dict["parameterValue"]:
+    for parameter in parameters_doc:
+        for value in parameter["parameterValues"]:
+            if "__file__" not in value["parameterValue"]:
                 continue
-            value = json.loads(value_dict["parameterValue"])
-            param_path = value["__file__"]["objectKey"]
+            parameter_value = json.loads(value["parameterValue"])
+            param_path = parameter_value["__file__"]["objectKey"]
             file_path = task.scratch.workflow_base_path / Path(param_path)
             if not file_path.parent.exists():
                 file_path.parent.mkdir(parents=True, exist_ok=True)
@@ -308,26 +308,8 @@ def test_input_dataset_parameters(
     task = task_with_input_dataset
     doc_part, _ = input_dataset_parts
     doc_part = doc_part or []  # None case parsing of expected values
-    """
-    expected_parameters is a dict with the parameter names as the keys
-    and the values are a list of value dicts for each parameter:
-    expected_parameters =
-        { 'parameter_name_1': [param_dict_1, param_dict_2, ...],
-          'parameter_name_2': [param_dict_1, param_dict_2, ...],
-          ...
-        }
-    where the param_dicts have the following format:
-    sample_param_dict =
-        { "parameterValueId": <param_id>,
-          "parameterValue": <param_value>,
-          "parameterValueStartDate": <start_date>
-        }
-    """
-    expected_parameters = dict()
-    for item in doc_part:
-        expected_parameters[item["parameterName"]] = item["parameterValues"]
-    create_parameter_files(task, expected_parameters)
-    # key is param name, values is list of InputDatasetParameterValue objects
+    create_parameter_files(task, doc_part)
+    expected_parameters = {item["parameterName"]: item["parameterValues"] for item in doc_part}
     for key, values in task.input_dataset_parameters.items():
         assert key in expected_parameters
         expected_values = expected_parameters[key]
@@ -18,13 +18,14 @@ class TeardownTest(Teardown):
 
 @pytest.fixture()
 def make_mock_GQL_with_configuration():
-    def class_generator(configuration: dict):
+    def class_generator(teardown_option: bool | None):
         class TeardownFakeGQLClient(FakeGQLClient):
             def execute_gql_query(self, **kwargs):
                 response = super().execute_gql_query(**kwargs)
                 if isinstance(response, list):
                     if isinstance(response[0], RecipeRunResponse):
-                        response[0].configuration = configuration
+                        if isinstance(teardown_option, bool):
+                            response[0].configuration.teardown_enabled = teardown_option
                 return response
 
         return TeardownFakeGQLClient
@@ -33,18 +34,18 @@ def make_mock_GQL_with_configuration():
 
 
 @pytest.fixture(scope="session")
-def config_with_teardown_enabled() -> dict:
-    return {"teardown_enabled": True}
+def teardown_enabled() -> bool:
+    return True
 
 
 @pytest.fixture(scope="session")
-def config_with_teardown_disabled() -> dict:
-    return {"teardown_enabled": False}
+def teardown_disabled() -> bool:
+    return False
 
 
 @pytest.fixture(scope="session")
-def config_with_no_teardown() -> dict:
-    return dict()
+def teardown_default() -> None:
+    return None
 
 
 @pytest.fixture(scope="function")
@@ -75,14 +76,14 @@ def teardown_task_factory(tmp_path, recipe_run_id):
 
 
 def test_purge_data(
-    teardown_task_factory, make_mock_GQL_with_configuration, config_with_teardown_enabled, mocker
+    teardown_task_factory, make_mock_GQL_with_configuration, teardown_enabled, mocker
 ):
     """
     :Given: A Teardown task with files and tags linked to it and teardown enabled
     :When: Running the task
    :Then: All the files are deleted and the tags are removed
     """
-    FakeGQLClass = make_mock_GQL_with_configuration(config_with_teardown_enabled)
+    FakeGQLClass = make_mock_GQL_with_configuration(teardown_enabled)
     mocker.patch(
         "dkist_processing_common.tasks.mixin.metadata_store.GraphQLClient", new=FakeGQLClass
     )
@@ -102,14 +103,14 @@ def test_purge_data(
 
 
 def test_purge_data_disabled(
-    teardown_task_factory, make_mock_GQL_with_configuration, config_with_teardown_disabled, mocker
+    teardown_task_factory, make_mock_GQL_with_configuration, teardown_disabled, mocker
 ):
     """
     :Given: A Teardown task with files and tags linked to it and teardown disabled
     :When: Running the task
     :Then: All the files are not deleted and the tags remain
     """
-    FakeGQLClass = make_mock_GQL_with_configuration(config_with_teardown_disabled)
+    FakeGQLClass = make_mock_GQL_with_configuration(teardown_disabled)
     mocker.patch(
         "dkist_processing_common.tasks.mixin.metadata_store.GraphQLClient", new=FakeGQLClass
     )
@@ -129,14 +130,14 @@ def test_purge_data_disabled(
 
 
 def test_purge_data_no_config(
-    teardown_task_factory, make_mock_GQL_with_configuration, config_with_no_teardown, mocker
+    teardown_task_factory, make_mock_GQL_with_configuration, teardown_default, mocker
 ):
     """
-    :Given: A Teardown task with files and tags linked and teardown not specified in the configuration
+    :Given: A Teardown task with files and tags linked and default teardown configuration
     :When: Running the task
     :Then: All the files are deleted and the tags are removed
     """
-    FakeGQLClass = make_mock_GQL_with_configuration(config_with_no_teardown)
+    FakeGQLClass = make_mock_GQL_with_configuration(teardown_default)
     mocker.patch(
         "dkist_processing_common.tasks.mixin.metadata_store.GraphQLClient", new=FakeGQLClass
     )