dkist-processing-common 10.8.1__py3-none-any.whl → 10.8.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/235.feature.rst +3 -0
- changelog/235.misc.1.rst +2 -0
- changelog/235.misc.rst +1 -0
- dkist_processing_common/codecs/array.py +19 -0
- dkist_processing_common/codecs/basemodel.py +21 -0
- dkist_processing_common/codecs/fits.py +12 -6
- dkist_processing_common/manual.py +3 -5
- dkist_processing_common/models/graphql.py +13 -3
- dkist_processing_common/models/input_dataset.py +113 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/tasks/mixin/metadata_store.py +7 -4
- dkist_processing_common/tasks/transfer_input_data.py +61 -70
- dkist_processing_common/tests/conftest.py +24 -7
- dkist_processing_common/tests/test_codecs.py +38 -0
- dkist_processing_common/tests/test_input_dataset.py +79 -308
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_transfer_input_data.py +131 -45
- dkist_processing_common/tests/test_write_l1.py +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/METADATA +2 -2
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/RECORD +22 -17
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/WHEEL +1 -1
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- {dkist_processing_common-10.8.1.dist-info → dkist_processing_common-10.8.1rc1.dist-info}/top_level.txt +0 -0
changelog/235.feature.rst
ADDED
@@ -0,0 +1,3 @@
+Add two new codecs: Basemodel codecs are used for encoding and decoding Pydantic BaseModel objects. For decoding, the intended model
+is passed to the decoder through a keyword argument in the task read method. Array codecs are used for encoding and decoding numpy
+arrays similar to the standard np.load() and np.save(), but with the task tag-based write method.
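As a rough illustration of that workflow (a sketch, not taken from the package's tests), the snippet below assumes a WorkflowTaskBase subclass whose read()/write() methods accept decoder=/encoder= callables, as used elsewhere in this diff; the tag names and DemoModel are invented.

# Illustrative only: assumes task.read()/task.write() accept decoder=/encoder= callables.
import numpy as np
from pydantic import BaseModel

from dkist_processing_common.codecs.array import array_decoder
from dkist_processing_common.codecs.array import array_encoder
from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.codecs.basemodel import basemodel_encoder


class DemoModel(BaseModel):  # hypothetical model
    name: str
    repeats: int


def demo_round_trip(task):  # task: a WorkflowTaskBase instance
    # Pydantic model via the basemodel codec; the intended model is passed to read()
    task.write(data=DemoModel(name="x", repeats=2), tags=["DEMO_MODEL"], encoder=basemodel_encoder)
    model = next(task.read(tags=["DEMO_MODEL"], decoder=basemodel_decoder, model=DemoModel))

    # numpy array via the array codec
    task.write(data=np.arange(10), tags=["DEMO_ARRAY"], encoder=array_encoder)
    array = next(task.read(tags=["DEMO_ARRAY"], decoder=array_decoder))
    return model, array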
changelog/235.misc.1.rst
ADDED
changelog/235.misc.rst
ADDED
@@ -0,0 +1 @@
+Remove the input_dataset mixin and replace it with input_dataset Pydantic BaseModel models.
dkist_processing_common/codecs/array.py
ADDED
@@ -0,0 +1,19 @@
+"""Encoder/decoder for writing/reading numpy arrays."""
+import io
+from pathlib import Path
+
+import numpy as np
+
+from dkist_processing_common.codecs.iobase import iobase_encoder
+
+
+def array_encoder(data: np.ndarray, **np_kwargs) -> bytes:
+    """Convert a numpy array to bytes compatible with np.load()."""
+    buffer = io.BytesIO()
+    np.save(buffer, data, **np_kwargs)
+    return iobase_encoder(buffer)
+
+
+def array_decoder(path: Path, **np_kwargs) -> np.ndarray:
+    """Return the data in the file as a numpy array using np.load()."""
+    return np.load(path, **np_kwargs)
dkist_processing_common/codecs/basemodel.py
ADDED
@@ -0,0 +1,21 @@
+"""Encoder/decoder for writing and reading Pydantic BaseModel objects."""
+from pathlib import Path
+from typing import Type
+
+from pydantic import BaseModel
+
+from dkist_processing_common.codecs.bytes import bytes_decoder
+from dkist_processing_common.codecs.str import str_encoder
+
+
+def basemodel_encoder(data: BaseModel, **basemodel_kwargs) -> bytes:
+    """Convert a Pydantic BaseModel object into bytes for writing to file."""
+    data_dump = data.model_dump_json(**basemodel_kwargs)
+    return str_encoder(data_dump)
+
+
+def basemodel_decoder(path: Path, model: Type[BaseModel], **basemodel_kwargs) -> BaseModel:
+    """Return the data in the file as a Pydantic BaseModel object."""
+    data = bytes_decoder(path)
+    model_validated = model.model_validate_json(data, **basemodel_kwargs)
+    return model_validated
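A minimal, standalone round trip through these two functions, assuming only what is shown above; DemoSettings and the file name are invented, and the array codec round-trips the same way with array_encoder/array_decoder.

# Standalone round trip for the BaseModel codec; DemoSettings and the path are illustrative.
import tempfile
from pathlib import Path

from pydantic import BaseModel

from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.codecs.basemodel import basemodel_encoder


class DemoSettings(BaseModel):  # hypothetical model
    name: str
    repeats: int = 1


with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "settings.json"
    path.write_bytes(basemodel_encoder(DemoSettings(name="dark", repeats=3)))
    restored = basemodel_decoder(path, model=DemoSettings)
    assert restored.repeats == 3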
dkist_processing_common/codecs/fits.py
@@ -30,15 +30,15 @@ def fits_hdulist_encoder(hdu_list: fits.HDUList) -> bytes:
     return iobase_encoder(file_obj)


-def fits_hdu_decoder(path: Path) -> fits.PrimaryHDU | fits.CompImageHDU:
+def fits_hdu_decoder(path: Path, hdu: int | None = None) -> fits.PrimaryHDU | fits.CompImageHDU:
     """Read a Path with `fits` to produce an `HDUList`."""
     hdu_list = fits.open(path, checksum=True)
-    return _extract_hdu(hdu_list)
+    return _extract_hdu(hdu_list, hdu)


-def fits_array_decoder(path: Path, auto_squeeze: bool = True) -> np.ndarray:
+def fits_array_decoder(path: Path, hdu: int | None = None, auto_squeeze: bool = True) -> np.ndarray:
     """Read a Path with `fits` and return the `.data` property."""
-    hdu = fits_hdu_decoder(path)
+    hdu = fits_hdu_decoder(path, hdu=hdu)
     data = hdu.data

     # This conditional is explicitly to catch summit data with a dummy first axis for WCS
@@ -56,8 +56,14 @@ def fits_access_decoder(
     return fits_access_class(hdu=hdu, name=str(path), **fits_access_kwargs)


-def _extract_hdu(hdul: fits.HDUList) -> fits.PrimaryHDU | fits.CompImageHDU:
-    """
+def _extract_hdu(hdul: fits.HDUList, hdu: int | None = None) -> fits.PrimaryHDU | fits.CompImageHDU:
+    """
+    Return the fits hdu associated with the data in the hdu list.
+
+    Only search down the hdu index for the data if the hdu index is not explicitly provided.
+    """
+    if hdu is not None:
+        return hdul[hdu]
     if hdul[0].data is not None:
         return hdul[0]
     return hdul[1]
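A hedged example of the new hdu argument; the file name is hypothetical, and passing hdu=1 bypasses the automatic search in _extract_hdu.

# Hypothetical file; hdu=1 reads the first extension directly instead of letting
# _extract_hdu search for the HDU that carries data.
from pathlib import Path

from dkist_processing_common.codecs.fits import fits_array_decoder

data = fits_array_decoder(Path("compressed_frame.fits"), hdu=1)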
dkist_processing_common/manual.py
@@ -2,15 +2,13 @@
 import json
 import logging
 import shutil
-from dataclasses import asdict
-from io import BytesIO
 from pathlib import Path
 from typing import Callable
 from unittest.mock import patch

 from dkist_processing_core.task import TaskBase

-from dkist_processing_common.codecs.
+from dkist_processing_common.codecs.basemodel import basemodel_encoder
 from dkist_processing_common.models.graphql import RecipeRunProvenanceMutation
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
@@ -182,8 +180,8 @@ def writing_metadata_store_record_provenance(self, is_task_manual: bool, library
             workflowVersion=self.workflow_version,
         )
         self.write(
-            data=params
-            encoder=
+            data=params,
+            encoder=basemodel_encoder,
             tags=["PROVENANCE_RECORD"],
             relative_path=f"{self.task_name}_provenance.json",
             overwrite=True,
dkist_processing_common/models/graphql.py
@@ -3,6 +3,9 @@ from pydantic import BaseModel
 from pydantic import field_validator
 from pydantic import Json

+from dkist_processing_common.models.input_dataset import InputDatasetBaseModel
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
+

 class RecipeRunMutation(BaseModel):
     """Recipe run mutation record."""
@@ -37,13 +40,19 @@ class InputDatasetPartTypeResponse(BaseModel):
     inputDatasetPartTypeName: str


-class InputDatasetPartResponse(
+class InputDatasetPartResponse(InputDatasetBaseModel):
     """Response class for the input dataset part entity."""

     inputDatasetPartId: int
-    inputDatasetPartDocument: Json[
+    # inputDatasetPartDocument : Json[InputDatasetPartDocumentList] # will work in gqlclient v2
+    inputDatasetPartDocument: Json[list]
     inputDatasetPartType: InputDatasetPartTypeResponse

+    @field_validator("inputDatasetPartDocument", mode="after")
+    @classmethod
+    def _use_frame_or_parameter_model(cls, value_list): # not needed for gqlclient v2
+        return InputDatasetPartDocumentList(doc_list=value_list)
+

 class InputDatasetInputDatasetPartResponse(BaseModel):
     """Response class for the join entity between input datasets and input dataset parts."""
@@ -103,11 +112,12 @@ class RecipeRunResponse(BaseModel):
     recipeInstance: RecipeInstanceResponse
     recipeInstanceId: int
     recipeRunProvenances: list[RecipeRunProvenanceResponse]
+    # configuration: Json[RecipeRunConfiguration] | None # will work in gqlclient v2
     configuration: Json[dict] | None

     @field_validator("configuration", mode="after")
     @classmethod
-    def _use_recipe_run_configuration_model(cls, value):
+    def _use_recipe_run_configuration_model(cls, value): # not needed for gqlclient v2
         if value is None:
             return RecipeRunConfiguration()
         return RecipeRunConfiguration.model_validate(value)
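A sketch of what the new validator does to a raw part document; in the pipeline gqlclient builds this response, so the hand-constructed values below are purely illustrative (the document models are defined in the new input_dataset module shown next).

# Hand-built response for illustration; normally gqlclient constructs this object.
from dkist_processing_common.models.graphql import InputDatasetPartResponse
from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList

part = InputDatasetPartResponse(
    inputDatasetPartId=1,
    inputDatasetPartDocument='[{"bucket": "data", "object_keys": ["frames/frame_0001.fits"]}]',
    inputDatasetPartType={"inputDatasetPartTypeName": "observe_frames"},
)
# The after-validator wraps the parsed Json[list] in the typed document model.
assert isinstance(part.inputDatasetPartDocument, InputDatasetPartDocumentList)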
dkist_processing_common/models/input_dataset.py
ADDED
@@ -0,0 +1,113 @@
+"""Input dataset models for the inputDatasetPartDocument from the metadata store api."""
+import json
+from datetime import datetime
+from typing import Any
+
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+from pydantic import field_serializer
+from pydantic import field_validator
+from pydantic import Json
+from pydantic import PlainSerializer
+from pydantic.alias_generators import to_camel
+from typing_extensions import Annotated
+
+
+class InputDatasetBaseModel(BaseModel):
+    """Custom BaseModel for input datasets."""
+
+    model_config = ConfigDict(
+        alias_generator=to_camel, validate_by_name=True, validate_by_alias=True
+    )
+
+    def model_dump(self, **kwargs) -> dict:
+        """Dump models as they were in the metadata store."""
+        kwargs.setdefault("exclude_defaults", True)
+        kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
+        return super().model_dump(**kwargs)
+
+    def model_dump_json(self, **kwargs) -> str:
+        """Dump models as they were in the metadata store."""
+        kwargs.setdefault("exclude_defaults", True)
+        kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
+        return super().model_dump_json(**kwargs)
+
+
+class InputDatasetObject(InputDatasetBaseModel):
+    """Input dataset object validator for a single file."""
+
+    bucket: str
+    object_key: str
+    tag: str | None = None
+
+
+class InputDatasetFilePointer(InputDatasetBaseModel):
+    """Wrapper for InputDatasetObject files."""
+
+    file_pointer: InputDatasetObject = Field(alias="__file__")
+
+
+class InputDatasetParameterValue(InputDatasetBaseModel):
+    """Input dataset parameter value validator."""
+
+    parameter_value_id: int
+    # parameter_value: Json[InputDatasetFilePointer] | Json[Any] # will work in gqlclient v2
+    parameter_value: Json[Any]
+    parameter_value_start_date: Annotated[
+        datetime, Field(default=datetime(1, 1, 1)), PlainSerializer(lambda x: x.isoformat())
+    ]
+
+    @field_validator("parameter_value", mode="after")
+    @classmethod
+    def validate_parameter_value(cls, param_val):
+        """Decode and provide additional validation for parameter_value types."""
+        match param_val:
+            case {"__file__": _}:
+                return InputDatasetFilePointer.model_validate(param_val)
+            case _:
+                return param_val
+
+    @field_serializer("parameter_value")
+    def serialize_parameter_value(self, param_val):
+        """Serialize the parameter_value types."""
+        if isinstance(param_val, InputDatasetBaseModel):
+            return json.dumps(param_val.model_dump())
+        return json.dumps(param_val)
+
+
+class InputDatasetParameter(InputDatasetBaseModel):
+    """Parsing of the inputDatasetPartDocument that is relevant for parameters."""
+
+    parameter_name: str
+    parameter_values: list[InputDatasetParameterValue]
+
+    @property
+    def input_dataset_objects(self) -> list[InputDatasetObject]:
+        """Find and return list of InputDatasetObjects."""
+        object_list = []
+        for param in self.parameter_values:
+            if isinstance(param.parameter_value, InputDatasetFilePointer):
+                object_list.append(param.parameter_value.file_pointer)
+        return object_list
+
+
+class InputDatasetFrames(InputDatasetBaseModel):
+    """Parsing of the inputDatasetPartDocument that is relevant for frames."""
+
+    bucket: str
+    object_keys: list[str] = Field(alias="object_keys") # not camel case in metadata store
+
+    @property
+    def input_dataset_objects(self) -> list[InputDatasetObject]:
+        """Convert a single bucket and a list of object_keys list into a list of InputDatasetObjects."""
+        object_list = []
+        for frame in self.object_keys:
+            object_list.append(InputDatasetObject(bucket=self.bucket, object_key=frame))
+        return object_list
+
+
+class InputDatasetPartDocumentList(InputDatasetBaseModel):
+    """List of either InputDatasetFrames or InputDatasetParameter objects."""
+
+    doc_list: list[InputDatasetFrames] | list[InputDatasetParameter] = Field(alias="doc_list")
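A short, hedged example of these models validating a parameters-style part document; the parameter name and object key are invented, and the file-pointer value arrives as a JSON string exactly as Json[Any] expects.

# Hypothetical parameters part document validated with the new models.
from dkist_processing_common.models.input_dataset import InputDatasetFilePointer
from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList

raw_doc = [
    {
        "parameterName": "inst_demo_dark_array",
        "parameterValues": [
            {
                "parameterValueId": 1,
                "parameterValue": '{"__file__": {"bucket": "data", "objectKey": "parameters/dark.npy"}}',
                "parameterValueStartDate": "2024-01-01T00:00:00",
            }
        ],
    }
]

doc = InputDatasetPartDocumentList(doc_list=raw_doc)
value = doc.doc_list[0].parameter_values[0].parameter_value
assert isinstance(value, InputDatasetFilePointer)
# input_dataset_objects collects the InputDatasetObject behind each file pointer.
print(doc.doc_list[0].input_dataset_objects)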
dkist_processing_common/models/parameters.py
@@ -1,14 +1,23 @@
 """Base class for parameter-parsing object."""
 import logging
+from contextlib import contextmanager
 from datetime import datetime
+from pathlib import Path
 from typing import Any
+from typing import Callable
 from typing import Literal

 import numpy as np
 import scipy.interpolate as spi
-from astropy.io import fits

-from dkist_processing_common.
+from dkist_processing_common._util.scratch import WorkflowFileSystem
+from dkist_processing_common.codecs.array import array_decoder
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
+from dkist_processing_common.codecs.fits import fits_array_decoder
+from dkist_processing_common.models.input_dataset import InputDatasetFilePointer
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
+from dkist_processing_common.models.tags import Tag
+

 logger = logging.getLogger(__name__)

@@ -24,9 +33,9 @@ class ParameterBase:

     To use in an instrument pipeline a subclass is required. Here's a simple, but complete example::

-        class InstParameters(ParameterBase)
-            def __init__(self,
-                super().__init__(
+        class InstParameters(ParameterBase):
+            def __init__(self, scratch, some_other_parameters):
+                super().__init__(scratch=scratch)
                 self._thing = self._some_function(some_other_parameters)

             @property
@@ -34,7 +43,7 @@ class ParameterBase:
                 return self._find_most_recent_past_value("some_parameter_name")

             @property
-            def
+            def complicated_parameter(self):
                 return self._some_complicated_parsing_function("complicated_parameter_name", another_argument)


@@ -55,15 +64,16 @@ class ParameterBase:
                     workflow_version=workflow_version,
                 )

-                self.parameters = InstParameters(self.
+                self.parameters = InstParameters(scratch=self.scratch) #<------ This is the important line

-
-
+    ParameterBase needs the task scratch in order to read the parameters document written at input dataset
+    transfer. Note that the first argument to the ConstantsSubclass will *always* be scratch, but additional
+    arguments can be passed if the subclass requires them.

     Parameters
     ----------
-
-    The
+    scratch
+        The task scratch WorkflowFileSystem instance

     obs_ip_start_time
         A string containing the start date of the Observe IP task type frames. Must be in isoformat.
@@ -74,25 +84,53 @@ class ParameterBase:

     def __init__(
         self,
-
+        scratch: WorkflowFileSystem,
         obs_ip_start_time: str | None = None,
         **kwargs,
     ):
+        self.scratch = scratch
+        input_dataset_parameter_model = self._get_parameters_doc_from_file()
+        input_dataset_parameters = {}
+        if input_dataset_parameter_model is not None:
+            input_dataset_parameters = {
+                p.parameter_name: p.parameter_values for p in input_dataset_parameter_model.doc_list
+            }
         self.input_dataset_parameters = input_dataset_parameters
+
         if obs_ip_start_time is not None:
             # Specifically `not None` because we want to error normally on badly formatted strings (including "").
             self._obs_ip_start_datetime = datetime.fromisoformat(obs_ip_start_time)
         else:
             logger.info(
                 "WARNING: "
-                "The task containing this parameters object did not provide an obs ip start time
-                "
+                "The task containing this parameters object did not provide an obs ip start time, "
+                "which really only makes sense for Parsing tasks."
             )

         for parent_class in self.__class__.__bases__:
             if hasattr(parent_class, "is_param_mixin"):
                 parent_class.__init__(self, **kwargs)

+    def _read_parameter_file(
+        self, tag: str, decoder: Callable[[Path], Any], **decoder_kwargs
+    ) -> Any:
+        """Read any file in the task scratch instance."""
+        paths = list(self.scratch.find_all(tags=tag))
+        if len(paths) == 0:
+            logger.info(f"WARNING: There is no parameter file for {tag = }")
+        if len(paths) == 1:
+            return decoder(paths[0], **decoder_kwargs)
+        if len(paths) > 1:
+            raise ValueError(f"There is more than one parameter file for {tag = }: {paths}")
+
+    def _get_parameters_doc_from_file(self) -> InputDatasetPartDocumentList:
+        """Get parameters doc saved at the TransferL0Data task."""
+        tag = Tag.input_dataset_parameters()
+        parameters_from_file = self._read_parameter_file(
+            tag=tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList
+        )
+        return parameters_from_file
+
     def _find_most_recent_past_value(
         self,
         parameter_name: str,
@@ -113,20 +151,19 @@ class ParameterBase:
         )
         return result

-
-
-
-
-
-
-        return
-
-
-
-
-
-
-        return result
+    def _load_param_value_from_fits(
+        self, param_obj: InputDatasetFilePointer, hdu: int = 0
+    ) -> np.ndarray:
+        """Return the data associated with a tagged parameter file saved in FITS format."""
+        tag = param_obj.file_pointer.tag
+        param_value = self._read_parameter_file(tag=tag, decoder=fits_array_decoder, hdu=hdu)
+        return param_value
+
+    def _load_param_value_from_numpy_save(self, param_obj: InputDatasetFilePointer) -> np.ndarray:
+        """Return the data associated with a tagged parameter file saved in numpy format."""
+        tag = param_obj.file_pointer.tag
+        param_value = self._read_parameter_file(tag=tag, decoder=array_decoder)
+        return param_value


 class _ParamMixinBase:
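A hedged sketch of an instrument parameters subclass built on the new scratch-backed loading; the parameter names are invented, and it assumes _find_most_recent_past_value returns the stored value, i.e. an InputDatasetFilePointer for file-backed parameters.

# Sketch only: hypothetical parameter names; file-backed values are assumed to arrive
# as the InputDatasetFilePointer objects produced by the models above.
from dkist_processing_common.models.parameters import ParameterBase


class InstParameters(ParameterBase):
    @property
    def inst_demo_dark_array(self):
        file_pointer = self._find_most_recent_past_value("inst_demo_dark_array")
        return self._load_param_value_from_numpy_save(file_pointer)

    @property
    def inst_demo_gain_array(self):
        file_pointer = self._find_most_recent_past_value("inst_demo_gain_array")
        return self._load_param_value_from_fits(file_pointer, hdu=0)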
dkist_processing_common/tasks/mixin/metadata_store.py
@@ -210,16 +210,19 @@ class MetadataStoreMixin:
         self, part_type: Literal["observe_frames", "calibration_frames", "parameters"]
     ) -> InputDatasetPartResponse:
         """Get the input dataset part by input dataset part type name."""
-
+        part_types_found = set()
+        input_dataset_part = None
         parts = (
             self.metadata_store_input_dataset_recipe_run.recipeInstance.inputDataset.inputDatasetInputDatasetParts
         )
         for part in parts:
             part_type_name = part.inputDatasetPart.inputDatasetPartType.inputDatasetPartTypeName
-            if part_type_name in
+            if part_type_name in part_types_found:
                 raise ValueError(f"Multiple input dataset parts found for {part_type_name=}.")
-
-
+            part_types_found.add(part_type_name)
+            if part_type_name == part_type:
+                input_dataset_part = part.inputDatasetPart
+        return input_dataset_part

     @property
     def metadata_store_input_dataset_observe_frames(self) -> InputDatasetPartResponse:
dkist_processing_common/tasks/transfer_input_data.py
@@ -2,35 +2,44 @@
 import logging
 from pathlib import Path

-from dkist_processing_common.codecs.
+from dkist_processing_common.codecs.basemodel import basemodel_decoder
+from dkist_processing_common.codecs.basemodel import basemodel_encoder
+from dkist_processing_common.models.input_dataset import InputDatasetObject
+from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.base import WorkflowTaskBase
 from dkist_processing_common.tasks.mixin.globus import GlobusMixin
 from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
-
-from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
+

 __all__ = ["TransferL0Data"]

 logger = logging.getLogger(__name__)


-class TransferL0Data(WorkflowTaskBase, GlobusMixin
+class TransferL0Data(WorkflowTaskBase, GlobusMixin):
     """Transfers Level 0 data and required parameter files to the scratch store."""

     def download_input_dataset(self):
-        """
-        if
-
-        self.write(
-        if
-
+        """Write the input dataset part documents to scratch with appropriate tags."""
+        if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
+            doc = observe_frames_part.inputDatasetPartDocument
+            self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
+        if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
+            doc = calibration_frames_part.inputDatasetPartDocument
             self.write(
-
+                data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
             )
-        if
-
-        self.
+        if parameters_part := self.metadata_store_input_dataset_parameters:
+            doc = parameters_part.inputDatasetPartDocument
+            self.add_file_tags_to_parameters_doc(param_doc=doc)
+            self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
+
+    def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
+        """Update the input dataset document with the location of the file parameters."""
+        for doc_item in param_doc.doc_list:
+            for obj in doc_item.input_dataset_objects:
+                obj.tag = Tag.parameter(Path(obj.object_key).name)

     def format_transfer_items(
         self, input_dataset_objects: list[InputDatasetObject]
@@ -49,77 +58,59 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
         )
         return transfer_items

-    def
+    def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
         """Format the list of frames as transfer items to be used by globus."""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        """
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.input(), Tag.frame()])
-
-    def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
-        """
-        Tag all the parameter files with 'parameter'.
-
-        Parameters
-        ----------
-        transfer_items
-            List of items to be tagged
-
-        Returns
-        -------
-        None
-        """
-        scratch_items = [
-            self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-        ]
-        for si in scratch_items:
-            self.tag(si, tags=[Tag.parameter(si.name)])
+        doc = next(
+            self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
+            None,
+        )
+        doc_list = doc.doc_list if doc else []
+        input_dataset_objects = []
+        for doc_item in doc_list:
+            input_dataset_objects += doc_item.input_dataset_objects
+        return input_dataset_objects
+
+    def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
+        """Tag all the transferred input files."""
+        for obj in input_dataset_objects:
+            obj_path = self.scratch.absolute_path(obj.object_key)
+            if obj.tag:
+                self.tag(obj_path, tags=obj.tag)
+            else:
+                self.tag(obj_path, tags=[Tag.input(), Tag.frame()])

     def run(self) -> None:
         """Execute the data transfer."""
         with self.apm_task_step("Change Status to InProgress"):
             self.metadata_store_change_recipe_run_to_inprogress()

-        with self.apm_task_step("Download Input Dataset"):
+        with self.apm_task_step("Download Input Dataset Documents"):
             self.download_input_dataset()

-        with self.apm_task_step("
-
-
-
-
-
-
+        with self.apm_task_step("Build Input Dataset Transfer List"):
+            observe_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_observe_frames()
+            )
+            calibration_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_calibration_frames()
+            )
+            parameter_transfer_objects = self.build_transfer_list(
+                doc_tag=Tag.input_dataset_parameters()
+            )
+            transfer_objects = (
+                observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
+            )
+            if len(observe_transfer_objects + calibration_transfer_objects) == 0:
+                raise ValueError("No input dataset frames found to transfer")

         with self.apm_task_step("Transfer Input Frames and Parameter Files via Globus"):
             self.globus_transfer_object_store_to_scratch(
-                transfer_items=
-                label=f"Transfer
+                transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
+                label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
             )

         with self.apm_processing_step("Tag Input Frames and Parameter Files"):
-            self.
-            self.tag_parameter_objects(transfer_items=parameter_transfer_items)
+            self.tag_transfer_objects(input_dataset_objects=transfer_objects)

     def rollback(self):
         """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
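A standalone sketch of the tagging performed by add_file_tags_to_parameters_doc, using only the models shown above; the parameter name and object key are hypothetical.

# Standalone illustration of the parameter-file tagging step.
from pathlib import Path

from dkist_processing_common.models.input_dataset import InputDatasetParameter
from dkist_processing_common.models.tags import Tag

param = InputDatasetParameter(
    parameter_name="inst_demo_dark_array",
    parameter_values=[
        {
            "parameterValueId": 1,
            "parameterValue": '{"__file__": {"bucket": "data", "objectKey": "parameters/dark.npy"}}',
        }
    ],
)
for obj in param.input_dataset_objects:
    # The tag recorded here is what tag_transfer_objects later applies to the transferred file.
    obj.tag = Tag.parameter(Path(obj.object_key).name)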