dkist-processing-common 10.5.4__py3-none-any.whl → 12.1.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changelog/280.misc.rst +1 -0
- changelog/282.feature.2.rst +2 -0
- changelog/282.feature.rst +2 -0
- changelog/284.feature.rst +1 -0
- changelog/285.feature.rst +2 -0
- changelog/285.misc.rst +2 -0
- changelog/286.feature.rst +2 -0
- changelog/287.misc.rst +1 -0
- dkist_processing_common/__init__.py +1 -0
- dkist_processing_common/_util/constants.py +1 -0
- dkist_processing_common/_util/graphql.py +1 -0
- dkist_processing_common/_util/scratch.py +9 -9
- dkist_processing_common/_util/tags.py +1 -0
- dkist_processing_common/codecs/array.py +20 -0
- dkist_processing_common/codecs/asdf.py +9 -3
- dkist_processing_common/codecs/basemodel.py +22 -0
- dkist_processing_common/codecs/bytes.py +1 -0
- dkist_processing_common/codecs/fits.py +37 -9
- dkist_processing_common/codecs/iobase.py +1 -0
- dkist_processing_common/codecs/json.py +1 -0
- dkist_processing_common/codecs/path.py +1 -0
- dkist_processing_common/codecs/quality.py +1 -1
- dkist_processing_common/codecs/str.py +1 -0
- dkist_processing_common/config.py +64 -25
- dkist_processing_common/manual.py +6 -8
- dkist_processing_common/models/constants.py +373 -37
- dkist_processing_common/models/dkist_location.py +27 -0
- dkist_processing_common/models/fits_access.py +48 -0
- dkist_processing_common/models/flower_pot.py +231 -9
- dkist_processing_common/models/fried_parameter.py +41 -0
- dkist_processing_common/models/graphql.py +66 -75
- dkist_processing_common/models/input_dataset.py +117 -0
- dkist_processing_common/models/message.py +1 -1
- dkist_processing_common/models/message_queue_binding.py +1 -1
- dkist_processing_common/models/metric_code.py +2 -0
- dkist_processing_common/models/parameters.py +65 -28
- dkist_processing_common/models/quality.py +50 -5
- dkist_processing_common/models/tags.py +23 -21
- dkist_processing_common/models/task_name.py +3 -2
- dkist_processing_common/models/telemetry.py +28 -0
- dkist_processing_common/models/wavelength.py +3 -1
- dkist_processing_common/parsers/average_bud.py +46 -0
- dkist_processing_common/parsers/cs_step.py +13 -12
- dkist_processing_common/parsers/dsps_repeat.py +6 -4
- dkist_processing_common/parsers/experiment_id_bud.py +12 -4
- dkist_processing_common/parsers/id_bud.py +42 -27
- dkist_processing_common/parsers/l0_fits_access.py +5 -3
- dkist_processing_common/parsers/l1_fits_access.py +51 -23
- dkist_processing_common/parsers/lookup_bud.py +125 -0
- dkist_processing_common/parsers/near_bud.py +21 -20
- dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
- dkist_processing_common/parsers/proposal_id_bud.py +13 -5
- dkist_processing_common/parsers/quality.py +2 -0
- dkist_processing_common/parsers/retarder.py +32 -0
- dkist_processing_common/parsers/single_value_single_key_flower.py +6 -1
- dkist_processing_common/parsers/task.py +8 -6
- dkist_processing_common/parsers/time.py +178 -72
- dkist_processing_common/parsers/unique_bud.py +21 -22
- dkist_processing_common/parsers/wavelength.py +5 -3
- dkist_processing_common/tasks/__init__.py +3 -2
- dkist_processing_common/tasks/assemble_movie.py +4 -3
- dkist_processing_common/tasks/base.py +59 -60
- dkist_processing_common/tasks/l1_output_data.py +54 -53
- dkist_processing_common/tasks/mixin/globus.py +24 -27
- dkist_processing_common/tasks/mixin/interservice_bus.py +1 -0
- dkist_processing_common/tasks/mixin/metadata_store.py +108 -243
- dkist_processing_common/tasks/mixin/object_store.py +22 -0
- dkist_processing_common/tasks/mixin/quality/__init__.py +1 -0
- dkist_processing_common/tasks/mixin/quality/_base.py +8 -1
- dkist_processing_common/tasks/mixin/quality/_metrics.py +166 -14
- dkist_processing_common/tasks/output_data_base.py +4 -3
- dkist_processing_common/tasks/parse_l0_input_data.py +277 -15
- dkist_processing_common/tasks/quality_metrics.py +9 -9
- dkist_processing_common/tasks/teardown.py +7 -7
- dkist_processing_common/tasks/transfer_input_data.py +67 -69
- dkist_processing_common/tasks/trial_catalog.py +77 -17
- dkist_processing_common/tasks/trial_output_data.py +16 -17
- dkist_processing_common/tasks/write_l1.py +102 -72
- dkist_processing_common/tests/conftest.py +32 -173
- dkist_processing_common/tests/mock_metadata_store.py +271 -0
- dkist_processing_common/tests/test_assemble_movie.py +4 -4
- dkist_processing_common/tests/test_assemble_quality.py +32 -4
- dkist_processing_common/tests/test_base.py +5 -19
- dkist_processing_common/tests/test_codecs.py +103 -12
- dkist_processing_common/tests/test_constants.py +15 -0
- dkist_processing_common/tests/test_dkist_location.py +15 -0
- dkist_processing_common/tests/test_fits_access.py +56 -19
- dkist_processing_common/tests/test_flower_pot.py +147 -5
- dkist_processing_common/tests/test_fried_parameter.py +27 -0
- dkist_processing_common/tests/test_input_dataset.py +78 -361
- dkist_processing_common/tests/test_interservice_bus.py +1 -0
- dkist_processing_common/tests/test_interservice_bus_mixin.py +1 -1
- dkist_processing_common/tests/test_manual_processing.py +33 -0
- dkist_processing_common/tests/test_output_data_base.py +5 -7
- dkist_processing_common/tests/test_parameters.py +71 -22
- dkist_processing_common/tests/test_parse_l0_input_data.py +115 -32
- dkist_processing_common/tests/test_publish_catalog_messages.py +2 -24
- dkist_processing_common/tests/test_quality.py +1 -0
- dkist_processing_common/tests/test_quality_mixin.py +255 -23
- dkist_processing_common/tests/test_scratch.py +2 -1
- dkist_processing_common/tests/test_stems.py +511 -168
- dkist_processing_common/tests/test_submit_dataset_metadata.py +3 -7
- dkist_processing_common/tests/test_tags.py +1 -0
- dkist_processing_common/tests/test_task_name.py +1 -1
- dkist_processing_common/tests/test_task_parsing.py +17 -7
- dkist_processing_common/tests/test_teardown.py +28 -24
- dkist_processing_common/tests/test_transfer_input_data.py +270 -125
- dkist_processing_common/tests/test_transfer_l1_output_data.py +2 -3
- dkist_processing_common/tests/test_trial_catalog.py +83 -8
- dkist_processing_common/tests/test_trial_output_data.py +46 -73
- dkist_processing_common/tests/test_workflow_task_base.py +8 -10
- dkist_processing_common/tests/test_write_l1.py +298 -76
- dkist_processing_common-12.1.0rc1.dist-info/METADATA +265 -0
- dkist_processing_common-12.1.0rc1.dist-info/RECORD +134 -0
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/WHEEL +1 -1
- docs/conf.py +1 -0
- docs/index.rst +1 -1
- docs/landing_page.rst +13 -0
- dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
- dkist_processing_common-10.5.4.dist-info/METADATA +0 -175
- dkist_processing_common-10.5.4.dist-info/RECORD +0 -112
- {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/base.py

@@ -1,17 +1,21 @@
 """Wrappers for all workflow tasks."""
+
 import json
 import logging
 import re
 from abc import ABC
+from importlib import metadata
 from pathlib import Path
-from types import NoneType
 from typing import Any
 from typing import Generator
 from typing import Iterable
 from typing import Type
 
-import pkg_resources
 from dkist_processing_core import TaskBase
+from opentelemetry.metrics import CallbackOptions
+from opentelemetry.metrics import Counter
+from opentelemetry.metrics import ObservableGauge
+from opentelemetry.metrics import Observation
 
 from dkist_processing_common._util.scratch import WorkflowFileSystem
 from dkist_processing_common._util.tags import TagDB
@@ -21,6 +25,7 @@ from dkist_processing_common.config import common_configurations
 from dkist_processing_common.models.constants import ConstantsBase
 from dkist_processing_common.models.tags import StemName
 from dkist_processing_common.models.tags import Tag
+from dkist_processing_common.models.telemetry import ObservableProgress
 from dkist_processing_common.tasks.mixin.metadata_store import MetadataStoreMixin
 
 __all__ = ["WorkflowTaskBase", "tag_type_hint"]
@@ -66,7 +71,6 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             workflow_name=workflow_name,
             workflow_version=workflow_version,
         )
-        self.task_name = self.__class__.__name__
         self.scratch = WorkflowFileSystem(recipe_run_id=recipe_run_id, task_name=self.task_name)
         self.constants = self.constants_model_class(
             recipe_run_id=recipe_run_id, task_name=self.task_name
@@ -76,50 +80,30 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             recipe_run_id=recipe_run_id, task_name=self.task_name, namespace="counter"
         )
 
-
-
-
-
-
-        arg_span_type: str = None,
-        arg_labels: dict[str, str] = None,
-        **kwargs,
-    ):
-        """Groom inputs to apm_step to handle various kwarg collisions."""
-        if "span_type" in kwargs:
-            raise RuntimeError(
-                f"Cannot specify 'span_type' {kwargs['span_type']} in step that forces is it to be {arg_span_type}"
-            )
-
-        if "labels" in kwargs:
-            arg_labels.update(kwargs["labels"])
-            del kwargs["labels"]
-        logger.info(
-            f"Recording APM span: {name = }, {arg_span_type = }, {arg_labels = }, "
-            f"recipe_run_id = {self.recipe_run_id}"
+        # meter instruments
+        self.read_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.reads"),
+            unit="1",
+            description="The number of reads executed in the processing stack.",
         )
-
-
-
-
-        return self.apm_type_base(
-            name, *args, arg_span_type="code.task", arg_labels={"type": "task"}, **kwargs
+        self.write_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.writes"),
+            unit="1",
+            description="The number of writes executed in the processing stack.",
         )
-
-
-
-
-
-            *args,
-            arg_span_type="code.processing",
-            arg_labels={"type": "processing"},
-            **kwargs,
+        self.outer_loop_progress = ObservableProgress()
+        self.outer_loop_progress_gauge: ObservableGauge = self.meter.create_observable_gauge(
+            name=self.format_metric_name("tasks.outer.loop.progress"),
+            description="The progress of a task through the main processing loop.",
+            callbacks=[lambda options: self.outer_loop_run_progress(options)],
         )
 
-    def
-
-
-
+    def outer_loop_run_progress(
+        self, options: CallbackOptions
+    ) -> Generator[Observation, None, None]:
+        """Observe the progress of the current task as a percentage."""
+        yield Observation(
+            self.outer_loop_progress.percent_complete, attributes=self.base_telemetry_attributes
         )
 
     @property
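The instrumentation added above follows the stock OpenTelemetry metrics pattern: synchronous counters updated at the call site, plus an observable gauge whose callback is polled on each collection cycle. Below is a minimal self-contained sketch of that pattern, assuming only the `opentelemetry-api` package; the meter name, attributes, and the plain dict standing in for `ObservableProgress` are illustrative, not the package's real objects.

```python
# Sketch of the counter + observable-gauge pattern used in WorkflowTaskBase.
# Assumes a configured MeterProvider; all names here are illustrative.
from opentelemetry import metrics
from opentelemetry.metrics import CallbackOptions, Observation

meter = metrics.get_meter("example.tasks")

read_counter = meter.create_counter(
    name="tasks.reads",
    unit="1",
    description="Number of reads executed.",
)

progress = {"percent_complete": 0.0}  # stand-in for ObservableProgress

def observe_progress(options: CallbackOptions):
    # Polled by the SDK on each metric collection cycle.
    yield Observation(progress["percent_complete"], attributes={"task": "Example"})

progress_gauge = meter.create_observable_gauge(
    name="tasks.outer.loop.progress",
    description="Progress of a task through its main loop.",
    callbacks=[observe_progress],
)

# Synchronous instruments are updated where the work happens;
# the gauge is read automatically on the next collection cycle.
read_counter.add(1, attributes={"task": "Example"})
progress["percent_complete"] = 50.0
```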
@@ -130,13 +114,20 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
     @property
     def library_versions(self) -> str:
         """Harvest the dependency names and versions from the environment for all packages beginning with 'dkist' or are a requirement for a package beginning with 'dkist'."""
-        distributions = {
+        distributions = {
+            d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
+        }
         libraries = {}
-        for pkg in
-            if pkg.
-                libraries[pkg.
-                for req in
-
+        for pkg in metadata.distributions():
+            if pkg.name.startswith("dkist"):
+                libraries[pkg.name.lower().replace("_", "-")] = pkg.version
+                for req in metadata.requires(pkg.name):
+                    is_extra_requirement = "extra" in req
+                    if not is_extra_requirement:
+                        key = re.split(r"[ \[=<>~!]", req.lower())[
+                            0
+                        ]  # get the raw name of the package
+                        libraries[key] = distributions[key]
         return json.dumps(libraries)
 
     def _record_provenance(self):
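The `library_versions` rewrite above replaces the deprecated `pkg_resources` API with the standard library's `importlib.metadata`. A condensed sketch of the same harvesting logic follows; the `or []` guard and the `dict.get` fallback are defensive additions not present in the diff.

```python
# Harvest versions of installed "dkist*" packages and their declared
# requirements using only the standard library.
import re
from importlib import metadata

distributions = {
    d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
}

libraries = {}
for pkg in metadata.distributions():
    if pkg.name.startswith("dkist"):
        libraries[pkg.name.lower().replace("_", "-")] = pkg.version
        for req in metadata.requires(pkg.name) or []:  # requires() may return None
            if "extra" not in req:
                # Strip version specifiers and extras to get the bare project name.
                key = re.split(r"[ \[=<>~!]", req.lower())[0]
                libraries[key] = distributions.get(key)
```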
@@ -154,9 +145,14 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         """Execute any pre-task setup required."""
         super().pre_run()
         if self.record_provenance or self.is_task_manual:
-            with self.
+            with self.telemetry_span("Record Provenance"):
                 self._record_provenance()
 
+    def post_run(self) -> None:
+        """Execute and post-task bookkeeping required."""
+        super().post_run()
+        self.outer_loop_progress.set_complete()
+
     def read(
         self, tags: tag_type_hint, decoder: callable = path_decoder, **decoder_kwargs
     ) -> Generator[Any, None, None]:
@@ -176,7 +172,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         **decoder_kwargs
             Additional arguments to pass to the `decoder` function.
         """
-
+        for p in self.scratch.find_all(tags=tags):
+            self.read_counter.add(amount=1, attributes=self.base_telemetry_attributes)
+            yield decoder(p, **decoder_kwargs)
 
     def write(
         self,
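Note the design of the rewritten `read`: because the method is a generator, the counter increments as each file is actually consumed, not once when `read` is called. A stripped-down sketch of that pattern, with illustrative names:

```python
# Per-item counting inside a generator: the increment happens on consumption.
from typing import Any, Callable, Generator, Iterable

def counted_read(
    paths: Iterable[str], count: Callable[[], None], decoder: Callable[[str], Any]
) -> Generator[Any, None, None]:
    for p in paths:
        count()           # one increment per file actually read
        yield decoder(p)
```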
@@ -214,6 +212,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         -------
         The path for the written file
         """
+        self.write_counter.add(amount=1, attributes=self.base_telemetry_attributes)
         file_obj = encoder(data, **encoder_kwargs)
         if isinstance(tags, str):
             tags = [tags]
@@ -248,7 +247,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             StemName.modstate.value,
         ]
 
-    def build_generic_tag_filename(self, tags:
+    def build_generic_tag_filename(self, tags: list) -> str:
         """
         Build a filename from a set of tags.
 
@@ -264,9 +263,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
 
         4. You can have any extension you want so long as it's ".dat".
         """
-        # This call
-        #
-        copied_tags =
+        # This call copies the input list so it doesn't get modified in place and flattens the list to allow
+        # arbitrarily nested lists.
+        copied_tags = self.scratch.parse_tags(tags)
         try:
             copied_tags.remove(StemName.frame.value)
         except ValueError:
@@ -283,8 +282,8 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         sorted_remaining_tags = sorted(copied_tags)
         filename_parts += sorted_remaining_tags
 
-        # replace spaces and
-        dash_separated_parts = [re.sub("[ _]", "-", t) for t in filename_parts]
+        # replace spaces, underscores, and colons with dashes - dynamic part (e.g. polcal `Beam 1` label) may include spaces
+        dash_separated_parts = [re.sub("[ _:]", "-", t) for t in filename_parts]
 
         base_filename = "_".join(dash_separated_parts)
         base_filename_counter = str(self.filename_counter.increment(base_filename))
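To make the sanitization change above concrete, here is a small worked example with hypothetical tag values; colons (e.g. from time strings) are now mapped to dashes along with spaces and underscores.

```python
import re

# Hypothetical tag-derived filename parts.
filename_parts = ["TASK_polcal", "Beam 1", "TIME_12:30:45"]

dash_separated_parts = [re.sub("[ _:]", "-", t) for t in filename_parts]
base_filename = "_".join(dash_separated_parts)
print(base_filename)  # -> TASK-polcal_Beam-1_TIME-12-30-45
```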
@@ -365,11 +364,11 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         Filename Counter: not rolled back but its purpose of preventing file name collisions is not impacted
         """
         super().rollback()
-        with self.
+        with self.telemetry_span("Rollback Scratch"):
             self.scratch.rollback()
-        with self.
+        with self.telemetry_span("Rollback Constants"):
             self.constants._rollback()
-        with self.
+        with self.telemetry_span("Change Recipe Run to Inprogress"):
             self.metadata_store_change_recipe_run_to_inprogress()
 
     def __exit__(self, exc_type, exc_val, exc_tb):
dkist_processing_common/tasks/l1_output_data.py

@@ -1,4 +1,5 @@
 """Task(s) for the transfer and publishing of L1 data from a production run of a processing pipeline."""
+
 import logging
 from abc import ABC
 from itertools import chain
@@ -20,7 +21,6 @@ from dkist_processing_common.tasks.mixin.quality import QualityMixin
 from dkist_processing_common.tasks.output_data_base import OutputDataBase
 from dkist_processing_common.tasks.output_data_base import TransferDataBase
 
-
 __all__ = [
     "L1OutputDataBase",
     "TransferL1Data",
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):
 
     @property
     def dataset_has_quality_data(self) -> bool:
-        """Return True if
-
+        """Return True if the dataset has quality data."""
+        path_count = self.count(tags=[Tag.output(), Tag.quality_data()])
+        return path_count > 0
 
     def rollback(self):
         """Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
@@ -54,11 +55,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
 
     def transfer_objects(self):
         """Transfer movie and L1 output frames."""
-        with self.
+        with self.telemetry_span("Upload movie"):
             # Movie needs to be transferred separately as the movie headers need to go with it
             self.transfer_movie()
 
-        with self.
+        with self.telemetry_span("Upload quality data"):
+            self.transfer_quality_data()
+
+        with self.telemetry_span("Upload science frames"):
             self.transfer_output_frames()
 
     def transfer_output_frames(self):
@@ -79,19 +83,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
     def transfer_movie(self):
         """Transfer the movie to the object store."""
         paths = list(self.read(tags=[Tag.output(), Tag.movie()]))
-
-
-
-            )
-            return
-        movie = paths[0]
-        if count := len(paths) > 1:
-            # note: this needs to be an error or the dataset receipt accounting will have an
-            # expected count > the eventual actual
+
+        count = len(paths)
+        if count != 1:
             raise RuntimeError(
-                f"
-                f"
+                f"Expected exactly one movie to upload, found {count}. "
+                f"recipe_run_id={self.recipe_run_id}"
             )
+        movie = paths[0]
         logger.info(f"Uploading Movie: recipe_run_id={self.recipe_run_id}, {movie=}")
         movie_object_key = self.format_object_key(movie)
         self.object_store_upload_movie(
@@ -101,6 +100,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
             content_type="video/mp4",
         )
 
+    def transfer_quality_data(self):
+        """Transfer quality data to the object store."""
+        paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
+        if len(paths) == 0:
+            logger.info(
+                f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
+            )
+            return
+
+        if count := len(paths) > 1:
+            # dataset inventory does not support multiple quality data object keys
+            raise RuntimeError(
+                f"Found multiple quality data files to upload. Not supported."
+                f"{count=}, recipe_run_id={self.recipe_run_id}"
+            )
+
+        with self.telemetry_span(f"Uploading the trial quality data"):
+            path = paths[0]
+            logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
+            quality_data_object_key = self.format_object_key(path)
+            self.object_store_upload_quality_data(
+                quality_data=path,
+                bucket=self.destination_bucket,
+                object_key=quality_data_object_key,
+                content_type="application/json",
+            )
+
 
 class AssembleQualityData(L1OutputDataBase, QualityMixin):
     """
@@ -120,15 +146,15 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
 
     def run(self):
         """Run method for the task."""
-        with self.
+        with self.telemetry_span("Assembling quality data"):
             quality_data = self.quality_assemble_data(polcal_label_list=self.polcal_label_list)
 
-        with self.
+        with self.telemetry_span(
             f"Saving quality data with {len(quality_data)} metrics to the file system"
         ):
             self.write(
                 quality_data,
-                tags=Tag.quality_data(),
+                tags=[Tag.output(), Tag.quality_data()],
                 encoder=quality_data_encoder,
                 relative_path=f"{self.constants.dataset_id}_quality_data.json",
             )
@@ -136,36 +162,23 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
 
 class SubmitDatasetMetadata(L1OutputDataBase):
     """
-    Add
+    Add receipt account to the metadata store.
 
-    Add the quality data to the Quality database.
     Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
-    Adds the number of files created during the calibration processing to the Processing Support table
+    Adds the number of files to be created during the calibration processing to the Processing Support table
     for use by the Dataset Catalog Locker.
     """
 
     def run(self) -> None:
        """Run method for this task."""
-        with self.
-            # each quality_data file is a list - this will combine the elements of multiple lists into a single list
-            quality_data = list(
-                chain.from_iterable(
-                    self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
-                )
-            )
-            self.metadata_store_add_quality_data(
-                dataset_id=self.constants.dataset_id, quality_data=quality_data
-            )
-        with self.apm_processing_step("Count Expected Outputs"):
+        with self.telemetry_span("Count Expected Outputs"):
             dataset_id = self.constants.dataset_id
             expected_object_count = self.count(tags=Tag.output())
-            if quality_data:
-                expected_object_count += 1
             logger.info(
                 f"Adding Dataset Receipt Account: "
                 f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
             )
-        with self.
+        with self.telemetry_span(
             f"Add Dataset Receipt Account: {dataset_id = }, {expected_object_count = }"
         ):
             self.metadata_store_add_dataset_receipt_account(
@@ -230,25 +243,13 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         messages = [CatalogObjectMessage(body=body) for body in message_bodies]
         return messages
 
-    @property
-    def quality_report_message(self) -> CreateQualityReportMessage:
-        """Create the Quality Report Message."""
-        file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
-        body = CreateQualityReportMessageBody(
-            bucket=self.destination_bucket,
-            objectName=self.format_object_key(file_name),
-            conversationId=str(self.recipe_run_id),
-            datasetId=self.constants.dataset_id,
-            incrementDatasetCatalogReceiptCount=True,
-        )
-        return CreateQualityReportMessage(body=body)
-
     def run(self) -> None:
         """Run method for this task."""
-        with self.
+        with self.telemetry_span("Gather output data"):
             frames = self.read(tags=self.output_frame_tags)
             movies = self.read(tags=[Tag.output(), Tag.movie()])
-
+            quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
+        with self.telemetry_span("Create message objects"):
             messages = []
             messages += self.frame_messages(paths=frames)
             frame_message_count = len(messages)
@@ -256,8 +257,8 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         object_message_count = len(messages) - frame_message_count
         dataset_has_quality_data = self.dataset_has_quality_data
         if dataset_has_quality_data:
-            messages.
-        with self.
+            messages += self.object_messages(paths=quality_data, object_type="QDATA")
+        with self.telemetry_span(
             f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
         ):
             self.interservice_bus_publish(messages=messages)
dkist_processing_common/tasks/mixin/globus.py

@@ -1,4 +1,5 @@
 """Mixin to add methods to a Task to support globus transfers."""
+
 import logging
 from dataclasses import dataclass
 from pathlib import Path
@@ -8,10 +9,11 @@ from globus_sdk import ConfidentialAppAuthClient
 from globus_sdk import GlobusError
 from globus_sdk import TransferClient
 from globus_sdk import TransferData
+from globus_sdk.scopes import TransferScopes
+from globus_sdk.transport import RetryConfig
 
 from dkist_processing_common.config import common_configurations
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -31,27 +33,32 @@ class GlobusTransferItem:
 class GlobusMixin:
     """Mixin to add methods to a Task to support globus transfers."""
 
-
-
-
-
-
+    def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
+        """Create a globus transfer client based on the direction of transfer and round-robin the available application credentials."""
+        if (
+            transfer_data["source_endpoint"] == common_configurations.object_store_endpoint
+        ):  # inbound
+            client_credentials = common_configurations.globus_inbound_client_credentials
+        else:  # outbound
+            client_credentials = common_configurations.globus_outbound_client_credentials
+
+        # Round-robin the client credentials based on the recipe run id
+        index = self.recipe_run_id % len(client_credentials)
+        selected_credential = client_credentials[index]
+
         confidential_client = ConfidentialAppAuthClient(
-            client_id=
-            client_secret=
-            transport_params=common_configurations.globus_transport_params,
-        )
-        authorizer = ClientCredentialsAuthorizer(
-            confidential_client, scopes="urn:globus:auth:scope:transfer.api.globus.org:all"
+            client_id=selected_credential.client_id,
+            client_secret=selected_credential.client_secret,
         )
-
-
+        authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
+        retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)
+
+        return TransferClient(authorizer=authorizer, retry_config=retry_config)
 
     def globus_transfer_scratch_to_object_store(
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from scratch to the object store."""
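The factory above picks credentials deterministically: a given recipe run always maps to the same application credential, while consecutive run ids spread load evenly across the pool. A minimal sketch of that round-robin selection, with an illustrative credential type standing in for the configuration objects:

```python
from dataclasses import dataclass

@dataclass(frozen=True)
class ClientCredential:
    """Illustrative stand-in for the configured globus application credentials."""
    client_id: str
    client_secret: str

credential_pool = [
    ClientCredential("id-a", "secret-a"),
    ClientCredential("id-b", "secret-b"),
    ClientCredential("id-c", "secret-c"),
]

def select_credential(recipe_run_id: int) -> ClientCredential:
    # Deterministic round-robin: same run id -> same credential.
    return credential_pool[recipe_run_id % len(credential_pool)]

assert select_credential(7) == credential_pool[1]  # 7 % 3 == 1
```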
@@ -60,7 +67,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.object_store_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
@@ -68,7 +74,6 @@ class GlobusMixin:
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from the object store to scratch."""
@@ -77,7 +82,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.scratch_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
@@ -87,7 +91,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         """Format a globus TransferData instance."""
@@ -95,7 +98,6 @@ class GlobusMixin:
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         for item in transfer_items:
@@ -112,7 +114,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Perform a transfer of data using globus."""
@@ -121,7 +122,6 @@ class GlobusMixin:
             destination_endpoint=destination_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         self._blocking_globus_transfer(transfer_data=transfer_data)
@@ -131,24 +131,21 @@ class GlobusMixin:
         source_endpoint: str,
         destination_endpoint: str,
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         label = label or "Data Processing Transfer"
         return TransferData(
-            transfer_client=self.globus_transfer_client,
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
 
     def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
-        tc = self.
-        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
+        tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
         transfer_result = tc.submit_transfer(transfer_data)
         task_id = transfer_result["task_id"]
+        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
         polling_interval = 60
         while not tc.task_wait(
             task_id=task_id, timeout=polling_interval, polling_interval=polling_interval