dkist-processing-common 10.5.4__py3-none-any.whl → 12.1.0rc1__py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (122)
  1. changelog/280.misc.rst +1 -0
  2. changelog/282.feature.2.rst +2 -0
  3. changelog/282.feature.rst +2 -0
  4. changelog/284.feature.rst +1 -0
  5. changelog/285.feature.rst +2 -0
  6. changelog/285.misc.rst +2 -0
  7. changelog/286.feature.rst +2 -0
  8. changelog/287.misc.rst +1 -0
  9. dkist_processing_common/__init__.py +1 -0
  10. dkist_processing_common/_util/constants.py +1 -0
  11. dkist_processing_common/_util/graphql.py +1 -0
  12. dkist_processing_common/_util/scratch.py +9 -9
  13. dkist_processing_common/_util/tags.py +1 -0
  14. dkist_processing_common/codecs/array.py +20 -0
  15. dkist_processing_common/codecs/asdf.py +9 -3
  16. dkist_processing_common/codecs/basemodel.py +22 -0
  17. dkist_processing_common/codecs/bytes.py +1 -0
  18. dkist_processing_common/codecs/fits.py +37 -9
  19. dkist_processing_common/codecs/iobase.py +1 -0
  20. dkist_processing_common/codecs/json.py +1 -0
  21. dkist_processing_common/codecs/path.py +1 -0
  22. dkist_processing_common/codecs/quality.py +1 -1
  23. dkist_processing_common/codecs/str.py +1 -0
  24. dkist_processing_common/config.py +64 -25
  25. dkist_processing_common/manual.py +6 -8
  26. dkist_processing_common/models/constants.py +373 -37
  27. dkist_processing_common/models/dkist_location.py +27 -0
  28. dkist_processing_common/models/fits_access.py +48 -0
  29. dkist_processing_common/models/flower_pot.py +231 -9
  30. dkist_processing_common/models/fried_parameter.py +41 -0
  31. dkist_processing_common/models/graphql.py +66 -75
  32. dkist_processing_common/models/input_dataset.py +117 -0
  33. dkist_processing_common/models/message.py +1 -1
  34. dkist_processing_common/models/message_queue_binding.py +1 -1
  35. dkist_processing_common/models/metric_code.py +2 -0
  36. dkist_processing_common/models/parameters.py +65 -28
  37. dkist_processing_common/models/quality.py +50 -5
  38. dkist_processing_common/models/tags.py +23 -21
  39. dkist_processing_common/models/task_name.py +3 -2
  40. dkist_processing_common/models/telemetry.py +28 -0
  41. dkist_processing_common/models/wavelength.py +3 -1
  42. dkist_processing_common/parsers/average_bud.py +46 -0
  43. dkist_processing_common/parsers/cs_step.py +13 -12
  44. dkist_processing_common/parsers/dsps_repeat.py +6 -4
  45. dkist_processing_common/parsers/experiment_id_bud.py +12 -4
  46. dkist_processing_common/parsers/id_bud.py +42 -27
  47. dkist_processing_common/parsers/l0_fits_access.py +5 -3
  48. dkist_processing_common/parsers/l1_fits_access.py +51 -23
  49. dkist_processing_common/parsers/lookup_bud.py +125 -0
  50. dkist_processing_common/parsers/near_bud.py +21 -20
  51. dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
  52. dkist_processing_common/parsers/proposal_id_bud.py +13 -5
  53. dkist_processing_common/parsers/quality.py +2 -0
  54. dkist_processing_common/parsers/retarder.py +32 -0
  55. dkist_processing_common/parsers/single_value_single_key_flower.py +6 -1
  56. dkist_processing_common/parsers/task.py +8 -6
  57. dkist_processing_common/parsers/time.py +178 -72
  58. dkist_processing_common/parsers/unique_bud.py +21 -22
  59. dkist_processing_common/parsers/wavelength.py +5 -3
  60. dkist_processing_common/tasks/__init__.py +3 -2
  61. dkist_processing_common/tasks/assemble_movie.py +4 -3
  62. dkist_processing_common/tasks/base.py +59 -60
  63. dkist_processing_common/tasks/l1_output_data.py +54 -53
  64. dkist_processing_common/tasks/mixin/globus.py +24 -27
  65. dkist_processing_common/tasks/mixin/interservice_bus.py +1 -0
  66. dkist_processing_common/tasks/mixin/metadata_store.py +108 -243
  67. dkist_processing_common/tasks/mixin/object_store.py +22 -0
  68. dkist_processing_common/tasks/mixin/quality/__init__.py +1 -0
  69. dkist_processing_common/tasks/mixin/quality/_base.py +8 -1
  70. dkist_processing_common/tasks/mixin/quality/_metrics.py +166 -14
  71. dkist_processing_common/tasks/output_data_base.py +4 -3
  72. dkist_processing_common/tasks/parse_l0_input_data.py +277 -15
  73. dkist_processing_common/tasks/quality_metrics.py +9 -9
  74. dkist_processing_common/tasks/teardown.py +7 -7
  75. dkist_processing_common/tasks/transfer_input_data.py +67 -69
  76. dkist_processing_common/tasks/trial_catalog.py +77 -17
  77. dkist_processing_common/tasks/trial_output_data.py +16 -17
  78. dkist_processing_common/tasks/write_l1.py +102 -72
  79. dkist_processing_common/tests/conftest.py +32 -173
  80. dkist_processing_common/tests/mock_metadata_store.py +271 -0
  81. dkist_processing_common/tests/test_assemble_movie.py +4 -4
  82. dkist_processing_common/tests/test_assemble_quality.py +32 -4
  83. dkist_processing_common/tests/test_base.py +5 -19
  84. dkist_processing_common/tests/test_codecs.py +103 -12
  85. dkist_processing_common/tests/test_constants.py +15 -0
  86. dkist_processing_common/tests/test_dkist_location.py +15 -0
  87. dkist_processing_common/tests/test_fits_access.py +56 -19
  88. dkist_processing_common/tests/test_flower_pot.py +147 -5
  89. dkist_processing_common/tests/test_fried_parameter.py +27 -0
  90. dkist_processing_common/tests/test_input_dataset.py +78 -361
  91. dkist_processing_common/tests/test_interservice_bus.py +1 -0
  92. dkist_processing_common/tests/test_interservice_bus_mixin.py +1 -1
  93. dkist_processing_common/tests/test_manual_processing.py +33 -0
  94. dkist_processing_common/tests/test_output_data_base.py +5 -7
  95. dkist_processing_common/tests/test_parameters.py +71 -22
  96. dkist_processing_common/tests/test_parse_l0_input_data.py +115 -32
  97. dkist_processing_common/tests/test_publish_catalog_messages.py +2 -24
  98. dkist_processing_common/tests/test_quality.py +1 -0
  99. dkist_processing_common/tests/test_quality_mixin.py +255 -23
  100. dkist_processing_common/tests/test_scratch.py +2 -1
  101. dkist_processing_common/tests/test_stems.py +511 -168
  102. dkist_processing_common/tests/test_submit_dataset_metadata.py +3 -7
  103. dkist_processing_common/tests/test_tags.py +1 -0
  104. dkist_processing_common/tests/test_task_name.py +1 -1
  105. dkist_processing_common/tests/test_task_parsing.py +17 -7
  106. dkist_processing_common/tests/test_teardown.py +28 -24
  107. dkist_processing_common/tests/test_transfer_input_data.py +270 -125
  108. dkist_processing_common/tests/test_transfer_l1_output_data.py +2 -3
  109. dkist_processing_common/tests/test_trial_catalog.py +83 -8
  110. dkist_processing_common/tests/test_trial_output_data.py +46 -73
  111. dkist_processing_common/tests/test_workflow_task_base.py +8 -10
  112. dkist_processing_common/tests/test_write_l1.py +298 -76
  113. dkist_processing_common-12.1.0rc1.dist-info/METADATA +265 -0
  114. dkist_processing_common-12.1.0rc1.dist-info/RECORD +134 -0
  115. {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/WHEEL +1 -1
  116. docs/conf.py +1 -0
  117. docs/index.rst +1 -1
  118. docs/landing_page.rst +13 -0
  119. dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
  120. dkist_processing_common-10.5.4.dist-info/METADATA +0 -175
  121. dkist_processing_common-10.5.4.dist-info/RECORD +0 -112
  122. {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/base.py

@@ -1,17 +1,21 @@
 """Wrappers for all workflow tasks."""
+
 import json
 import logging
 import re
 from abc import ABC
+from importlib import metadata
 from pathlib import Path
-from types import NoneType
 from typing import Any
 from typing import Generator
 from typing import Iterable
 from typing import Type

-import pkg_resources
 from dkist_processing_core import TaskBase
+from opentelemetry.metrics import CallbackOptions
+from opentelemetry.metrics import Counter
+from opentelemetry.metrics import ObservableGauge
+from opentelemetry.metrics import Observation

 from dkist_processing_common._util.scratch import WorkflowFileSystem
 from dkist_processing_common._util.tags import TagDB
@@ -21,6 +25,7 @@ from dkist_processing_common.config import common_configurations
 from dkist_processing_common.models.constants import ConstantsBase
 from dkist_processing_common.models.tags import StemName
 from dkist_processing_common.models.tags import Tag
+from dkist_processing_common.models.telemetry import ObservableProgress
 from dkist_processing_common.tasks.mixin.metadata_store import MetadataStoreMixin

 __all__ = ["WorkflowTaskBase", "tag_type_hint"]
@@ -66,7 +71,6 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             workflow_name=workflow_name,
             workflow_version=workflow_version,
         )
-        self.task_name = self.__class__.__name__
         self.scratch = WorkflowFileSystem(recipe_run_id=recipe_run_id, task_name=self.task_name)
         self.constants = self.constants_model_class(
             recipe_run_id=recipe_run_id, task_name=self.task_name
@@ -76,50 +80,30 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             recipe_run_id=recipe_run_id, task_name=self.task_name, namespace="counter"
         )

-    # These apm* functions provide tagged spans for APM bliss
-    def apm_type_base(
-        self,
-        name: str,
-        *args,
-        arg_span_type: str = None,
-        arg_labels: dict[str, str] = None,
-        **kwargs,
-    ):
-        """Groom inputs to apm_step to handle various kwarg collisions."""
-        if "span_type" in kwargs:
-            raise RuntimeError(
-                f"Cannot specify 'span_type' {kwargs['span_type']} in step that forces is it to be {arg_span_type}"
-            )
-
-        if "labels" in kwargs:
-            arg_labels.update(kwargs["labels"])
-            del kwargs["labels"]
-        logger.info(
-            f"Recording APM span: {name = }, {arg_span_type = }, {arg_labels = }, "
-            f"recipe_run_id = {self.recipe_run_id}"
+        # meter instruments
+        self.read_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.reads"),
+            unit="1",
+            description="The number of reads executed in the processing stack.",
         )
-        return self.apm_step(name, *args, span_type=arg_span_type, labels=arg_labels, **kwargs)
-
-    def apm_task_step(self, name: str, *args, **kwargs):
-        """Span for management/organizational/info type stuff."""
-        return self.apm_type_base(
-            name, *args, arg_span_type="code.task", arg_labels={"type": "task"}, **kwargs
+        self.write_counter: Counter = self.meter.create_counter(
+            name=self.format_metric_name("tasks.writes"),
+            unit="1",
+            description="The number of writes executed in the processing stack.",
         )
-
-    def apm_processing_step(self, name: str, *args, **kwargs):
-        """Span for computations."""
-        return self.apm_type_base(
-            name,
-            *args,
-            arg_span_type="code.processing",
-            arg_labels={"type": "processing"},
-            **kwargs,
+        self.outer_loop_progress = ObservableProgress()
+        self.outer_loop_progress_gauge: ObservableGauge = self.meter.create_observable_gauge(
+            name=self.format_metric_name("tasks.outer.loop.progress"),
+            description="The progress of a task through the main processing loop.",
+            callbacks=[lambda options: self.outer_loop_run_progress(options)],
        )

-    def apm_writing_step(self, name: str, *args, **kwargs):
-        """Span for writing to disk."""
-        return self.apm_type_base(
-            name, *args, arg_span_type="code.writing", arg_labels={"type": "writing"}, **kwargs
+    def outer_loop_run_progress(
+        self, options: CallbackOptions
+    ) -> Generator[Observation, None, None]:
+        """Observe the progress of the current task as a percentage."""
+        yield Observation(
+            self.outer_loop_progress.percent_complete, attributes=self.base_telemetry_attributes
         )

     @property
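
Note: the read/write counters and the progress gauge added above use the standard OpenTelemetry metrics API, wired through the task's own meter and format_metric_name() helpers. A minimal standalone sketch of the same pattern, assuming a console exporter and illustrative instrument names rather than the pipeline's actual configuration:

import time

from opentelemetry import metrics
from opentelemetry.metrics import CallbackOptions
from opentelemetry.metrics import Observation
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import ConsoleMetricExporter
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader

# Wire a meter provider that periodically dumps metrics to stdout (illustrative only).
reader = PeriodicExportingMetricReader(ConsoleMetricExporter(), export_interval_millis=1000)
metrics.set_meter_provider(MeterProvider(metric_readers=[reader]))
meter = metrics.get_meter("sketch")

progress = {"percent_complete": 0.0}  # stand-in for the pipeline's ObservableProgress


def observe_progress(options: CallbackOptions):
    # Called by the SDK at each collection interval; reports the current gauge value.
    yield Observation(progress["percent_complete"], attributes={"task": "sketch"})


read_counter = meter.create_counter(
    name="tasks.reads", unit="1", description="Number of reads executed."
)
meter.create_observable_gauge(
    name="tasks.outer.loop.progress",
    description="Progress through the main loop.",
    callbacks=[observe_progress],
)

for i in range(5):  # simulate a task's main loop
    read_counter.add(1, attributes={"task": "sketch"})
    progress["percent_complete"] = 100 * (i + 1) / 5
    time.sleep(0.5)

The counter is a monotonic sum updated synchronously on each read or write, while the observable gauge is pulled by the SDK through the callback, which is why the task only has to keep percent_complete current.
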
@@ -130,13 +114,20 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
     @property
     def library_versions(self) -> str:
         """Harvest the dependency names and versions from the environment for all packages beginning with 'dkist' or are a requirement for a package beginning with 'dkist'."""
-        distributions = {d.key: d.version for d in pkg_resources.working_set}
+        distributions = {
+            d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
+        }
         libraries = {}
-        for pkg in pkg_resources.working_set:
-            if pkg.key.startswith("dkist"):
-                libraries[pkg.key] = pkg.version
-                for req in pkg.requires():
-                    libraries[req.key] = distributions[req.key]
+        for pkg in metadata.distributions():
+            if pkg.name.startswith("dkist"):
+                libraries[pkg.name.lower().replace("_", "-")] = pkg.version
+                for req in metadata.requires(pkg.name):
+                    is_extra_requirement = "extra" in req
+                    if not is_extra_requirement:
+                        key = re.split(r"[ \[=<>~!]", req.lower())[
+                            0
+                        ]  # get the raw name of the package
+                        libraries[key] = distributions[key]
         return json.dumps(libraries)

     def _record_provenance(self):
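
The library_versions change above swaps the deprecated pkg_resources API for importlib.metadata. A self-contained sketch of the same harvesting logic; the two small guards (the `or []` fallback and the `key in distributions` check) are added here for standalone robustness and are not part of the code above:

import json
import re
from importlib import metadata


def harvest_library_versions(prefix: str = "dkist") -> str:
    """Report versions of installed packages starting with `prefix` plus their direct requirements."""
    distributions = {
        d.name.lower().replace("_", "-"): d.version for d in metadata.distributions()
    }
    libraries = {}
    for pkg in metadata.distributions():
        if pkg.name.startswith(prefix):
            libraries[pkg.name.lower().replace("_", "-")] = pkg.version
            for req in metadata.requires(pkg.name) or []:  # requires() may return None
                if "extra" in req:  # skip optional requirements gated behind extras
                    continue
                # a requirement string looks like 'numpy>=1.24'; keep only the bare name
                key = re.split(r"[ \[=<>~!;]", req.lower())[0]
                if key in distributions:
                    libraries[key] = distributions[key]
    return json.dumps(libraries)


print(harvest_library_versions())

importlib.metadata does not normalize names on its own, hence the lower()/replace() calls that make packages registered with underscores comparable to their dash-separated requirement strings.
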
@@ -154,9 +145,14 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         """Execute any pre-task setup required."""
         super().pre_run()
         if self.record_provenance or self.is_task_manual:
-            with self.apm_task_step("Record Provenance"):
+            with self.telemetry_span("Record Provenance"):
                 self._record_provenance()

+    def post_run(self) -> None:
+        """Execute and post-task bookkeeping required."""
+        super().post_run()
+        self.outer_loop_progress.set_complete()
+
     def read(
         self, tags: tag_type_hint, decoder: callable = path_decoder, **decoder_kwargs
     ) -> Generator[Any, None, None]:
@@ -176,7 +172,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         **decoder_kwargs
             Additional arguments to pass to the `decoder` function.
         """
-        return (decoder(p, **decoder_kwargs) for p in self.scratch.find_all(tags=tags))
+        for p in self.scratch.find_all(tags=tags):
+            self.read_counter.add(amount=1, attributes=self.base_telemetry_attributes)
+            yield decoder(p, **decoder_kwargs)

     def write(
         self,
@@ -214,6 +212,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         -------
         The path for the written file
         """
+        self.write_counter.add(amount=1, attributes=self.base_telemetry_attributes)
         file_obj = encoder(data, **encoder_kwargs)
         if isinstance(tags, str):
             tags = [tags]
@@ -248,7 +247,7 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
             StemName.modstate.value,
         ]

-    def build_generic_tag_filename(self, tags: Iterable[str]) -> str:
+    def build_generic_tag_filename(self, tags: list) -> str:
         """
         Build a filename from a set of tags.

@@ -264,9 +263,9 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):

         4. You can have any extension you want so long as it's ".dat".
         """
-        # This call to list not only copies the input object so it doesn't get modified in place, it also ensures
-        # any Iterable that got passed in is a list inside the function.
-        copied_tags = list(tags)
+        # This call copies the input list so it doesn't get modified in place and flattens the list to allow
+        # arbitrarily nested lists.
+        copied_tags = self.scratch.parse_tags(tags)
         try:
             copied_tags.remove(StemName.frame.value)
         except ValueError:
@@ -283,8 +282,8 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         sorted_remaining_tags = sorted(copied_tags)
         filename_parts += sorted_remaining_tags

-        # replace spaces and underscores with dashes - dynamic part (e.g. polcal `Beam 1` label) may include spaces
-        dash_separated_parts = [re.sub("[ _]", "-", t) for t in filename_parts]
+        # replace spaces, underscores, and colons with dashes - dynamic part (e.g. polcal `Beam 1` label) may include spaces
+        dash_separated_parts = [re.sub("[ _:]", "-", t) for t in filename_parts]

         base_filename = "_".join(dash_separated_parts)
         base_filename_counter = str(self.filename_counter.increment(base_filename))
@@ -365,11 +364,11 @@ class WorkflowTaskBase(TaskBase, MetadataStoreMixin, ABC):
         Filename Counter: not rolled back but its purpose of preventing file name collisions is not impacted
         """
         super().rollback()
-        with self.apm_writing_step("Rollback Scratch"):
+        with self.telemetry_span("Rollback Scratch"):
            self.scratch.rollback()
-        with self.apm_writing_step("Rollback Constants"):
+        with self.telemetry_span("Rollback Constants"):
            self.constants._rollback()
-        with self.apm_task_step("Change Recipe Run to Inprogress"):
+        with self.telemetry_span("Change Recipe Run to Inprogress"):
            self.metadata_store_change_recipe_run_to_inprogress()

     def __exit__(self, exc_type, exc_val, exc_tb):
dkist_processing_common/tasks/l1_output_data.py

@@ -1,4 +1,5 @@
 """Task(s) for the transfer and publishing of L1 data from a production run of a processing pipeline."""
+
 import logging
 from abc import ABC
 from itertools import chain
@@ -20,7 +21,6 @@ from dkist_processing_common.tasks.mixin.quality import QualityMixin
 from dkist_processing_common.tasks.output_data_base import OutputDataBase
 from dkist_processing_common.tasks.output_data_base import TransferDataBase

-
 __all__ = [
     "L1OutputDataBase",
     "TransferL1Data",
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):

     @property
     def dataset_has_quality_data(self) -> bool:
-        """Return True if quality data has been persisted to the metadata-store."""
-        return self.metadata_store_quality_data_exists(dataset_id=self.constants.dataset_id)
+        """Return True if the dataset has quality data."""
+        path_count = self.count(tags=[Tag.output(), Tag.quality_data()])
+        return path_count > 0

     def rollback(self):
         """Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
@@ -54,11 +55,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):

     def transfer_objects(self):
         """Transfer movie and L1 output frames."""
-        with self.apm_task_step("Upload movie"):
+        with self.telemetry_span("Upload movie"):
             # Movie needs to be transferred separately as the movie headers need to go with it
             self.transfer_movie()

-        with self.apm_task_step("Upload science frames"):
+        with self.telemetry_span("Upload quality data"):
+            self.transfer_quality_data()
+
+        with self.telemetry_span("Upload science frames"):
             self.transfer_output_frames()

     def transfer_output_frames(self):
@@ -79,19 +83,14 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
     def transfer_movie(self):
         """Transfer the movie to the object store."""
         paths = list(self.read(tags=[Tag.output(), Tag.movie()]))
-        if len(paths) == 0:
-            logger.warning(
-                f"No movies found to upload for dataset. recipe_run_id={self.recipe_run_id}"
-            )
-            return
-        movie = paths[0]
-        if count := len(paths) > 1:
-            # note: this needs to be an error or the dataset receipt accounting will have an
-            # expected count > the eventual actual
+
+        count = len(paths)
+        if count != 1:
             raise RuntimeError(
-                f"Multiple movies found to upload. Uploading the first one. "
-                f"{count=}, {movie=}, recipe_run_id={self.recipe_run_id}"
+                f"Expected exactly one movie to upload, found {count}. "
+                f"recipe_run_id={self.recipe_run_id}"
             )
+        movie = paths[0]
         logger.info(f"Uploading Movie: recipe_run_id={self.recipe_run_id}, {movie=}")
         movie_object_key = self.format_object_key(movie)
         self.object_store_upload_movie(
@@ -101,6 +100,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
             content_type="video/mp4",
         )

+    def transfer_quality_data(self):
+        """Transfer quality data to the object store."""
+        paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
+        if len(paths) == 0:
+            logger.info(
+                f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
+            )
+            return
+
+        if count := len(paths) > 1:
+            # dataset inventory does not support multiple quality data object keys
+            raise RuntimeError(
+                f"Found multiple quality data files to upload. Not supported."
+                f"{count=}, recipe_run_id={self.recipe_run_id}"
+            )
+
+        with self.telemetry_span(f"Uploading the trial quality data"):
+            path = paths[0]
+            logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
+            quality_data_object_key = self.format_object_key(path)
+            self.object_store_upload_quality_data(
+                quality_data=path,
+                bucket=self.destination_bucket,
+                object_key=quality_data_object_key,
+                content_type="application/json",
+            )
+

 class AssembleQualityData(L1OutputDataBase, QualityMixin):
     """
@@ -120,15 +146,15 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):

     def run(self):
         """Run method for the task."""
-        with self.apm_processing_step("Assembling quality data"):
+        with self.telemetry_span("Assembling quality data"):
             quality_data = self.quality_assemble_data(polcal_label_list=self.polcal_label_list)

-        with self.apm_writing_step(
+        with self.telemetry_span(
             f"Saving quality data with {len(quality_data)} metrics to the file system"
         ):
             self.write(
                 quality_data,
-                tags=Tag.quality_data(),
+                tags=[Tag.output(), Tag.quality_data()],
                 encoder=quality_data_encoder,
                 relative_path=f"{self.constants.dataset_id}_quality_data.json",
             )
@@ -136,36 +162,23 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):

 class SubmitDatasetMetadata(L1OutputDataBase):
     """
-    Add quality data and receipt account to the metadata store.
+    Add receipt account to the metadata store.

-    Add the quality data to the Quality database.
     Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
-    Adds the number of files created during the calibration processing to the Processing Support table
+    Adds the number of files to be created during the calibration processing to the Processing Support table
     for use by the Dataset Catalog Locker.
     """

     def run(self) -> None:
         """Run method for this task."""
-        with self.apm_writing_step(f"Storing quality data to metadata store"):
-            # each quality_data file is a list - this will combine the elements of multiple lists into a single list
-            quality_data = list(
-                chain.from_iterable(
-                    self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
-                )
-            )
-            self.metadata_store_add_quality_data(
-                dataset_id=self.constants.dataset_id, quality_data=quality_data
-            )
-        with self.apm_processing_step("Count Expected Outputs"):
+        with self.telemetry_span("Count Expected Outputs"):
             dataset_id = self.constants.dataset_id
             expected_object_count = self.count(tags=Tag.output())
-            if quality_data:
-                expected_object_count += 1
             logger.info(
                 f"Adding Dataset Receipt Account: "
                 f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
             )
-        with self.apm_task_step(
+        with self.telemetry_span(
             f"Add Dataset Receipt Account: {dataset_id = }, {expected_object_count = }"
         ):
             self.metadata_store_add_dataset_receipt_account(
@@ -230,25 +243,13 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         messages = [CatalogObjectMessage(body=body) for body in message_bodies]
         return messages

-    @property
-    def quality_report_message(self) -> CreateQualityReportMessage:
-        """Create the Quality Report Message."""
-        file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
-        body = CreateQualityReportMessageBody(
-            bucket=self.destination_bucket,
-            objectName=self.format_object_key(file_name),
-            conversationId=str(self.recipe_run_id),
-            datasetId=self.constants.dataset_id,
-            incrementDatasetCatalogReceiptCount=True,
-        )
-        return CreateQualityReportMessage(body=body)
-
     def run(self) -> None:
         """Run method for this task."""
-        with self.apm_task_step("Gather output data"):
+        with self.telemetry_span("Gather output data"):
             frames = self.read(tags=self.output_frame_tags)
             movies = self.read(tags=[Tag.output(), Tag.movie()])
-        with self.apm_task_step("Create message objects"):
+            quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
+        with self.telemetry_span("Create message objects"):
             messages = []
             messages += self.frame_messages(paths=frames)
             frame_message_count = len(messages)
@@ -256,8 +257,8 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
             object_message_count = len(messages) - frame_message_count
             dataset_has_quality_data = self.dataset_has_quality_data
             if dataset_has_quality_data:
-                messages.append(self.quality_report_message)
-        with self.apm_task_step(
+                messages += self.object_messages(paths=quality_data, object_type="QDATA")
+        with self.telemetry_span(
             f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
         ):
             self.interservice_bus_publish(messages=messages)
dkist_processing_common/tasks/mixin/globus.py

@@ -1,4 +1,5 @@
 """Mixin to add methods to a Task to support globus transfers."""
+
 import logging
 from dataclasses import dataclass
 from pathlib import Path
@@ -8,10 +9,11 @@ from globus_sdk import ConfidentialAppAuthClient
 from globus_sdk import GlobusError
 from globus_sdk import TransferClient
 from globus_sdk import TransferData
+from globus_sdk.scopes import TransferScopes
+from globus_sdk.transport import RetryConfig

 from dkist_processing_common.config import common_configurations

-
 logger = logging.getLogger(__name__)


@@ -31,27 +33,32 @@
 class GlobusMixin:
     """Mixin to add methods to a Task to support globus transfers."""

-    @property
-    def globus_transfer_client(self) -> TransferClient:
-        """Get the globus transfer client, creating it if it doesn't exist."""
-        if getattr(self, "_globus_transfer_client", False):
-            return self._globus_transfer_client
+    def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
+        """Create a globus transfer client based on the direction of transfer and round-robin the available application credentials."""
+        if (
+            transfer_data["source_endpoint"] == common_configurations.object_store_endpoint
+        ):  # inbound
+            client_credentials = common_configurations.globus_inbound_client_credentials
+        else:  # outbound
+            client_credentials = common_configurations.globus_outbound_client_credentials
+
+        # Round-robin the client credentials based on the recipe run id
+        index = self.recipe_run_id % len(client_credentials)
+        selected_credential = client_credentials[index]
+
         confidential_client = ConfidentialAppAuthClient(
-            client_id=common_configurations.globus_client_id,
-            client_secret=common_configurations.globus_client_secret,
-            transport_params=common_configurations.globus_transport_params,
-        )
-        authorizer = ClientCredentialsAuthorizer(
-            confidential_client, scopes="urn:globus:auth:scope:transfer.api.globus.org:all"
+            client_id=selected_credential.client_id,
+            client_secret=selected_credential.client_secret,
         )
-        self._globus_transfer_client = TransferClient(authorizer=authorizer)
-        return self._globus_transfer_client
+        authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
+        retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)
+
+        return TransferClient(authorizer=authorizer, retry_config=retry_config)

     def globus_transfer_scratch_to_object_store(
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from scratch to the object store."""
@@ -60,7 +67,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.object_store_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )

@@ -68,7 +74,6 @@ class GlobusMixin:
         self,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Transfer data from the object store to scratch."""
@@ -77,7 +82,6 @@ class GlobusMixin:
             destination_endpoint=common_configurations.scratch_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )

@@ -87,7 +91,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         """Format a globus TransferData instance."""
@@ -95,7 +98,6 @@ class GlobusMixin:
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         for item in transfer_items:
@@ -112,7 +114,6 @@ class GlobusMixin:
         destination_endpoint: str,
         transfer_items: list[GlobusTransferItem],
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> None:
         """Perform a transfer of data using globus."""
@@ -121,7 +122,6 @@ class GlobusMixin:
             destination_endpoint=destination_endpoint,
             transfer_items=transfer_items,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )
         self._blocking_globus_transfer(transfer_data=transfer_data)
@@ -131,24 +131,21 @@ class GlobusMixin:
         source_endpoint: str,
         destination_endpoint: str,
         label: str = None,
-        sync_level: str = None,
         verify_checksum: bool = True,
     ) -> TransferData:
         label = label or "Data Processing Transfer"
         return TransferData(
-            transfer_client=self.globus_transfer_client,
             source_endpoint=source_endpoint,
             destination_endpoint=destination_endpoint,
             label=label,
-            sync_level=sync_level,
             verify_checksum=verify_checksum,
         )

     def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
-        tc = self.globus_transfer_client
-        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
+        tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
         transfer_result = tc.submit_transfer(transfer_data)
         task_id = transfer_result["task_id"]
+        logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
         polling_interval = 60
         while not tc.task_wait(
             task_id=task_id, timeout=polling_interval, polling_interval=polling_interval
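
The factory above replaces the single cached transfer client with direction-aware, round-robin credential selection. A standalone sketch of that selection logic, assuming a hypothetical GlobusCredential container in place of the pipeline's common_configurations entries and mirroring the globus_sdk calls shown in the diff (the retry configuration is omitted here):

from dataclasses import dataclass

from globus_sdk import ClientCredentialsAuthorizer
from globus_sdk import ConfidentialAppAuthClient
from globus_sdk import TransferClient
from globus_sdk.scopes import TransferScopes


@dataclass
class GlobusCredential:  # illustrative stand-in for the configured credential objects
    client_id: str
    client_secret: str


def transfer_client_for(
    recipe_run_id: int,
    source_endpoint: str,
    object_store_endpoint: str,
    inbound_credentials: list[GlobusCredential],
    outbound_credentials: list[GlobusCredential],
) -> TransferClient:
    """Pick the credential pool by transfer direction, then round-robin within it."""
    if source_endpoint == object_store_endpoint:  # data is coming from the object store
        pool = inbound_credentials
    else:  # data is going out to the object store
        pool = outbound_credentials

    credential = pool[recipe_run_id % len(pool)]  # deterministic choice per recipe run
    confidential_client = ConfidentialAppAuthClient(
        client_id=credential.client_id, client_secret=credential.client_secret
    )
    authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
    return TransferClient(authorizer=authorizer)

Keying the round-robin on recipe_run_id keeps every transfer within a single recipe run on the same Globus application while still spreading concurrent runs across the credential pool.
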
dkist_processing_common/tasks/mixin/interservice_bus.py

@@ -1,4 +1,5 @@
 """Mixin for a WorkflowDataTaskBase subclass which implements interservice bus access functionality."""
+
 from talus import DurableProducer
 from talus import PublishMessageBase