dkist-processing-common 11.7.0rc3__py3-none-any.whl → 11.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. dkist_processing_common/config.py +28 -6
  2. dkist_processing_common/models/constants.py +12 -36
  3. dkist_processing_common/models/graphql.py +0 -33
  4. dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
  5. dkist_processing_common/tasks/l1_output_data.py +38 -32
  6. dkist_processing_common/tasks/mixin/globus.py +23 -26
  7. dkist_processing_common/tasks/mixin/metadata_store.py +0 -49
  8. dkist_processing_common/tasks/mixin/object_store.py +21 -0
  9. dkist_processing_common/tasks/mixin/quality/_metrics.py +4 -6
  10. dkist_processing_common/tasks/parse_l0_input_data.py +9 -30
  11. dkist_processing_common/tasks/trial_catalog.py +49 -1
  12. dkist_processing_common/tasks/trial_output_data.py +1 -1
  13. dkist_processing_common/tests/mock_metadata_store.py +39 -4
  14. dkist_processing_common/tests/test_input_dataset.py +1 -37
  15. dkist_processing_common/tests/test_parse_l0_input_data.py +9 -3
  16. dkist_processing_common/tests/test_publish_catalog_messages.py +0 -21
  17. dkist_processing_common/tests/test_quality_mixin.py +11 -3
  18. dkist_processing_common/tests/test_stems.py +35 -0
  19. dkist_processing_common/tests/test_submit_dataset_metadata.py +1 -5
  20. dkist_processing_common/tests/test_trial_catalog.py +72 -2
  21. dkist_processing_common/tests/test_trial_output_data.py +1 -2
  22. {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/METADATA +17 -13
  23. {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/RECORD +25 -30
  24. changelog/267.feature.1.rst +0 -1
  25. changelog/267.feature.2.rst +0 -1
  26. changelog/267.feature.rst +0 -1
  27. changelog/267.misc.rst +0 -1
  28. changelog/267.removal.1.rst +0 -2
  29. changelog/267.removal.rst +0 -1
  30. {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/WHEEL +0 -0
  31. {dkist_processing_common-11.7.0rc3.dist-info → dkist_processing_common-11.9.1.dist-info}/top_level.txt +0 -0
@@ -3,12 +3,20 @@
3
3
  from dkist_processing_core.config import DKISTProcessingCoreConfiguration
4
4
  from dkist_service_configuration.settings import DEFAULT_MESH_SERVICE
5
5
  from dkist_service_configuration.settings import MeshService
6
+ from pydantic import BaseModel
6
7
  from pydantic import Field
7
8
  from talus import ConnectionRetryerFactory
8
9
  from talus import ConsumerConnectionParameterFactory
9
10
  from talus import ProducerConnectionParameterFactory
10
11
 
11
12
 
13
class GlobusClientCredential(BaseModel):
    """Globus client credential: the ID/secret pair identifying one Globus client used for transfers."""

    # Both fields are required; `...` as the Field default means there is no fallback value.
    client_id: str = Field(..., description="Globus client ID for transfers.")
    client_secret: str = Field(..., description="Globus client secret for transfers.")
18
+
19
+
12
20
  class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
13
21
  """Common configurations."""
14
22
 
@@ -40,14 +48,28 @@ class DKISTProcessingCommonConfiguration(DKISTProcessingCoreConfiguration):
40
48
  default=None, description="S3 download configuration for the object store."
41
49
  )
42
50
  # globus
43
- globus_transport_params: dict = Field(
44
- default_factory=dict, description="Globus transfer parameters."
51
+ globus_max_retries: int = Field(
52
+ default=5, description="Max retries for transient errors on calls to the globus api."
45
53
  )
46
- globus_client_id: str | None = Field(
47
- default=None, description="Globus client ID for inbound/outbound transfers."
54
+ globus_inbound_client_credentials: list[GlobusClientCredential] = Field(
55
+ default_factory=list,
56
+ description="Globus client credentials for inbound transfers.",
57
+ examples=[
58
+ [
59
+ {"client_id": "id1", "client_secret": "secret1"},
60
+ {"client_id": "id2", "client_secret": "secret2"},
61
+ ],
62
+ ],
48
63
  )
49
- globus_client_secret: str | None = Field(
50
- default=None, description="Globus client secret for inbound/outbound transfers."
64
+ globus_outbound_client_credentials: list[GlobusClientCredential] = Field(
65
+ default_factory=list,
66
+ description="Globus client credentials for outbound transfers.",
67
+ examples=[
68
+ [
69
+ {"client_id": "id3", "client_secret": "secret3"},
70
+ {"client_id": "id4", "client_secret": "secret4"},
71
+ ],
72
+ ],
51
73
  )
52
74
  object_store_endpoint: str | None = Field(
53
75
  default=None, description="Object store Globus Endpoint ID."
@@ -44,25 +44,21 @@ class BudName(StrEnum):
44
44
  software_binning_y = "SOFTWARE_BINNING_Y"
45
45
  hls_version = "HLS_VERSION"
46
46
  # Multi-task buds start here:
47
- dark_observing_program_execution_id = "DARK_OBSERVING_PROGRAM_EXECUTION_ID"
48
- solar_gain_observing_program_execution_id = "SOLAR_GAIN_OBSERVING_PROGRAM_EXECUTION_ID"
49
- polcal_observing_program_execution_id = "POLCAL_OBSERVING_PROGRAM_EXECUTION_ID"
47
+ dark_observing_program_execution_ids = "DARK_OBSERVING_PROGRAM_EXECUTION_IDS"
48
+ solar_gain_observing_program_execution_ids = "SOLAR_GAIN_OBSERVING_PROGRAM_EXECUTION_IDS"
49
+ polcal_observing_program_execution_ids = "POLCAL_OBSERVING_PROGRAM_EXECUTION_IDS"
50
50
  dark_date_begin = "DARK_DATE_BEGIN"
51
51
  solar_gain_date_begin = "SOLAR_GAIN_DATE_BEGIN"
52
52
  polcal_date_begin = "POLCAL_DATE_BEGIN"
53
53
  dark_date_end = "DARK_DATE_END"
54
54
  solar_gain_date_end = "SOLAR_GAIN_DATE_END"
55
55
  polcal_date_end = "POLCAL_DATE_END"
56
- dark_num_raw_frames_per_fpa = "DARK_NUM_RAW_FRAMES_PER_FPA"
57
56
  solar_gain_num_raw_frames_per_fpa = "SOLAR_GAIN_NUM_RAW_FRAMES_PER_FPA"
58
57
  polcal_num_raw_frames_per_fpa = "POLCAL_NUM_RAW_FRAMES_PER_FPA"
59
- dark_telescope_tracking_mode = "DARK_TELESCOPE_TRACKING_MODE"
60
58
  solar_gain_telescope_tracking_mode = "SOLAR_GAIN_TELESCOPE_TRACKING_MODE"
61
59
  polcal_telescope_tracking_mode = "POLCAL_TELESCOPE_TRACKING_MODE"
62
- dark_coude_table_tracking_mode = "DARK_COUDE_TABLE_TRACKING_MODE"
63
60
  solar_gain_coude_table_tracking_mode = "SOLAR_GAIN_COUDE_TABLE_TRACKING_MODE"
64
61
  polcal_coude_table_tracking_mode = "POLCAL_COUDE_TABLE_TRACKING_MODE"
65
- dark_telescope_scanning_mode = "DARK_TELESCOPE_SCANNING_MODE"
66
62
  solar_gain_telescope_scanning_mode = "SOLAR_GAIN_TELESCOPE_SCANNING_MODE"
67
63
  polcal_telescope_scanning_mode = "POLCAL_TELESCOPE_SCANNING_MODE"
68
64
  dark_average_light_level = "DARK_AVERAGE_LIGHT_LEVEL"
@@ -285,21 +281,21 @@ class ConstantsBase:
285
281
  # Multi-task constants start here:
286
282
 
287
283
  @property
288
- def dark_observing_program_execution_id(self) -> list[str]:
289
- """Return the observing program execution id constant for the dark task."""
290
- observing_programs = self._db_dict[BudName.dark_observing_program_execution_id]
284
+ def dark_observing_program_execution_ids(self) -> list[str]:
285
+ """Return the observing program execution ids constant for the dark task."""
286
+ observing_programs = self._db_dict[BudName.dark_observing_program_execution_ids]
291
287
  return list(observing_programs)
292
288
 
293
289
  @property
294
- def solar_gain_observing_program_execution_id(self) -> list[str]:
295
- """Return the observing program execution id constant for the solar_gain task."""
296
- observing_programs = self._db_dict[BudName.solar_gain_observing_program_execution_id]
290
+ def solar_gain_observing_program_execution_ids(self) -> list[str]:
291
+ """Return the observing program execution ids constant for the solar_gain task."""
292
+ observing_programs = self._db_dict[BudName.solar_gain_observing_program_execution_ids]
297
293
  return list(observing_programs)
298
294
 
299
295
  @property
300
- def polcal_observing_program_execution_id(self) -> list[str]:
301
- """Return the observing program execution id constant."""
302
- observing_programs = self._db_dict[BudName.polcal_observing_program_execution_id]
296
+ def polcal_observing_program_execution_ids(self) -> list[str]:
297
+ """Return the observing program execution ids constant."""
298
+ observing_programs = self._db_dict[BudName.polcal_observing_program_execution_ids]
303
299
  return list(observing_programs)
304
300
 
305
301
  @property
@@ -332,11 +328,6 @@ class ConstantsBase:
332
328
  """Return the date end constant for the polcal task."""
333
329
  return self._db_dict[BudName.polcal_date_end]
334
330
 
335
- @property
336
- def dark_num_raw_frames_per_fpa(self) -> int:
337
- """Return the number of raw frames per fpa constant for the dark task."""
338
- return self._db_dict[BudName.dark_num_raw_frames_per_fpa]
339
-
340
331
  @property
341
332
  def solar_gain_num_raw_frames_per_fpa(self) -> int:
342
333
  """Return the number of raw frames per fpa constant for the solar gain task."""
@@ -347,11 +338,6 @@ class ConstantsBase:
347
338
  """Return the num raw frames per fpa constant for the polcal task."""
348
339
  return self._db_dict[BudName.polcal_num_raw_frames_per_fpa]
349
340
 
350
- @property
351
- def dark_telescope_tracking_mode(self) -> str:
352
- """Return the telescope tracking mode constant for the dark task."""
353
- return self._db_dict[BudName.dark_telescope_tracking_mode]
354
-
355
341
  @property
356
342
  def solar_gain_telescope_tracking_mode(self) -> str:
357
343
  """Return the telescope tracking mode constant for the solar gain task."""
@@ -362,11 +348,6 @@ class ConstantsBase:
362
348
  """Return the telescope tracking mode constant for the polcal task."""
363
349
  return self._db_dict[BudName.polcal_telescope_tracking_mode]
364
350
 
365
- @property
366
- def dark_coude_table_tracking_mode(self) -> str:
367
- """Return the coude table tracking mode constant for the dark task."""
368
- return self._db_dict[BudName.dark_coude_table_tracking_mode]
369
-
370
351
  @property
371
352
  def solar_gain_coude_table_tracking_mode(self) -> str:
372
353
  """Return the coude table tracking mode constant for the solar gain task."""
@@ -377,11 +358,6 @@ class ConstantsBase:
377
358
  """Return the coude table tracking mode constant for the polcal task."""
378
359
  return self._db_dict[BudName.polcal_coude_table_tracking_mode]
379
360
 
380
- @property
381
- def dark_telescope_scanning_mode(self) -> str:
382
- """Return the telescope scanning mode constant for the dark task."""
383
- return self._db_dict[BudName.dark_telescope_scanning_mode]
384
-
385
361
  @property
386
362
  def solar_gain_telescope_scanning_mode(self) -> str:
387
363
  """Return the telescope scanning mode constant for the solar gain task."""
@@ -176,36 +176,3 @@ class RecipeRunProvenanceMutation(GraphqlBaseModel):
176
176
  libraryVersions: str
177
177
  workflowVersion: str
178
178
  codeVersion: str | None = None
179
-
180
-
181
- class QualityCreation(GraphqlBaseModel):
182
- """Quality data creation record."""
183
-
184
- datasetId: str
185
- metricCode: str
186
- facet: str | None = None
187
- name: str | None = None
188
- description: str | None = None
189
- statement: str | None = None
190
- # JSON array
191
- warnings: str | None = None
192
- # JSON objects
193
- plotData: str | None = None
194
- multiPlotData: str | None = None
195
- tableData: str | None = None
196
- histogramData: str | None = None
197
- modmatData: str | None = None
198
- raincloudData: str | None = None
199
- efficiencyData: str | None = None
200
-
201
-
202
- class QualitiesRequest(GraphqlBaseModel):
203
- """Query parameters for quality data."""
204
-
205
- datasetId: str
206
-
207
-
208
- class QualityResponse(GraphqlBaseModel):
209
- """Query Response for quality data."""
210
-
211
- qualityId: int
@@ -0,0 +1,24 @@
1
+ """Observing Program Id parser."""
2
+
3
+ from typing import Callable
4
+
5
+ from dkist_processing_common.models.fits_access import MetadataKey
6
+ from dkist_processing_common.parsers.id_bud import TaskContributingIdsBud
7
+ from dkist_processing_common.parsers.task import passthrough_header_ip_task
8
+
9
+
10
class TaskContributingObservingProgramExecutionIdsBud(TaskContributingIdsBud):
    """
    Bud for the observing program execution ids that support the given task type(s).

    This is `TaskContributingIdsBud` with the metadata key pinned to
    `MetadataKey.observing_program_execution_id`; every other argument is forwarded unchanged.

    Parameters
    ----------
    constant_name
        Name of the constant the bud populates.
    ip_task_types
        A single task type or a list of task types whose frames contribute ids.
    task_type_parsing_function
        Callable forwarded to the parent class; defaults to `passthrough_header_ip_task`.
    """

    def __init__(
        self,
        constant_name: str,
        ip_task_types: str | list[str],
        task_type_parsing_function: Callable = passthrough_header_ip_task,
    ):
        # The only specialization over the parent is the fixed header key to read.
        id_key = MetadataKey.observing_program_execution_id
        super().__init__(
            metadata_key=id_key,
            constant_name=constant_name,
            ip_task_types=ip_task_types,
            task_type_parsing_function=task_type_parsing_function,
        )
@@ -38,8 +38,9 @@ class L1OutputDataBase(OutputDataBase, ABC):
38
38
 
39
39
  @property
40
40
  def dataset_has_quality_data(self) -> bool:
41
- """Return True if quality data has been persisted to the metadata-store."""
42
- return self.metadata_store_quality_data_exists(dataset_id=self.constants.dataset_id)
41
+ """Return True if the dataset has quality data."""
42
+ path_count = self.count(tags=[Tag.output(), Tag.quality_data()])
43
+ return path_count > 0
43
44
 
44
45
  def rollback(self):
45
46
  """Warn that the metadata-store and the interservice bus retain the effect of this tasks execution. Rolling back this task may not be achievable without other action."""
@@ -58,6 +59,9 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
58
59
  # Movie needs to be transferred separately as the movie headers need to go with it
59
60
  self.transfer_movie()
60
61
 
62
+ with self.telemetry_span("Upload quality data"):
63
+ self.transfer_quality_data()
64
+
61
65
  with self.telemetry_span("Upload science frames"):
62
66
  self.transfer_output_frames()
63
67
 
@@ -101,6 +105,33 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
101
105
  content_type="video/mp4",
102
106
  )
103
107
 
108
def transfer_quality_data(self):
    """
    Transfer quality data to the object store.

    Reads the paths tagged as both output and quality data.  If there are none, this is
    logged and nothing is uploaded.  If there is exactly one, it is uploaded to the
    destination bucket with a JSON content type.

    Raises
    ------
    RuntimeError
        If more than one quality data file is found, because dataset inventory does not
        support multiple quality data object keys.
    """
    paths = list(self.read(tags=[Tag.output(), Tag.quality_data()]))
    if not paths:
        logger.info(
            f"No quality data found to upload for dataset. recipe_run_id={self.recipe_run_id}"
        )
        return

    # The walrus must be parenthesized: `count := len(paths) > 1` binds the boolean result
    # of the comparison, which made the error below report `count=True` instead of the count.
    if (count := len(paths)) > 1:
        # dataset inventory does not support multiple quality data object keys
        raise RuntimeError(
            "Found multiple quality data files to upload. Not supported. "
            f"{count=}, recipe_run_id={self.recipe_run_id}"
        )

    with self.telemetry_span("Uploading the trial quality data"):
        path = paths[0]
        logger.info(f"Uploading quality data: recipe_run_id={self.recipe_run_id}, {path=}")
        quality_data_object_key = self.format_object_key(path)
        self.object_store_upload_quality_data(
            quality_data=path,
            bucket=self.destination_bucket,
            object_key=quality_data_object_key,
            content_type="application/json",
        )
+ )
134
+
104
135
 
105
136
  class AssembleQualityData(L1OutputDataBase, QualityMixin):
106
137
  """
@@ -128,7 +159,7 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
128
159
  ):
129
160
  self.write(
130
161
  quality_data,
131
- tags=Tag.quality_data(),
162
+ tags=[Tag.output(), Tag.quality_data()],
132
163
  encoder=quality_data_encoder,
133
164
  relative_path=f"{self.constants.dataset_id}_quality_data.json",
134
165
  )
@@ -136,31 +167,18 @@ class AssembleQualityData(L1OutputDataBase, QualityMixin):
136
167
 
137
168
  class SubmitDatasetMetadata(L1OutputDataBase):
138
169
  """
139
- Add quality data and receipt account to the metadata store.
170
+ Add receipt account to the metadata store.
140
171
 
141
- Add the quality data to the Quality database.
142
172
  Add a Dataset Receipt Account record to Processing Support for use by the Dataset Catalog Locker.
143
- Adds the number of files created during the calibration processing to the Processing Support table
173
+ Adds the number of files to be created during the calibration processing to the Processing Support table
144
174
  for use by the Dataset Catalog Locker.
145
175
  """
146
176
 
147
177
  def run(self) -> None:
148
178
  """Run method for this task."""
149
- with self.telemetry_span(f"Storing quality data to metadata store"):
150
- # each quality_data file is a list - this will combine the elements of multiple lists into a single list
151
- quality_data = list(
152
- chain.from_iterable(
153
- self.read(tags=Tag.quality_data(), decoder=quality_data_decoder)
154
- )
155
- )
156
- self.metadata_store_add_quality_data(
157
- dataset_id=self.constants.dataset_id, quality_data=quality_data
158
- )
159
179
  with self.telemetry_span("Count Expected Outputs"):
160
180
  dataset_id = self.constants.dataset_id
161
181
  expected_object_count = self.count(tags=Tag.output())
162
- if quality_data:
163
- expected_object_count += 1
164
182
  logger.info(
165
183
  f"Adding Dataset Receipt Account: "
166
184
  f"{dataset_id=}, {expected_object_count=}, recipe_run_id={self.recipe_run_id}"
@@ -230,24 +248,12 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
230
248
  messages = [CatalogObjectMessage(body=body) for body in message_bodies]
231
249
  return messages
232
250
 
233
- @property
234
- def quality_report_message(self) -> CreateQualityReportMessage:
235
- """Create the Quality Report Message."""
236
- file_name = Path(f"{self.constants.dataset_id}_quality_report.pdf")
237
- body = CreateQualityReportMessageBody(
238
- bucket=self.destination_bucket,
239
- objectName=self.format_object_key(file_name),
240
- conversationId=str(self.recipe_run_id),
241
- datasetId=self.constants.dataset_id,
242
- incrementDatasetCatalogReceiptCount=True,
243
- )
244
- return CreateQualityReportMessage(body=body)
245
-
246
251
  def run(self) -> None:
247
252
  """Run method for this task."""
248
253
  with self.telemetry_span("Gather output data"):
249
254
  frames = self.read(tags=self.output_frame_tags)
250
255
  movies = self.read(tags=[Tag.output(), Tag.movie()])
256
+ quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
251
257
  with self.telemetry_span("Create message objects"):
252
258
  messages = []
253
259
  messages += self.frame_messages(paths=frames)
@@ -256,7 +262,7 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
256
262
  object_message_count = len(messages) - frame_message_count
257
263
  dataset_has_quality_data = self.dataset_has_quality_data
258
264
  if dataset_has_quality_data:
259
- messages.append(self.quality_report_message)
265
+ messages += self.object_messages(paths=quality_data, object_type="QDATA")
260
266
  with self.telemetry_span(
261
267
  f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
262
268
  ):
@@ -9,6 +9,8 @@ from globus_sdk import ConfidentialAppAuthClient
9
9
  from globus_sdk import GlobusError
10
10
  from globus_sdk import TransferClient
11
11
  from globus_sdk import TransferData
12
+ from globus_sdk.scopes import TransferScopes
13
+ from globus_sdk.transport import RetryConfig
12
14
 
13
15
  from dkist_processing_common.config import common_configurations
14
16
 
@@ -31,27 +33,32 @@ class GlobusTransferItem:
31
33
  class GlobusMixin:
32
34
  """Mixin to add methods to a Task to support globus transfers."""
33
35
 
34
- @property
35
- def globus_transfer_client(self) -> TransferClient:
36
- """Get the globus transfer client, creating it if it doesn't exist."""
37
- if getattr(self, "_globus_transfer_client", False):
38
- return self._globus_transfer_client
36
def globus_transfer_client_factory(self, transfer_data: TransferData) -> TransferClient:
    """
    Create a globus transfer client based on the direction of transfer.

    A transfer whose source endpoint is the configured object store endpoint is treated
    as inbound and uses the inbound client credentials; any other transfer is treated as
    outbound.  The credential is chosen by `recipe_run_id` modulo the number of configured
    credentials, so different recipe runs are spread across the available application
    credentials.

    Parameters
    ----------
    transfer_data
        The transfer about to be submitted; only its "source_endpoint" is inspected.

    Returns
    -------
    TransferClient
        A new client authorized with the selected credential and configured with the
        `globus_max_retries` retry limit.

    Raises
    ------
    RuntimeError
        If no client credentials are configured for the transfer direction.
    """
    if transfer_data["source_endpoint"] == common_configurations.object_store_endpoint:
        direction = "inbound"
        client_credentials = common_configurations.globus_inbound_client_credentials
    else:
        direction = "outbound"
        client_credentials = common_configurations.globus_outbound_client_credentials

    # Both credential lists default to empty; fail with an actionable message rather than
    # the ZeroDivisionError the modulo below would otherwise raise.
    if not client_credentials:
        raise RuntimeError(
            f"No globus {direction} client credentials are configured. "
            f"recipe_run_id={self.recipe_run_id}"
        )

    # Round-robin the client credentials based on the recipe run id
    index = self.recipe_run_id % len(client_credentials)
    selected_credential = client_credentials[index]

    confidential_client = ConfidentialAppAuthClient(
        client_id=selected_credential.client_id,
        client_secret=selected_credential.client_secret,
    )
    authorizer = ClientCredentialsAuthorizer(confidential_client, scopes=TransferScopes)
    retry_config = RetryConfig(max_retries=common_configurations.globus_max_retries)

    return TransferClient(authorizer=authorizer, retry_config=retry_config)
49
57
 
50
58
  def globus_transfer_scratch_to_object_store(
51
59
  self,
52
60
  transfer_items: list[GlobusTransferItem],
53
61
  label: str = None,
54
- sync_level: str = None,
55
62
  verify_checksum: bool = True,
56
63
  ) -> None:
57
64
  """Transfer data from scratch to the object store."""
@@ -60,7 +67,6 @@ class GlobusMixin:
60
67
  destination_endpoint=common_configurations.object_store_endpoint,
61
68
  transfer_items=transfer_items,
62
69
  label=label,
63
- sync_level=sync_level,
64
70
  verify_checksum=verify_checksum,
65
71
  )
66
72
 
@@ -68,7 +74,6 @@ class GlobusMixin:
68
74
  self,
69
75
  transfer_items: list[GlobusTransferItem],
70
76
  label: str = None,
71
- sync_level: str = None,
72
77
  verify_checksum: bool = True,
73
78
  ) -> None:
74
79
  """Transfer data from the object store to scratch."""
@@ -77,7 +82,6 @@ class GlobusMixin:
77
82
  destination_endpoint=common_configurations.scratch_endpoint,
78
83
  transfer_items=transfer_items,
79
84
  label=label,
80
- sync_level=sync_level,
81
85
  verify_checksum=verify_checksum,
82
86
  )
83
87
 
@@ -87,7 +91,6 @@ class GlobusMixin:
87
91
  destination_endpoint: str,
88
92
  transfer_items: list[GlobusTransferItem],
89
93
  label: str = None,
90
- sync_level: str = None,
91
94
  verify_checksum: bool = True,
92
95
  ) -> TransferData:
93
96
  """Format a globus TransferData instance."""
@@ -95,7 +98,6 @@ class GlobusMixin:
95
98
  source_endpoint=source_endpoint,
96
99
  destination_endpoint=destination_endpoint,
97
100
  label=label,
98
- sync_level=sync_level,
99
101
  verify_checksum=verify_checksum,
100
102
  )
101
103
  for item in transfer_items:
@@ -112,7 +114,6 @@ class GlobusMixin:
112
114
  destination_endpoint: str,
113
115
  transfer_items: list[GlobusTransferItem],
114
116
  label: str = None,
115
- sync_level: str = None,
116
117
  verify_checksum: bool = True,
117
118
  ) -> None:
118
119
  """Perform a transfer of data using globus."""
@@ -121,7 +122,6 @@ class GlobusMixin:
121
122
  destination_endpoint=destination_endpoint,
122
123
  transfer_items=transfer_items,
123
124
  label=label,
124
- sync_level=sync_level,
125
125
  verify_checksum=verify_checksum,
126
126
  )
127
127
  self._blocking_globus_transfer(transfer_data=transfer_data)
@@ -131,24 +131,21 @@ class GlobusMixin:
131
131
  source_endpoint: str,
132
132
  destination_endpoint: str,
133
133
  label: str = None,
134
- sync_level: str = None,
135
134
  verify_checksum: bool = True,
136
135
  ) -> TransferData:
137
136
  label = label or "Data Processing Transfer"
138
137
  return TransferData(
139
- transfer_client=self.globus_transfer_client,
140
138
  source_endpoint=source_endpoint,
141
139
  destination_endpoint=destination_endpoint,
142
140
  label=label,
143
- sync_level=sync_level,
144
141
  verify_checksum=verify_checksum,
145
142
  )
146
143
 
147
144
  def _blocking_globus_transfer(self, transfer_data: TransferData) -> None:
148
- tc = self.globus_transfer_client
149
- logger.info(f"Starting globus transfer: label={transfer_data.get('label')}")
145
+ tc = self.globus_transfer_client_factory(transfer_data=transfer_data)
150
146
  transfer_result = tc.submit_transfer(transfer_data)
151
147
  task_id = transfer_result["task_id"]
148
+ logger.info(f"Starting globus transfer: label={transfer_data.get('label')}, {task_id=}, ")
152
149
  polling_interval = 60
153
150
  while not tc.task_wait(
154
151
  task_id=task_id, timeout=polling_interval, polling_interval=polling_interval
@@ -1,6 +1,5 @@
1
1
  """Mixin for a WorkflowDataTaskBase subclass which implements Metadata Store data access functionality."""
2
2
 
3
- import json
4
3
  import logging
5
4
  from functools import cached_property
6
5
  from typing import Literal
@@ -8,15 +7,11 @@ from typing import Literal
8
7
  from pydantic import validate_call
9
8
 
10
9
  from dkist_processing_common._util.graphql import GraphQLClient
11
- from dkist_processing_common.codecs.quality import QualityDataEncoder
12
10
  from dkist_processing_common.config import common_configurations
13
11
  from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountMutation
14
12
  from dkist_processing_common.models.graphql import DatasetCatalogReceiptAccountResponse
15
13
  from dkist_processing_common.models.graphql import InputDatasetPartResponse
16
14
  from dkist_processing_common.models.graphql import InputDatasetRecipeRunResponse
17
- from dkist_processing_common.models.graphql import QualitiesRequest
18
- from dkist_processing_common.models.graphql import QualityCreation
19
- from dkist_processing_common.models.graphql import QualityResponse
20
15
  from dkist_processing_common.models.graphql import RecipeRunMutation
21
16
  from dkist_processing_common.models.graphql import RecipeRunMutationResponse
22
17
  from dkist_processing_common.models.graphql import RecipeRunProvenanceMutation
@@ -150,50 +145,6 @@ class MetadataStoreMixin:
150
145
  mutation_response_cls=RecipeRunProvenanceResponse,
151
146
  )
152
147
 
153
- # QUALITY
154
-
155
- def metadata_store_add_quality_data(self, dataset_id: str, quality_data: list[dict]):
156
- """Add the quality data to the metadata-store."""
157
- if self.metadata_store_quality_data_exists(dataset_id):
158
- raise RuntimeError(f"Quality data already persisted for dataset {dataset_id!r}")
159
- for metric in quality_data:
160
- if (metric_code := metric.get("metric_code")) is None:
161
- name = metric.get("name")
162
- raise ValueError(f"No metric_code for {name!r} in dataset {dataset_id!r}")
163
- params = QualityCreation(
164
- datasetId=dataset_id,
165
- metricCode=metric_code,
166
- facet=metric.get("facet"),
167
- name=metric.get("name"),
168
- description=metric.get("description"),
169
- statement=metric.get("statement"),
170
- # JSON array
171
- warnings=json.dumps(metric.get("warnings")),
172
- # JSON objects
173
- plotData=json.dumps(metric.get("plot_data"), cls=QualityDataEncoder),
174
- multiPlotData=json.dumps(metric.get("multi_plot_data"), cls=QualityDataEncoder),
175
- tableData=json.dumps(metric.get("table_data"), cls=QualityDataEncoder),
176
- histogramData=json.dumps(metric.get("histogram_data"), cls=QualityDataEncoder),
177
- modmatData=json.dumps(metric.get("modmat_data"), cls=QualityDataEncoder),
178
- raincloudData=json.dumps(metric.get("raincloud_data"), cls=QualityDataEncoder),
179
- efficiencyData=json.dumps(metric.get("efficiency_data"), cls=QualityDataEncoder),
180
- )
181
- self.metadata_store_client.execute_gql_mutation(
182
- mutation_base="createQuality",
183
- mutation_parameters=params,
184
- mutation_response_cls=QualityResponse,
185
- )
186
-
187
- def metadata_store_quality_data_exists(self, dataset_id: str) -> bool:
188
- """Return True if quality data exists in the metadata-store for the given dataset id."""
189
- params = QualitiesRequest(datasetId=dataset_id)
190
- response = self.metadata_store_client.execute_gql_query(
191
- query_base="qualities",
192
- query_response_cls=QualityResponse,
193
- query_parameters=params,
194
- )
195
- return bool(response)
196
-
197
148
  # INPUT DATASET RECIPE RUN
198
149
 
199
150
  @cached_property
@@ -55,6 +55,27 @@ class ObjectStoreMixin:
55
55
  },
56
56
  )
57
57
 
58
def object_store_upload_quality_data(
    self,
    quality_data: Path | bytes,
    bucket: str,
    object_key: str,
    content_type: str = "application/json",
):
    """
    Upload quality data to the object store.

    Parameters
    ----------
    quality_data
        The quality data to upload, either as a path or as raw bytes.
    bucket
        Bucket to upload into.
    object_key
        Key to store the object under.
    content_type
        Content type recorded for the object; JSON by default.
    """
    upload = self.object_store_client.upload_object
    # Object metadata marks the upload as the QDATA object of this dataset.
    object_metadata = {
        "groupname": "DATASET",
        "groupid": self.constants.dataset_id,
        "objecttype": "QDATA",
    }
    upload(
        object_data=quality_data,
        bucket=bucket,
        object_key=object_key,
        verify_checksum=True,
        content_type=content_type,
        metadata=object_metadata,
    )
78
+
58
79
  def object_store_remove_folder_objects(self, bucket: str, path: Path | str) -> list[str]:
59
80
  """
60
81
  Remove folder objects (end with /) in the specified bucket and path.
@@ -1356,15 +1356,13 @@ class _WavecalQualityMixin:
1356
1356
  Note that the residuals are the *unweighed* residuals.
1357
1357
  """
1358
1358
  weight_data = np.ones(input_wavelength.size) if weights is None else weights
1359
- prepared_weights = np.sqrt(weight_data / np.sum(weight_data))
1359
+ prepared_weights = fit_result.prepared_weights
1360
1360
  residuals = fit_result.minimizer_result.residual / prepared_weights
1361
1361
  residuals[~np.isfinite(residuals)] = 0.0
1362
- best_fit_atlas = input_spectrum - residuals
1363
1362
  normalized_residuals = residuals / input_spectrum
1364
1363
 
1365
- best_fit_header = fit_result.wavelength_parameters.to_header(axis_num=1)
1366
- wcs = WCS(best_fit_header)
1367
- best_fit_wavelength = wcs.spectral.pixel_to_world(np.arange(input_spectrum.size))
1364
+ best_fit_atlas = fit_result.best_fit_atlas
1365
+ best_fit_wavelength = fit_result.best_fit_wavelength_vector
1368
1366
 
1369
1367
  finite_idx = (
1370
1368
  np.isfinite(input_wavelength)
@@ -1378,7 +1376,7 @@ class _WavecalQualityMixin:
1378
1376
  data = {
1379
1377
  "input_wavelength_nm": input_wavelength.to_value(u.nm)[finite_idx].tolist(),
1380
1378
  "input_spectrum": input_spectrum[finite_idx].tolist(),
1381
- "best_fit_wavelength_nm": best_fit_wavelength.to_value(u.nm)[finite_idx].tolist(),
1379
+ "best_fit_wavelength_nm": best_fit_wavelength[finite_idx].tolist(),
1382
1380
  "best_fit_atlas": best_fit_atlas[finite_idx].tolist(),
1383
1381
  "normalized_residuals": normalized_residuals[finite_idx].tolist(),
1384
1382
  "weights": None if weights is None else weight_data[finite_idx].tolist(),