dkist-processing-common 12.0.0rc5__py3-none-any.whl → 12.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. dkist_processing_common/codecs/fits.py +27 -6
  2. dkist_processing_common/models/constants.py +16 -10
  3. dkist_processing_common/models/extras.py +35 -0
  4. dkist_processing_common/models/flower_pot.py +230 -9
  5. dkist_processing_common/models/tags.py +13 -0
  6. dkist_processing_common/parsers/average_bud.py +0 -2
  7. dkist_processing_common/parsers/cs_step.py +10 -10
  8. dkist_processing_common/parsers/id_bud.py +8 -10
  9. dkist_processing_common/parsers/lookup_bud.py +7 -11
  10. dkist_processing_common/parsers/near_bud.py +7 -12
  11. dkist_processing_common/parsers/retarder.py +9 -13
  12. dkist_processing_common/parsers/time.py +19 -55
  13. dkist_processing_common/parsers/unique_bud.py +7 -14
  14. dkist_processing_common/tasks/l1_output_data.py +23 -14
  15. dkist_processing_common/tasks/output_data_base.py +25 -4
  16. dkist_processing_common/tasks/parse_l0_input_data.py +4 -2
  17. dkist_processing_common/tasks/transfer_input_data.py +1 -0
  18. dkist_processing_common/tasks/write_extra.py +333 -0
  19. dkist_processing_common/tasks/write_l1.py +2 -55
  20. dkist_processing_common/tasks/write_l1_base.py +67 -0
  21. dkist_processing_common/tests/test_codecs.py +57 -11
  22. dkist_processing_common/tests/test_construct_dataset_extras.py +224 -0
  23. dkist_processing_common/tests/test_flower_pot.py +147 -5
  24. dkist_processing_common/tests/test_output_data_base.py +24 -2
  25. dkist_processing_common/tests/test_parse_l0_input_data.py +28 -4
  26. dkist_processing_common/tests/test_stems.py +140 -193
  27. dkist_processing_common/tests/test_transfer_l1_output_data.py +1 -0
  28. dkist_processing_common/tests/test_trial_catalog.py +2 -0
  29. dkist_processing_common/tests/test_workflow_task_base.py +0 -11
  30. dkist_processing_common/tests/test_write_l1.py +0 -1
  31. {dkist_processing_common-12.0.0rc5.dist-info → dkist_processing_common-12.2.0.dist-info}/METADATA +4 -4
  32. {dkist_processing_common-12.0.0rc5.dist-info → dkist_processing_common-12.2.0.dist-info}/RECORD +34 -31
  33. {dkist_processing_common-12.0.0rc5.dist-info → dkist_processing_common-12.2.0.dist-info}/WHEEL +1 -1
  34. changelog/288.misc.rst +0 -1
  35. {dkist_processing_common-12.0.0rc5.dist-info → dkist_processing_common-12.2.0.dist-info}/top_level.txt +0 -0
dkist_processing_common/parsers/near_bud.py
@@ -4,15 +4,15 @@ from enum import StrEnum
 from statistics import mean
 from typing import Callable
 
+from dkist_processing_common.models.flower_pot import ListStem
 from dkist_processing_common.models.flower_pot import SpilledDirt
-from dkist_processing_common.models.flower_pot import Stem
 from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
 from dkist_processing_common.parsers.task import passthrough_header_ip_task
 
 
-class NearFloatBud(Stem):
+class NearFloatBud(ListStem):
     """
-    Pre-made flower that reads a single header key from all files and raises a ValueError if the values are not within a given tolerance.
+    Pre-made `ListStem` that reads a single header key from all files and raises a ValueError if the values are not within a given tolerance.
 
     This is intended for use with floats where the values may be slightly different, but should be the same.
 
@@ -54,26 +54,21 @@ class NearFloatBud(Stem):
         """
         return getattr(fits_obj, self.metadata_key)
 
-    def getter(self, key):
+    def getter(self):
         """
         Get the value for this key and raise an error if the data spans more than the given tolerance.
 
-        Parameters
-        ----------
-        key
-            The input key
         Returns
         -------
         The mean value associated with this input key
         """
-        value_list = list(self.key_to_petal_dict.values())
-        biggest_value = max(value_list)
-        smallest_value = min(value_list)
+        biggest_value = max(self.value_list)
+        smallest_value = min(self.value_list)
         if biggest_value - smallest_value > self.tolerance:
             raise ValueError(
                 f"{self.stem_name} values are not close enough. Max: {biggest_value}, Min: {smallest_value}, Tolerance: {self.tolerance}"
             )
-        return mean(value_list)
+        return mean(self.value_list)
 
 
 class TaskNearFloatBud(NearFloatBud):
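The practical effect of the `Stem` → `ListStem` migration is easiest to see in `getter`: the `key` parameter and the manual `key_to_petal_dict` plumbing disappear in favor of a `value_list` property supplied by the new base class. A minimal sketch of what a downstream subclass might look like after this change; the `ExposureSpreadBud` class, its header attribute, and the assumption that `ListStem` keeps `Stem`'s `stem_name` constructor argument are illustrative, not taken from this diff:

    from statistics import mean

    from dkist_processing_common.models.flower_pot import ListStem


    class ExposureSpreadBud(ListStem):
        """Hypothetical bud: average a float header value, failing on a large spread."""

        def __init__(self, tolerance: float):
            super().__init__(stem_name="HYPOTHETICAL_EXPOSURE_SPREAD")
            self.tolerance = tolerance

        def setter(self, fits_obj):
            # Each ingested file contributes one entry to self.value_list
            return fits_obj.fpa_exposure_time_ms  # illustrative header attribute

        def getter(self) -> float:
            # New signature: no `key` argument; values come from the base class
            if max(self.value_list) - min(self.value_list) > self.tolerance:
                raise ValueError(f"{self.stem_name} spread exceeds {self.tolerance}")
            return mean(self.value_list)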
dkist_processing_common/parsers/retarder.py
@@ -2,7 +2,9 @@
 
 from dkist_processing_common.models.constants import BudName
 from dkist_processing_common.models.fits_access import MetadataKey
+from dkist_processing_common.models.flower_pot import SpilledDirt
 from dkist_processing_common.models.task_name import TaskName
+from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
 from dkist_processing_common.parsers.unique_bud import TaskUniqueBud
 
 
@@ -11,13 +13,9 @@ class RetarderNameBud(TaskUniqueBud):
     Bud for determining the name of the retarder used during a polcal Calibration Sequence (CS).
 
     This is *slightly* different than a simple `TaskUniqueBud` because we need to allow for CS steps when the retarder
-    is out of the beam (i.g., "clear"). We do this by forcing the set of header values to be `{clear, RETARDER_NAME}`,
-    where RETARDER_NAME is the value of this Bud.
+    is out of the beam (i.g., "clear"). We do this by returning `SpilledDirt` from the `setter` if the value is "clear".
     """
 
-    # For type-hinting later
-    key_to_petal_dict: dict[str, str]
-
     def __init__(self):
         super().__init__(
             constant_name=BudName.retarder_name,
@@ -25,12 +23,10 @@ class RetarderNameBud(TaskUniqueBud):
             ip_task_types=TaskName.polcal,
         )
 
-    def getter(self, key) -> str:
-        """Get the value for the retarder name and raise an Error if, ignoring "clear", that name is not unique."""
-        value_set = set(self.key_to_petal_dict.values())
-        value_set -= {"clear"}
-        if len(value_set) > 1:
-            raise ValueError(f"Multiple non-clear retarder names found. Names: {value_set}")
+    def setter(self, fits_obj: L0FitsAccess) -> type[SpilledDirt] | str:
+        """Drop the result if the retarder is out of the beam ("clear")."""
+        result = super().setter(fits_obj)
+        if result is not SpilledDirt and result.casefold() == "clear":
+            return SpilledDirt
 
-        raw_retarder_name = value_set.pop()
-        return raw_retarder_name
+        return result
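`SpilledDirt` is the flower-pot sentinel for "this file contributes nothing": returning it from `setter` keeps the value out of the pot entirely, so the uniqueness check inherited from `TaskUniqueBud` never sees "clear". The same filter-in-the-setter pattern generalizes to other buds; a hedged sketch (the bud and its dark-frame filter are invented for illustration):

    from dkist_processing_common.models.flower_pot import SpilledDirt
    from dkist_processing_common.parsers.unique_bud import UniqueBud


    class NonDarkWavelengthBud(UniqueBud):
        """Hypothetical bud that ignores the wavelength recorded during dark frames."""

        def setter(self, fits_obj):
            result = super().setter(fits_obj)
            # Returning the sentinel drops this file's value before it reaches
            # the pot, so it can never trip the uniqueness check in getter().
            if getattr(fits_obj, "ip_task_type", "").casefold() == "dark":
                return SpilledDirt
            return result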
dkist_processing_common/parsers/time.py
@@ -4,15 +4,15 @@ from datetime import datetime
 from datetime import timezone
 from enum import StrEnum
 from typing import Callable
-from typing import Hashable
 from typing import Type
 
 import numpy as np
 
 from dkist_processing_common.models.constants import BudName
 from dkist_processing_common.models.fits_access import MetadataKey
+from dkist_processing_common.models.flower_pot import ListStem
+from dkist_processing_common.models.flower_pot import SetStem
 from dkist_processing_common.models.flower_pot import SpilledDirt
-from dkist_processing_common.models.flower_pot import Stem
 from dkist_processing_common.models.tags import EXP_TIME_ROUND_DIGITS
 from dkist_processing_common.models.tags import StemName
 from dkist_processing_common.models.task_name import TaskName
@@ -35,7 +35,7 @@ class ObsIpStartTimeBud(TaskUniqueBud):
         )
 
 
-class TaskDatetimeBudBase(Stem):
+class TaskDatetimeBudBase(ListStem):
     """
     Base class for making datetime-related buds.
 
@@ -59,8 +59,6 @@ class TaskDatetimeBudBase(Stem):
         The function used to convert a header into an IP task type
     """
 
-    key_to_petal_dict: dict[str, float]
-
     def __init__(
         self,
        stem_name: str,
@@ -101,20 +99,15 @@ class TaskDatetimeBudBase(Stem):
 
         return SpilledDirt
 
-    def getter(self, key: Hashable) -> tuple[float, ...]:
+    def getter(self) -> tuple[float, ...]:
         """
         Return a tuple of sorted times in unix seconds.
 
-        Parameters
-        ----------
-        key
-            The input key
-
         Returns
         -------
         A tuple that is sorted times in unix seconds
         """
-        return tuple(sorted(list(self.key_to_petal_dict.values())))
+        return tuple(sorted(self.value_list))
 
 
 class CadenceBudBase(TaskDatetimeBudBase):
@@ -134,20 +127,15 @@ class AverageCadenceBud(CadenceBudBase):
     def __init__(self):
         super().__init__(constant_name=BudName.average_cadence)
 
-    def getter(self, key) -> np.float64:
+    def getter(self) -> np.float64:
         """
         Return the mean cadence between frames.
 
-        Parameters
-        ----------
-        key
-            The input key
-
         Returns
         -------
         The mean value of the cadences of the input frames
         """
-        return np.mean(np.diff(super().getter(key)))
+        return np.mean(np.diff(super().getter()))
 
 
 class MaximumCadenceBud(CadenceBudBase):
@@ -156,20 +144,15 @@ class MaximumCadenceBud(CadenceBudBase):
     def __init__(self):
         super().__init__(constant_name=BudName.maximum_cadence)
 
-    def getter(self, key) -> np.float64:
+    def getter(self) -> np.float64:
         """
         Return the maximum cadence between frames.
 
-        Parameters
-        ----------
-        key
-            The input key
-
         Returns
         -------
         The maximum cadence between frames
         """
-        return np.max(np.diff(super().getter(key)))
+        return np.max(np.diff(super().getter()))
 
 
 class MinimumCadenceBud(CadenceBudBase):
@@ -178,20 +161,15 @@ class MinimumCadenceBud(CadenceBudBase):
     def __init__(self):
         super().__init__(constant_name=BudName.minimum_cadence)
 
-    def getter(self, key) -> np.float64:
+    def getter(self) -> np.float64:
         """
         Return the minimum cadence between frames.
 
-        Parameters
-        ----------
-        key
-            The input key
-
         Returns
         -------
         The minimum cadence between frames
         """
-        return np.min(np.diff(super().getter(key)))
+        return np.min(np.diff(super().getter()))
 
 
 class VarianceCadenceBud(CadenceBudBase):
@@ -200,19 +178,15 @@ class VarianceCadenceBud(CadenceBudBase):
     def __init__(self):
         super().__init__(constant_name=BudName.variance_cadence)
 
-    def getter(self, key) -> np.float64:
+    def getter(self) -> np.float64:
         """
         Return the cadence variance between frames.
 
-        Parameters
-        ----------
-        key
-            The input key
         Returns
         -------
         Return the variance of the cadences over the input frames
         """
-        return np.var(np.diff(super().getter(key)))
+        return np.var(np.diff(super().getter()))
 
 
 class TaskDateBeginBud(TaskDatetimeBudBase):
@@ -231,19 +205,16 @@ class TaskDateBeginBud(TaskDatetimeBudBase):
             task_type_parsing_function=task_type_parsing_function,
         )
 
-    def getter(self, key) -> str:
+    def getter(self) -> str:
         """
         Return the earliest date begin for the ip task type converted from unix seconds to datetime string.
 
-        Parameters
-        ----------
-        key
-            The input key
         Returns
         -------
         Return the minimum date begin as a datetime string
         """
-        min_time = super().getter(key)[0]
+        # super().getter() returns a sorted list
+        min_time = super().getter()[0]
         min_time_dt = datetime.fromtimestamp(min_time, tz=timezone.utc)
         return min_time_dt.strftime("%Y-%m-%dT%H:%M:%S.%f")
 
@@ -286,7 +257,7 @@ class ReadoutExpTimeFlower(RoundTimeFlowerBase):
         )
 
 
-class TaskRoundTimeBudBase(Stem):
+class TaskRoundTimeBudBase(SetStem):
     """
     Base class for making buds that need a set of rounded times for computing for specific task types.
 
@@ -310,8 +281,6 @@ class TaskRoundTimeBudBase(Stem):
         The function used to convert a header into an IP task type
     """
 
-    key_to_petal_dict: dict[str, float]
-
    def __init__(
         self,
         stem_name: str,
@@ -349,20 +318,15 @@ class TaskRoundTimeBudBase(Stem):
 
         return SpilledDirt
 
-    def getter(self, key: Hashable) -> tuple[float, ...]:
+    def getter(self) -> tuple[float, ...]:
         """
         Return a tuple of the sorted unique values found.
 
-        Parameters
-        ----------
-        key
-            The input key
-
         Returns
         -------
         A tuple that is the sorted set of unique times
         """
-        return tuple(sorted(set(self.key_to_petal_dict.values())))
+        return tuple(sorted(self.value_set))
 
 
 class TaskExposureTimesBud(TaskRoundTimeBudBase):
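All four cadence buds reduce the same sorted tuple of unix-second timestamps returned by the base `getter`; only the numpy reduction applied to the frame-to-frame differences changes. Illustratively, with made-up timestamps:

    import numpy as np

    # Sorted observation times in unix seconds, as TaskDatetimeBudBase.getter() returns them
    times = np.array([0.0, 10.0, 20.5, 30.0])

    gaps = np.diff(times)  # frame-to-frame cadences: [10.0, 10.5, 9.5]
    print(np.mean(gaps))   # AverageCadenceBud  -> 10.0
    print(np.max(gaps))    # MaximumCadenceBud  -> 10.5
    print(np.min(gaps))    # MinimumCadenceBud  -> 9.5
    print(np.var(gaps))    # VarianceCadenceBud -> ~0.1667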
dkist_processing_common/parsers/unique_bud.py
@@ -3,15 +3,15 @@
 from enum import StrEnum
 from typing import Callable
 
+from dkist_processing_common.models.flower_pot import SetStem
 from dkist_processing_common.models.flower_pot import SpilledDirt
-from dkist_processing_common.models.flower_pot import Stem
 from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
 from dkist_processing_common.parsers.task import passthrough_header_ip_task
 
 
-class UniqueBud(Stem):
+class UniqueBud(SetStem):
     """
-    Pre-made flower that reads a single header key from all files and raises a ValueError if it is not unique.
+    Pre-made `SetStem` that reads a single header key from all files and raises a ValueError if it is not unique.
 
     Parameters
     ----------
@@ -46,24 +46,17 @@
         """
         return getattr(fits_obj, self.metadata_key)
 
-    def getter(self, key):
+    def getter(self):
         """
         Get the value for this key and raise an error if it is not unique.
 
-        Parameters
-        ----------
-        key
-            The input key
         Returns
         -------
         The value associated with this input key
         """
-        value_set = set(self.key_to_petal_dict.values())
-        if len(value_set) > 1:
-            raise ValueError(
-                f"Multiple {self.stem_name} values found for key {key}. Values: {value_set}"
-            )
-        return value_set.pop()
+        if len(self.value_set) > 1:
+            raise ValueError(f"Multiple {self.stem_name} values found! Values: {self.value_set}")
+        return self.value_set.pop()
 
 
 class TaskUniqueBud(UniqueBud):
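Together with the `near_bud.py` hunk above, the split of `Stem` into `ListStem` and `SetStem` names the two collection semantics the old `key_to_petal_dict` idiom hand-rolled at each call site: `value_list` keeps one entry per file (duplicates matter for means and tolerances), while `value_set` dedupes (what uniqueness checks want). Assuming those properties behave like plain `list`/`set` views of the collected `setter` results:

    collected = [0.05, 0.05, 0.06]  # one setter result per ingested file

    value_list = list(collected)  # ListStem view: [0.05, 0.05, 0.06]; mean() weights by frame count
    value_set = set(collected)    # SetStem view: {0.05, 0.06}

    # UniqueBud.getter() raises when more than one distinct value survives:
    if len(value_set) > 1:
        raise ValueError(f"Multiple values found! Values: {value_set}")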
dkist_processing_common/tasks/l1_output_data.py
@@ -2,18 +2,14 @@
 
 import logging
 from abc import ABC
-from itertools import chain
 from pathlib import Path
 from typing import Iterable
 
-from dkist_processing_common.codecs.quality import quality_data_decoder
 from dkist_processing_common.codecs.quality import quality_data_encoder
 from dkist_processing_common.models.message import CatalogFrameMessage
 from dkist_processing_common.models.message import CatalogFrameMessageBody
 from dkist_processing_common.models.message import CatalogObjectMessage
 from dkist_processing_common.models.message import CatalogObjectMessageBody
-from dkist_processing_common.models.message import CreateQualityReportMessage
-from dkist_processing_common.models.message import CreateQualityReportMessageBody
 from dkist_processing_common.models.tags import Tag
 from dkist_processing_common.tasks.mixin.globus import GlobusMixin
 from dkist_processing_common.tasks.mixin.interservice_bus import InterserviceBusMixin
@@ -62,15 +58,19 @@ class TransferL1Data(TransferDataBase, GlobusMixin):
         with self.telemetry_span("Upload quality data"):
             self.transfer_quality_data()
 
-        with self.telemetry_span("Upload science frames"):
+        with self.telemetry_span("Upload output frames"):
             self.transfer_output_frames()
 
     def transfer_output_frames(self):
-        """Create a Globus transfer for all output data."""
-        transfer_items = self.build_output_frame_transfer_list()
+        """Create a Globus transfer for all output data, as well as any available dataset extras."""
+        output_transfer_items = self.build_output_frame_transfer_list()
+        dataset_extra_transfer_items = self.build_dataset_extra_transfer_list()
+        transfer_items = output_transfer_items + dataset_extra_transfer_items
 
         logger.info(
             f"Preparing globus transfer {len(transfer_items)} items: "
+            f"{len(output_transfer_items)} output frames. "
+            f"{len(dataset_extra_transfer_items)} dataset extras. "
             f"recipe_run_id={self.recipe_run_id}. "
             f"transfer_items={transfer_items[:3]}..."
         )
@@ -189,7 +189,9 @@ class SubmitDatasetMetadata(L1OutputDataBase):
 class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
     """Task class for publishing Catalog and Quality Messages."""
 
-    def frame_messages(self, paths: Iterable[Path]) -> list[CatalogFrameMessage]:
+    def frame_messages(
+        self, paths: Iterable[Path], folder_modifier: str | None = None
+    ) -> list[CatalogFrameMessage]:
         """
         Create the frame messages.
 
@@ -197,6 +199,8 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         ----------
         paths
             The input paths for which to publish frame messages
+        folder_modifier
+            A subdirectory to use if the files in paths are not in the base directory
 
         Returns
         -------
@@ -204,7 +208,7 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         """
         message_bodies = [
             CatalogFrameMessageBody(
-                objectName=self.format_object_key(path=p),
+                objectName=self.format_object_key(path=p, folder_modifier=folder_modifier),
                 conversationId=str(self.recipe_run_id),
                 bucket=self.destination_bucket,
             )
@@ -233,7 +237,7 @@ class PublishCatalogAndQualityMessages(L1OutputDataBase, InterserviceBusMixin):
         message_bodies = [
             CatalogObjectMessageBody(
                 objectType=object_type,
-                objectName=self.format_object_key(p),
+                objectName=self.format_object_key(path=p),
                 bucket=self.destination_bucket,
                 conversationId=str(self.recipe_run_id),
                 groupId=self.constants.dataset_id,
@@ -246,19 +250,24 @@
     def run(self) -> None:
         """Run method for this task."""
         with self.telemetry_span("Gather output data"):
-            frames = self.read(tags=self.output_frame_tags)
-            movies = self.read(tags=[Tag.output(), Tag.movie()])
+            frames = self.read(
+                tags=self.output_frame_tags
+            )  # frames is kept as a generator as it is much longer than the other file categories
+            extras = list(self.read(tags=self.extra_frame_tags))
+            movies = list(self.read(tags=[Tag.output(), Tag.movie()]))
             quality_data = self.read(tags=[Tag.output(), Tag.quality_data()])
         with self.telemetry_span("Create message objects"):
             messages = []
             messages += self.frame_messages(paths=frames)
             frame_message_count = len(messages)
+            messages += self.frame_messages(paths=extras, folder_modifier="extra")
+            extra_message_count = len(extras)
             messages += self.object_messages(paths=movies, object_type="MOVIE")
-            object_message_count = len(messages) - frame_message_count
+            object_message_count = len(movies)
             dataset_has_quality_data = self.dataset_has_quality_data
             if dataset_has_quality_data:
                 messages += self.object_messages(paths=quality_data, object_type="QDATA")
         with self.telemetry_span(
-            f"Publish messages: {frame_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
+            f"Publish messages: {frame_message_count = }, {extra_message_count = }, {object_message_count = }, {dataset_has_quality_data = }"
         ):
             self.interservice_bus_publish(messages=messages)
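The `run` rewrite materializes `extras` and `movies` with `list()` so their counts can be taken directly with `len()`, while `frames` deliberately stays a lazy generator because it is by far the largest category (the inline comment says as much). The distinction matters because generators have no length; a quick illustration:

    def read_paths():  # stand-in for self.read(tags=...)
        yield from ("a.fits", "b.fits")

    frames = read_paths()
    # len(frames) would raise: TypeError: object of type 'generator' has no len()

    movies = list(read_paths())  # materialize the small categories up front...
    print(len(movies))           # ...so counts are cheap and reusable -> 2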
dkist_processing_common/tasks/output_data_base.py
@@ -22,19 +22,23 @@ class OutputDataBase(WorkflowTaskBase, ABC):
         """Get the destination bucket."""
         return self.metadata_store_recipe_run.configuration.destination_bucket
 
-    def format_object_key(self, path: Path) -> str:
+    def format_object_key(self, path: Path, folder_modifier: str | None = None) -> str:
         """
         Convert output paths into object store keys.
 
         Parameters
         ----------
         path: the Path to convert
+        folder_modifier: optional folder name to insert into the path
 
         Returns
         -------
         formatted path in the object store
         """
-        object_key = self.destination_folder / Path(path.name)
+        if folder_modifier:
+            object_key = self.destination_folder / Path(folder_modifier) / Path(path.name)
+        else:
+            object_key = self.destination_folder / Path(path.name)
         return str(object_key)
 
     @property
@@ -52,6 +56,11 @@
         """Tags that uniquely identify L1 fits frames i.e. the dataset-inventory-able frames."""
         return [Tag.output(), Tag.frame()]
 
+    @property
+    def extra_frame_tags(self) -> list[str]:
+        """Tags that uniquely identify dataset extra fits frames."""
+        return [Tag.output(), Tag.extra()]
+
 
 class TransferDataBase(OutputDataBase, ObjectStoreMixin, ABC):
     """Base class for transferring data from scratch to somewhere else."""
@@ -73,9 +82,21 @@
         """Build a list of GlobusTransfer items corresponding to all OUTPUT (i.e., L1) frames."""
         science_frame_paths: list[Path] = list(self.read(tags=self.output_frame_tags))
 
+        return self.build_transfer_list(science_frame_paths)
+
+    def build_dataset_extra_transfer_list(self) -> list[GlobusTransferItem]:
+        """Build a list of GlobusTransfer items corresponding to all extra dataset files."""
+        extra_paths: list[Path] = list(self.read(tags=self.extra_frame_tags))
+
+        return self.build_transfer_list(paths=extra_paths, destination_folder_modifier="extra")
+
+    def build_transfer_list(
+        self, paths: list[Path], destination_folder_modifier: str | None = None
+    ) -> list[GlobusTransferItem]:
+        """Given a list of paths, build a list of GlobusTransfer items."""
         transfer_items = []
-        for p in science_frame_paths:
-            object_key = self.format_object_key(p)
+        for p in paths:
+            object_key = self.format_object_key(path=p, folder_modifier=destination_folder_modifier)
             destination_path = Path(self.destination_bucket, object_key)
             item = GlobusTransferItem(
                 source_path=p,
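With `folder_modifier` set, `format_object_key` nests the file one directory deeper under the destination folder, which is how dataset extras end up under an `extra/` prefix in the object store. A standalone rendition of the new logic (the `destination_folder` value here is invented for the example):

    from pathlib import Path


    def format_object_key(
        destination_folder: Path, path: Path, folder_modifier: str | None = None
    ) -> str:
        # Mirrors OutputDataBase.format_object_key after this change
        if folder_modifier:
            return str(destination_folder / folder_modifier / path.name)
        return str(destination_folder / path.name)


    base = Path("pid_1_23/BDWXK")
    print(format_object_key(base, Path("/scratch/VISP_L1_0001.fits")))
    # -> pid_1_23/BDWXK/VISP_L1_0001.fits
    print(format_object_key(base, Path("/scratch/atlas_weights.fits"), folder_modifier="extra"))
    # -> pid_1_23/BDWXK/extra/atlas_weights.fits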
dkist_processing_common/tasks/parse_l0_input_data.py
@@ -397,6 +397,8 @@ class ParseDataBase(WorkflowTaskBase, ABC):
         return self.read(
             tags=self.tags_for_input_frames,
             decoder=fits_access_decoder,
+            checksum=False,
+            disable_image_compression=True,
             fits_access_class=self.fits_parsing_class,
         )
 
@@ -419,8 +421,8 @@
         """
         for stem in constant_pot:
             with self.telemetry_span(f"Setting value of constant {stem.stem_name}"):
-                if len(stem.petals) == 0:
-                    # There are no petals so nothing to do
+                if not stem.can_be_picked:
+                    # Nothing to do
                     continue
                 if stem.bud.value is Thorn:
                     # Must've been a picky bud that passed. We don't want to pick it because it has no value
dkist_processing_common/tasks/transfer_input_data.py
@@ -78,6 +78,7 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin):
                 self.tag(obj_path, tags=obj.tag)
             else:
                 self.tag(obj_path, tags=[Tag.input(), Tag.frame()])
+        logger.info(f"Tagged {len(input_dataset_objects)} input dataset objects in scratch")
 
     def run(self) -> None:
         """Execute the data transfer."""