dkist-processing-common 10.5.4__py3-none-any.whl → 12.1.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. changelog/280.misc.rst +1 -0
  2. changelog/282.feature.2.rst +2 -0
  3. changelog/282.feature.rst +2 -0
  4. changelog/284.feature.rst +1 -0
  5. changelog/285.feature.rst +2 -0
  6. changelog/285.misc.rst +2 -0
  7. changelog/286.feature.rst +2 -0
  8. changelog/287.misc.rst +1 -0
  9. dkist_processing_common/__init__.py +1 -0
  10. dkist_processing_common/_util/constants.py +1 -0
  11. dkist_processing_common/_util/graphql.py +1 -0
  12. dkist_processing_common/_util/scratch.py +9 -9
  13. dkist_processing_common/_util/tags.py +1 -0
  14. dkist_processing_common/codecs/array.py +20 -0
  15. dkist_processing_common/codecs/asdf.py +9 -3
  16. dkist_processing_common/codecs/basemodel.py +22 -0
  17. dkist_processing_common/codecs/bytes.py +1 -0
  18. dkist_processing_common/codecs/fits.py +37 -9
  19. dkist_processing_common/codecs/iobase.py +1 -0
  20. dkist_processing_common/codecs/json.py +1 -0
  21. dkist_processing_common/codecs/path.py +1 -0
  22. dkist_processing_common/codecs/quality.py +1 -1
  23. dkist_processing_common/codecs/str.py +1 -0
  24. dkist_processing_common/config.py +64 -25
  25. dkist_processing_common/manual.py +6 -8
  26. dkist_processing_common/models/constants.py +373 -37
  27. dkist_processing_common/models/dkist_location.py +27 -0
  28. dkist_processing_common/models/fits_access.py +48 -0
  29. dkist_processing_common/models/flower_pot.py +231 -9
  30. dkist_processing_common/models/fried_parameter.py +41 -0
  31. dkist_processing_common/models/graphql.py +66 -75
  32. dkist_processing_common/models/input_dataset.py +117 -0
  33. dkist_processing_common/models/message.py +1 -1
  34. dkist_processing_common/models/message_queue_binding.py +1 -1
  35. dkist_processing_common/models/metric_code.py +2 -0
  36. dkist_processing_common/models/parameters.py +65 -28
  37. dkist_processing_common/models/quality.py +50 -5
  38. dkist_processing_common/models/tags.py +23 -21
  39. dkist_processing_common/models/task_name.py +3 -2
  40. dkist_processing_common/models/telemetry.py +28 -0
  41. dkist_processing_common/models/wavelength.py +3 -1
  42. dkist_processing_common/parsers/average_bud.py +46 -0
  43. dkist_processing_common/parsers/cs_step.py +13 -12
  44. dkist_processing_common/parsers/dsps_repeat.py +6 -4
  45. dkist_processing_common/parsers/experiment_id_bud.py +12 -4
  46. dkist_processing_common/parsers/id_bud.py +42 -27
  47. dkist_processing_common/parsers/l0_fits_access.py +5 -3
  48. dkist_processing_common/parsers/l1_fits_access.py +51 -23
  49. dkist_processing_common/parsers/lookup_bud.py +125 -0
  50. dkist_processing_common/parsers/near_bud.py +21 -20
  51. dkist_processing_common/parsers/observing_program_id_bud.py +24 -0
  52. dkist_processing_common/parsers/proposal_id_bud.py +13 -5
  53. dkist_processing_common/parsers/quality.py +2 -0
  54. dkist_processing_common/parsers/retarder.py +32 -0
  55. dkist_processing_common/parsers/single_value_single_key_flower.py +6 -1
  56. dkist_processing_common/parsers/task.py +8 -6
  57. dkist_processing_common/parsers/time.py +178 -72
  58. dkist_processing_common/parsers/unique_bud.py +21 -22
  59. dkist_processing_common/parsers/wavelength.py +5 -3
  60. dkist_processing_common/tasks/__init__.py +3 -2
  61. dkist_processing_common/tasks/assemble_movie.py +4 -3
  62. dkist_processing_common/tasks/base.py +59 -60
  63. dkist_processing_common/tasks/l1_output_data.py +54 -53
  64. dkist_processing_common/tasks/mixin/globus.py +24 -27
  65. dkist_processing_common/tasks/mixin/interservice_bus.py +1 -0
  66. dkist_processing_common/tasks/mixin/metadata_store.py +108 -243
  67. dkist_processing_common/tasks/mixin/object_store.py +22 -0
  68. dkist_processing_common/tasks/mixin/quality/__init__.py +1 -0
  69. dkist_processing_common/tasks/mixin/quality/_base.py +8 -1
  70. dkist_processing_common/tasks/mixin/quality/_metrics.py +166 -14
  71. dkist_processing_common/tasks/output_data_base.py +4 -3
  72. dkist_processing_common/tasks/parse_l0_input_data.py +277 -15
  73. dkist_processing_common/tasks/quality_metrics.py +9 -9
  74. dkist_processing_common/tasks/teardown.py +7 -7
  75. dkist_processing_common/tasks/transfer_input_data.py +67 -69
  76. dkist_processing_common/tasks/trial_catalog.py +77 -17
  77. dkist_processing_common/tasks/trial_output_data.py +16 -17
  78. dkist_processing_common/tasks/write_l1.py +102 -72
  79. dkist_processing_common/tests/conftest.py +32 -173
  80. dkist_processing_common/tests/mock_metadata_store.py +271 -0
  81. dkist_processing_common/tests/test_assemble_movie.py +4 -4
  82. dkist_processing_common/tests/test_assemble_quality.py +32 -4
  83. dkist_processing_common/tests/test_base.py +5 -19
  84. dkist_processing_common/tests/test_codecs.py +103 -12
  85. dkist_processing_common/tests/test_constants.py +15 -0
  86. dkist_processing_common/tests/test_dkist_location.py +15 -0
  87. dkist_processing_common/tests/test_fits_access.py +56 -19
  88. dkist_processing_common/tests/test_flower_pot.py +147 -5
  89. dkist_processing_common/tests/test_fried_parameter.py +27 -0
  90. dkist_processing_common/tests/test_input_dataset.py +78 -361
  91. dkist_processing_common/tests/test_interservice_bus.py +1 -0
  92. dkist_processing_common/tests/test_interservice_bus_mixin.py +1 -1
  93. dkist_processing_common/tests/test_manual_processing.py +33 -0
  94. dkist_processing_common/tests/test_output_data_base.py +5 -7
  95. dkist_processing_common/tests/test_parameters.py +71 -22
  96. dkist_processing_common/tests/test_parse_l0_input_data.py +115 -32
  97. dkist_processing_common/tests/test_publish_catalog_messages.py +2 -24
  98. dkist_processing_common/tests/test_quality.py +1 -0
  99. dkist_processing_common/tests/test_quality_mixin.py +255 -23
  100. dkist_processing_common/tests/test_scratch.py +2 -1
  101. dkist_processing_common/tests/test_stems.py +511 -168
  102. dkist_processing_common/tests/test_submit_dataset_metadata.py +3 -7
  103. dkist_processing_common/tests/test_tags.py +1 -0
  104. dkist_processing_common/tests/test_task_name.py +1 -1
  105. dkist_processing_common/tests/test_task_parsing.py +17 -7
  106. dkist_processing_common/tests/test_teardown.py +28 -24
  107. dkist_processing_common/tests/test_transfer_input_data.py +270 -125
  108. dkist_processing_common/tests/test_transfer_l1_output_data.py +2 -3
  109. dkist_processing_common/tests/test_trial_catalog.py +83 -8
  110. dkist_processing_common/tests/test_trial_output_data.py +46 -73
  111. dkist_processing_common/tests/test_workflow_task_base.py +8 -10
  112. dkist_processing_common/tests/test_write_l1.py +298 -76
  113. dkist_processing_common-12.1.0rc1.dist-info/METADATA +265 -0
  114. dkist_processing_common-12.1.0rc1.dist-info/RECORD +134 -0
  115. {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/WHEEL +1 -1
  116. docs/conf.py +1 -0
  117. docs/index.rst +1 -1
  118. docs/landing_page.rst +13 -0
  119. dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
  120. dkist_processing_common-10.5.4.dist-info/METADATA +0 -175
  121. dkist_processing_common-10.5.4.dist-info/RECORD +0 -112
  122. {dkist_processing_common-10.5.4.dist-info → dkist_processing_common-12.1.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,16 @@
1
1
  """
2
2
  Framework for grouping multiple keys and values with arbitrary logic.
3
3
 
4
- Defines:
5
- Stem -> ABC for groupings that depend on both the key and (maybe) value. Subgroups (Petals) are implied but not enforced.
4
+ The key components are:
5
+ **Stem:** ABC for groupings that depend on both the key and (maybe) value. Subgroups (Petals) are implied but not enforced.
6
6
 
7
- FlowerPot -> Container for Stem children (Flowers)
7
+ **ListStem:** ABC for groups that depend on value only. More limited, but faster than `Stem` for cases where the keys don't matter.
8
+
9
+ **SetStem:** ABC for groups that depend on value only and the values are well represented by a `set`. Even more limited, but faster than `Stem` for cases where the keys don't matter.
10
+
11
+ **FlowerPot:** Container for Stem children (Flowers)
8
12
  """
13
+
9
14
  from __future__ import annotations
10
15
 
11
16
  from abc import ABC
@@ -16,7 +21,7 @@ from typing import Any
16
21
 
17
22
 
18
23
  class FlowerPot:
19
- """Base class to hold multiple sets (stems) of key, value pairs."""
24
+ """Base class to hold multiple sets (Stems) of key, value pairs."""
20
25
 
21
26
  def __init__(self):
22
27
  self.stems: list[Stem] = list()
@@ -54,9 +59,9 @@ class FlowerPot:
54
59
 
55
60
  class SpilledDirt:
56
61
  """
57
- A custom class for when a Flower wants the FlowerPot to skip that particular key/value.
62
+ A custom class for when a Stem wants the FlowerPot to skip that particular key/value.
58
63
 
59
- Exists because None, False, [], (), etc. etc. are all valid Flower return values
64
+ Exists because None, False, [], (), etc. etc. are all valid Stem return values
60
65
  """
61
66
 
62
67
 
@@ -89,7 +94,7 @@ class Petal:
89
94
 
90
95
  class Stem(ABC):
91
96
  """
92
- Base group for grouping keys via arbitrary logic on the total collection of keys and values.
97
+ Base class for grouping keys via arbitrary logic on the total collection of keys and values.
93
98
 
94
99
  Parameters
95
100
  ----------
@@ -132,6 +137,15 @@ class Stem(ABC):
132
137
 
133
138
  return self._petal_cache
134
139
 
140
+ @property
141
+ def can_be_picked(self) -> bool:
142
+ """
143
+ Return True if there are any values to be picked.
144
+
145
+ A `Stem` could have no values even after dirt is added if all of the results were `SpilledDirt`.
146
+ """
147
+ return len(self.petals) > 0
148
+
135
149
  def _generate_petal_list(self) -> None:
136
150
  """
137
151
  Generate a list of petals.
@@ -145,7 +159,7 @@ class Stem(ABC):
145
159
  changes, `key_to_petal_dict`, is unhashable.
146
160
  """
147
161
  petal_to_key_dict = defaultdict(list)
148
- for key, petal in self.key_to_petal_dict.items():
162
+ for key in self.key_to_petal_dict.keys():
149
163
  petal = self.getter(key)
150
164
  petal_to_key_dict[petal].append(key)
151
165
 
@@ -179,7 +193,7 @@ class Stem(ABC):
179
193
  @abstractmethod
180
194
  def getter(self, key: Hashable) -> Hashable:
181
195
  """
182
- Logic to apply to all ingested values when picking the Flower.
196
+ Logic to apply to all ingested values when picking the Stem.
183
197
 
184
198
  Implemented in derived class.
185
199
 
@@ -192,3 +206,211 @@ class Stem(ABC):
192
206
  The value
193
207
  """
194
208
  pass
209
+
210
+
211
+ class ListStem(ABC):
212
+ """
213
+ Base class for collecting and applying logic to values in a `list` with a `Stem`-like interface.
214
+
215
+ Unlike the full `Stem`, this class does NOT retain information about the keys and thus does no grouping of keys based
216
+ on values. The direct consequence of this is that the `.petals` property is undefined and will raise an ``AttributeError``
217
+ if accessed. This also means there is no need to invert the `key_to_petal_dict` (because it doesn't exist), which,
218
+ in turn, means there is no need to run the `getter` for every key. The result is that the `bud` property only needs
219
+ one call to `getter`. Thus, the calculation of a single value derived from all values (i.e., `bud`) is much faster
220
+ than using a full `Stem`.
221
+
222
+ Parameters
223
+ ----------
224
+ stem_name
225
+ The name to be associated with the stem
226
+ """
227
+
228
+ def __init__(self, stem_name: Any):
229
+ self.stem_name = stem_name
230
+ self.value_list: list = list()
231
+ self._need_to_compute_bud_value: bool = True
232
+
233
+ def update(self, key: Any, value: Any) -> None:
234
+ """
235
+ Ingest a single key/value pair. Note that the ``key`` is not used.
236
+
237
+ Parameters
238
+ ----------
239
+ key
240
+ The key (unused)
241
+
242
+ value
243
+ The value
244
+
245
+ Returns
246
+ -------
247
+ None
248
+ """
249
+ result = self.setter(value)
250
+ if result is not SpilledDirt:
251
+ self.value_list.append(result)
252
+ self._need_to_compute_bud_value = True
253
+
254
+ @property
255
+ def petals(self) -> None:
256
+ """Raise an error because `ListStem` does not retain key information and therefore cannot group keys."""
257
+ raise AttributeError(
258
+ f"{self.__class__.__name__} subclasses ListStem and therefore does not define the `petals` property"
259
+ )
260
+
261
+ @property
262
+ def can_be_picked(self) -> bool:
263
+ """
264
+ Return True if there are any values to be picked.
265
+
266
+ A `ListStem` could have no values even after dirt is added if all of the results were `SpilledDirt`.
267
+ """
268
+ return len(self.value_list) > 0
269
+
270
+ @property
271
+ def bud(self) -> Petal:
272
+ """Return the result of `getter` packaged in a `Petal` object."""
273
+ if self._need_to_compute_bud_value:
274
+ self._value_cache = self.getter()
275
+ self._need_to_compute_bud_value = False
276
+
277
+ return Petal((self._value_cache, "LISTSTEM_NOT_USED"))
278
+
279
+ @abstractmethod
280
+ def setter(self, value: Any) -> Any:
281
+ """
282
+ Logic to apply to a single value on ingest.
283
+
284
+ Implemented in derived class.
285
+
286
+ Parameters
287
+ ----------
288
+ value
289
+ The value to be added
290
+
291
+ Returns
292
+ -------
293
+ Any
294
+ """
295
+ pass
296
+
297
+ @abstractmethod
298
+ def getter(self) -> Any:
299
+ """
300
+ Logic to apply to all ingested values when computing the `bud`.
301
+
302
+ Implemented in derived class.
303
+
304
+ Returns
305
+ -------
306
+ The value of the bud
307
+ """
308
+ pass
309
+
310
+
311
+ class SetStem(ABC):
312
+ """
313
+ Base class for collecting and applying logic to values in a `set` with a `Stem`-like interface.
314
+
315
+ Unlike the full `Stem`, this class does NOT retain information about the keys and thus does no grouping of keys based
316
+ on values. The direct consequence of this is that the `.petals` property is undefined and will raise an ``AttributeError``
317
+ if accessed. This also means there is no need to invert the `key_to_petal_dict` (because it doesn't exist), which,
318
+ in turn, means there is no need to run the `getter` for every key. The result is that the `bud` property only needs
319
+ one call to `getter`. Combined with the efficiency of storing values in a `set`, the calculation of a single value
320
+ derived from all values (i.e., `bud`) is much faster than using a full `Stem`.
321
+
322
+ .. Note::
323
+ The use of a `set` as the underlying storage mechanism means information regarding how many times a particular value
324
+ is present will be lost. It also means the return type of `setter` must be hashable. Both of these constraints can
325
+ be avoided by using `ListStem`, which still gets a significant speedup over `Stem` by dropping key information.
326
+
327
+
328
+ Parameters
329
+ ----------
330
+ stem_name
331
+ The name to be associated with the stem
332
+ """
333
+
334
+ def __init__(self, stem_name: Any):
335
+ self.stem_name = stem_name
336
+ self.value_set: set = set()
337
+ self._need_to_compute_bud_value: bool = True
338
+
339
+ def update(self, key: Any, value: Any) -> None:
340
+ """
341
+ Ingest a single key/value pair. Note that the ``key`` is not used.
342
+
343
+ Parameters
344
+ ----------
345
+ key
346
+ The key (unused)
347
+
348
+ value
349
+ The value
350
+
351
+ Returns
352
+ -------
353
+ None
354
+ """
355
+ result = self.setter(value)
356
+ if result is not SpilledDirt:
357
+ self.value_set.add(result)
358
+ self._need_to_compute_bud_value = True
359
+
360
+ @property
361
+ def petals(self) -> None:
362
+ """Raise an error because `SetStem` does not retain key information and therefore cannot group keys."""
363
+ raise AttributeError(
364
+ f"{self.__class__.__name__} subclasses SetStem and therefore does not define the `petals` property"
365
+ )
366
+
367
+ @property
368
+ def can_be_picked(self) -> bool:
369
+ """
370
+ Return True if there are any values to be picked.
371
+
372
+ A `SetStem` could have no values even after dirt is added if all of the results were `SpilledDirt`.
373
+ """
374
+ return len(self.value_set) > 0
375
+
376
+ @property
377
+ def bud(self) -> Petal:
378
+ """Return the result of `getter` packaged in a `Petal` object."""
379
+ if self._need_to_compute_bud_value:
380
+ self._value_cache = self.getter()
381
+ self._need_to_compute_bud_value = False
382
+
383
+ return Petal((self._value_cache, "SETSTEM_NOT_USED"))
384
+
385
+ @abstractmethod
386
+ def setter(self, value: Any) -> Hashable:
387
+ """
388
+ Logic to apply to a single value on ingest.
389
+
390
+ Must return a Hashable object because the result will be stored in a `set`.
391
+
392
+ Implemented in derived class.
393
+
394
+ Parameters
395
+ ----------
396
+ value
397
+ The value to be added
398
+
399
+ Returns
400
+ -------
401
+ Hashable
402
+ """
403
+ pass
404
+
405
+ @abstractmethod
406
+ def getter(self) -> Any:
407
+ """
408
+ Logic to apply to all ingested values when computing the `bud`.
409
+
410
+ Implemented in derived class.
411
+
412
+ Returns
413
+ -------
414
+ The value of the bud
415
+ """
416
+ pass
@@ -0,0 +1,41 @@
1
+ """Helper methods to handle fried parameter / r0 validity."""
2
+
3
+
4
+ def r0_valid(
5
+ r0: float | None = None,
6
+ ao_lock: bool | None = None,
7
+ num_out_of_bounds_ao_values: int | None = None,
8
+ ) -> bool:
9
+ """
10
+ Determine whether the r0 value should be considered valid; it is invalid if any of the following conditions hold.
11
+
12
+ * ATMOS_R0 does not exist in the header.
13
+ * the value of ATMOS_R0 is greater than 0.3m
14
+ * the AO is not locked
15
+ * the value of OOBSHIFT is greater than 100
16
+
17
+ When the adaptive optics system is not locked, the ATMOS_R0 keyword is still filled with the output of the
18
+ Fried parameter calculation. The inputs are not valid in this instance and the value should be removed.
19
+
20
+ Sometimes, due to timing differences between the calculation of the Fried parameter and the AO lock status being
21
+ updated, non-physical values can be recorded for ATMOS_R0 right on the edge of an AO_LOCK state change. To
22
+ combat this, any remaining R0 values greater than 30cm (which is beyond the realm of physical possibility for
23
+ solar observations) are also removed.
24
+
25
+ In addition, the number of AO out-of-bound values is given in the keyword OOBSHIFT and the AO team advises
26
+ that the r0 value is considered reliable only when that count is under 100. If the OOBSHIFT key doesn't exist, this check
27
+ should be ignored.
28
+ """
29
+ if r0 is None:
30
+ return False
31
+
32
+ if r0 > 0.3:
33
+ return False
34
+
35
+ if ao_lock is not True:
36
+ return False
37
+
38
+ if num_out_of_bounds_ao_values is not None and num_out_of_bounds_ao_values > 100:
39
+ return False
40
+
41
+ return True
@@ -1,24 +1,35 @@
1
1
  """GraphQL Data models for the metadata store api."""
2
- from dataclasses import dataclass
3
2
 
3
+ from pydantic import BaseModel
4
+ from pydantic import ConfigDict
5
+ from pydantic import Json
6
+ from pydantic import field_serializer
7
+ from pydantic import field_validator
4
8
 
5
- @dataclass
6
- class RecipeRunMutation:
9
+ from dkist_processing_common.models.input_dataset import InputDatasetBaseModel
10
+ from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
11
+
12
+
13
+ class GraphqlBaseModel(BaseModel):
14
+ """Custom BaseModel for input datasets."""
15
+
16
+ model_config = ConfigDict(validate_assignment=True)
17
+
18
+
19
+ class RecipeRunMutation(GraphqlBaseModel):
7
20
  """Recipe run mutation record."""
8
21
 
9
22
  recipeRunId: int
10
23
  recipeRunStatusId: int
11
24
 
12
25
 
13
- @dataclass
14
- class RecipeRunStatusQuery:
26
+ class RecipeRunStatusQuery(GraphqlBaseModel):
15
27
  """Recipe run status query for the recipeRunStatuses endpoint."""
16
28
 
17
29
  recipeRunStatusName: str
18
30
 
19
31
 
20
- @dataclass
21
- class RecipeRunStatusMutation:
32
+ class RecipeRunStatusMutation(GraphqlBaseModel):
22
33
  """Recipe run status mutation record."""
23
34
 
24
35
  recipeRunStatusName: str
@@ -26,38 +37,39 @@ class RecipeRunStatusMutation:
26
37
  recipeRunStatusDescription: str
27
38
 
28
39
 
29
- @dataclass
30
- class RecipeRunStatusResponse:
40
+ class RecipeRunStatusResponse(GraphqlBaseModel):
31
41
  """Response to a recipe run status query."""
32
42
 
33
43
  recipeRunStatusId: int
34
44
 
35
45
 
36
- @dataclass
37
- class InputDatasetPartTypeResponse:
46
+ class InputDatasetPartTypeResponse(GraphqlBaseModel):
38
47
  """Response class for the input dataset part type entity."""
39
48
 
40
49
  inputDatasetPartTypeName: str
41
50
 
42
51
 
43
- @dataclass
44
- class InputDatasetPartResponse:
52
+ class InputDatasetPartResponse(InputDatasetBaseModel):
45
53
  """Response class for the input dataset part entity."""
46
54
 
47
55
  inputDatasetPartId: int
48
- inputDatasetPartDocument: str
56
+ # inputDatasetPartDocument : Json[InputDatasetPartDocumentList] # will work in gqlclient v2
57
+ inputDatasetPartDocument: Json[list]
49
58
  inputDatasetPartType: InputDatasetPartTypeResponse
50
59
 
60
+ @field_validator("inputDatasetPartDocument", mode="after")
61
+ @classmethod
62
+ def _use_frame_or_parameter_model(cls, value_list): # not needed for gqlclient v2
63
+ return InputDatasetPartDocumentList(doc_list=value_list)
51
64
 
52
- @dataclass
53
- class InputDatasetInputDatasetPartResponse:
65
+
66
+ class InputDatasetInputDatasetPartResponse(GraphqlBaseModel):
54
67
  """Response class for the join entity between input datasets and input dataset parts."""
55
68
 
56
69
  inputDatasetPart: InputDatasetPartResponse
57
70
 
58
71
 
59
- @dataclass
60
- class InputDatasetResponse:
72
+ class InputDatasetResponse(GraphqlBaseModel):
61
73
  """Input dataset query response."""
62
74
 
63
75
  inputDatasetId: int
@@ -65,62 +77,78 @@ class InputDatasetResponse:
65
77
  inputDatasetInputDatasetParts: list[InputDatasetInputDatasetPartResponse]
66
78
 
67
79
 
68
- @dataclass
69
- class InputDatasetRecipeInstanceResponse:
80
+ class InputDatasetRecipeInstanceResponse(GraphqlBaseModel):
70
81
  """Recipe instance query response."""
71
82
 
72
83
  inputDataset: InputDatasetResponse
73
84
 
74
85
 
75
- @dataclass
76
- class InputDatasetRecipeRunResponse:
86
+ class InputDatasetRecipeRunResponse(GraphqlBaseModel):
77
87
  """Recipe run query response."""
78
88
 
79
89
  recipeInstance: InputDatasetRecipeInstanceResponse
80
90
 
81
91
 
82
- @dataclass
83
- class RecipeInstanceResponse:
92
+ class RecipeInstanceResponse(GraphqlBaseModel):
84
93
  """Recipe instance query response."""
85
94
 
86
95
  recipeId: int
87
96
  inputDatasetId: int
88
97
 
89
98
 
90
- @dataclass
91
- class RecipeRunProvenanceResponse:
99
+ class RecipeRunProvenanceResponse(GraphqlBaseModel):
92
100
  """Response for the metadata store recipeRunProvenances and mutations endpoints."""
93
101
 
94
102
  recipeRunProvenanceId: int
95
103
  isTaskManual: bool
96
104
 
97
105
 
98
- @dataclass
99
- class RecipeRunResponse:
106
+ class RecipeRunConfiguration(GraphqlBaseModel):
107
+ """Response class for a recipe run configuration dictionary."""
108
+
109
+ validate_l1_on_write: bool = True
110
+ destination_bucket: str = "data"
111
+ tile_size: int | None = None
112
+ trial_directory_name: str | None = None
113
+ trial_root_directory_name: str | None = None
114
+ teardown_enabled: bool = True
115
+ trial_exclusive_transfer_tag_lists: list[list[str]] | None = None
116
+
117
+
118
+ class RecipeRunResponse(GraphqlBaseModel):
100
119
  """Recipe run query response."""
101
120
 
102
121
  recipeInstance: RecipeInstanceResponse
103
122
  recipeInstanceId: int
104
123
  recipeRunProvenances: list[RecipeRunProvenanceResponse]
105
- configuration: str = None
124
+ # configuration: Json[RecipeRunConfiguration] | None # will work in gqlclient v2
125
+ configuration: Json[dict] | None
126
+
127
+ @field_validator("configuration", mode="after")
128
+ @classmethod
129
+ def _use_recipe_run_configuration_model(cls, value): # not needed for gqlclient v2
130
+ if value is None:
131
+ return RecipeRunConfiguration()
132
+ return RecipeRunConfiguration.model_validate(value)
106
133
 
134
+ @field_serializer("configuration")
135
+ def _serialize_as_basemodel(self, config: RecipeRunConfiguration):
136
+ return config.model_dump()
107
137
 
108
- @dataclass
109
- class RecipeRunMutationResponse:
138
+
139
+ class RecipeRunMutationResponse(GraphqlBaseModel):
110
140
  """Recipe run mutation response."""
111
141
 
112
142
  recipeRunId: int
113
143
 
114
144
 
115
- @dataclass
116
- class RecipeRunQuery:
145
+ class RecipeRunQuery(GraphqlBaseModel):
117
146
  """Query parameters for the metadata store endpoint recipeRuns."""
118
147
 
119
148
  recipeRunId: int
120
149
 
121
150
 
122
- @dataclass
123
- class DatasetCatalogReceiptAccountMutation:
151
+ class DatasetCatalogReceiptAccountMutation(GraphqlBaseModel):
124
152
  """
125
153
  Dataset catalog receipt account mutation record.
126
154
 
@@ -132,15 +160,13 @@ class DatasetCatalogReceiptAccountMutation:
132
160
  expectedObjectCount: int
133
161
 
134
162
 
135
- @dataclass
136
- class DatasetCatalogReceiptAccountResponse:
163
+ class DatasetCatalogReceiptAccountResponse(GraphqlBaseModel):
137
164
  """Dataset catalog receipt account response for query and mutation endpoints."""
138
165
 
139
166
  datasetCatalogReceiptAccountId: int
140
167
 
141
168
 
142
- @dataclass
143
- class RecipeRunProvenanceMutation:
169
+ class RecipeRunProvenanceMutation(GraphqlBaseModel):
144
170
  """Recipe run provenance mutation record."""
145
171
 
146
172
  inputDatasetId: int
@@ -149,39 +175,4 @@ class RecipeRunProvenanceMutation:
149
175
  taskName: str
150
176
  libraryVersions: str
151
177
  workflowVersion: str
152
- codeVersion: str = None
153
-
154
-
155
- @dataclass
156
- class QualityCreation:
157
- """Quality data creation record."""
158
-
159
- datasetId: str
160
- metricCode: str
161
- facet: str | None = None
162
- name: str | None = None
163
- description: str | None = None
164
- statement: str | None = None
165
- # JSON array
166
- warnings: str | None = None
167
- # JSON objects
168
- plotData: str | None = None
169
- tableData: str | None = None
170
- histogramData: str | None = None
171
- modmatData: str | None = None
172
- raincloudData: str | None = None
173
- efficiencyData: str | None = None
174
-
175
-
176
- @dataclass
177
- class QualitiesRequest:
178
- """Query parameters for quality data."""
179
-
180
- datasetId: str
181
-
182
-
183
- @dataclass
184
- class QualityResponse:
185
- """Query Response for quality data."""
186
-
187
- qualityId: int
178
+ codeVersion: str | None = None
@@ -0,0 +1,117 @@
1
+ """Input dataset models for the inputDatasetPartDocument from the metadata store api."""
2
+
3
+ import json
4
+ from datetime import datetime
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel
8
+ from pydantic import ConfigDict
9
+ from pydantic import Field
10
+ from pydantic import Json
11
+ from pydantic import PlainSerializer
12
+ from pydantic import field_serializer
13
+ from pydantic import field_validator
14
+ from pydantic.alias_generators import to_camel
15
+ from typing_extensions import Annotated
16
+
17
+
18
+ class InputDatasetBaseModel(BaseModel):
19
+ """Custom BaseModel for input datasets."""
20
+
21
+ model_config = ConfigDict(
22
+ alias_generator=to_camel,
23
+ validate_by_name=True,
24
+ validate_by_alias=True,
25
+ validate_assignment=True,
26
+ )
27
+
28
+ def model_dump(self, **kwargs) -> dict:
29
+ """Dump models as they were in the metadata store."""
30
+ kwargs.setdefault("exclude_defaults", True)
31
+ kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
32
+ return super().model_dump(**kwargs)
33
+
34
+ def model_dump_json(self, **kwargs) -> str:
35
+ """Dump models as they were in the metadata store."""
36
+ kwargs.setdefault("exclude_defaults", True)
37
+ kwargs.setdefault("by_alias", True) # will not be needed in Pydantic v3
38
+ return super().model_dump_json(**kwargs)
39
+
40
+
41
+ class InputDatasetObject(InputDatasetBaseModel):
42
+ """Input dataset object validator for a single file."""
43
+
44
+ bucket: str
45
+ object_key: str
46
+ tag: str | None = None
47
+
48
+
49
+ class InputDatasetFilePointer(InputDatasetBaseModel):
50
+ """Wrapper for InputDatasetObject files."""
51
+
52
+ file_pointer: InputDatasetObject = Field(alias="__file__")
53
+
54
+
55
+ class InputDatasetParameterValue(InputDatasetBaseModel):
56
+ """Input dataset parameter value validator."""
57
+
58
+ parameter_value_id: int
59
+ # parameter_value: Json[InputDatasetFilePointer] | Json[Any] # will work in gqlclient v2
60
+ parameter_value: Json[Any]
61
+ parameter_value_start_date: Annotated[
62
+ datetime, Field(default=datetime(1, 1, 1)), PlainSerializer(lambda x: x.isoformat())
63
+ ]
64
+
65
+ @field_validator("parameter_value", mode="after")
66
+ @classmethod
67
+ def validate_parameter_value(cls, param_val):
68
+ """Decode and provide additional validation for parameter_value types."""
69
+ match param_val:
70
+ case {"__file__": _}:
71
+ return InputDatasetFilePointer.model_validate(param_val)
72
+ case _:
73
+ return param_val
74
+
75
+ @field_serializer("parameter_value")
76
+ def serialize_parameter_value(self, param_val):
77
+ """Serialize the parameter_value types."""
78
+ if isinstance(param_val, InputDatasetBaseModel):
79
+ return json.dumps(param_val.model_dump())
80
+ return json.dumps(param_val)
81
+
82
+
83
+ class InputDatasetParameter(InputDatasetBaseModel):
84
+ """Parsing of the inputDatasetPartDocument that is relevant for parameters."""
85
+
86
+ parameter_name: str
87
+ parameter_values: list[InputDatasetParameterValue]
88
+
89
+ @property
90
+ def input_dataset_objects(self) -> list[InputDatasetObject]:
91
+ """Find and return list of InputDatasetObjects."""
92
+ object_list = []
93
+ for param in self.parameter_values:
94
+ if isinstance(param.parameter_value, InputDatasetFilePointer):
95
+ object_list.append(param.parameter_value.file_pointer)
96
+ return object_list
97
+
98
+
99
+ class InputDatasetFrames(InputDatasetBaseModel):
100
+ """Parsing of the inputDatasetPartDocument that is relevant for frames."""
101
+
102
+ bucket: str
103
+ object_keys: list[str] = Field(alias="object_keys") # not camel case in metadata store
104
+
105
+ @property
106
+ def input_dataset_objects(self) -> list[InputDatasetObject]:
107
+ """Convert a single bucket and a list of object_keys list into a list of InputDatasetObjects."""
108
+ object_list = []
109
+ for frame in self.object_keys:
110
+ object_list.append(InputDatasetObject(bucket=self.bucket, object_key=frame))
111
+ return object_list
112
+
113
+
114
+ class InputDatasetPartDocumentList(InputDatasetBaseModel):
115
+ """List of either InputDatasetFrames or InputDatasetParameter objects."""
116
+
117
+ doc_list: list[InputDatasetFrames] | list[InputDatasetParameter] = Field(alias="doc_list")