dkist-processing-common 10.8.3__py3-none-any.whl → 10.9.0__py3-none-any.whl

This diff compares the content of two publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
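The headline change in 10.9.0 is the removal of `InputDatasetMixin`: tasks now read input dataset part documents from tagged files and validate them with the new pydantic-based `basemodel_decoder` and `InputDatasetPartDocumentList` model. Below is a minimal sketch of the new read pattern, assuming a `WorkflowTaskBase` subclass with scratch and tagging already configured; the helper name is hypothetical, while the `task.read` call mirrors the tests in the diff.

```python
from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
from dkist_processing_common.models.tags import Tag


def read_observe_frames_part(task):
    """Hypothetical helper: load the observe-frames part document for a task."""
    # task.read yields one decoded object per tagged file; each file now holds
    # a {"doc_list": [...]} document validated into InputDatasetPartDocumentList
    doc = next(
        task.read(
            tags=Tag.input_dataset_observe_frames(),
            decoder=basemodel_decoder,
            model=InputDatasetPartDocumentList,
        ),
        None,
    )
    return doc.doc_list if doc else []
```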
@@ -45,7 +45,6 @@ from dkist_processing_common.models.graphql import RecipeRunStatusResponse
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
  from dkist_processing_common.tasks import WorkflowTaskBase
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin

  TILE_SIZE = 64

@@ -359,7 +358,7 @@ class FakeGQLClient:
  {
  "parameterValueId": 1,
  "parameterValue": json.dumps([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
- "parameterValueStartDate": "2000-01-01",
+ "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
  }
  ],
  },
@@ -376,7 +375,7 @@ class FakeGQLClient:
  }
  }
  ),
- "parameterValueStartDate": "2000-01-01",
+ "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
  },
  {
  "parameterValueId": 3,
@@ -388,7 +387,7 @@ class FakeGQLClient:
  }
  }
  ),
- "parameterValueStartDate": "2000-01-02",
+ "parameterValueStartDate": datetime(2000, 1, 2).isoformat(),
  },
  ],
  },
@@ -400,7 +399,7 @@ class FakeGQLClient:
  "parameterValue": json.dumps(
  {"a": 1, "b": 3.14159, "c": "foo", "d": [1, 2, 3]}
  ),
- "parameterValueStartDate": "2000-01-01",
+ "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
  }
  ],
  },
@@ -796,7 +795,7 @@ def post_fit_polcal_fitter(
  return fitter


- class InputDatasetTask(WorkflowTaskBase, InputDatasetMixin):
+ class InputDatasetTask(WorkflowTaskBase):
  def run(self):
  pass

@@ -824,7 +823,7 @@ def task_with_input_dataset(
  task.scratch.workflow_base_path = tmp_path / str(recipe_run_id)
  for part, tag in input_dataset_parts:
  file_path = task.scratch.workflow_base_path / Path(f"{uuid4().hex[:6]}.ext")
- file_path.write_text(data=json.dumps(part))
+ file_path.write_text(data=json.dumps({"doc_list": part}))
  task.tag(path=file_path, tags=tag)
  yield task

@@ -851,6 +850,24 @@ def create_parameter_files(
  task.tag(path=file_path, tags=Tag.parameter(param_path))


+ def create_input_frames(
+ task: WorkflowTaskBase,
+ input_frame_docs: list[dict] = FakeGQLClient.observe_frames_doc_object
+ + FakeGQLClient.calibration_frames_doc_object,
+ ):
+ """
+ Create the observe and calibration frame files specified in the input dataset documents
+ returned by the metadata store.
+ """
+ for frame in input_frame_docs:
+ for object_key in frame["object_keys"]:
+ file_path = task.scratch.workflow_base_path / Path(object_key)
+ if not file_path.parent.exists():
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+ file_path.write_text(data="")
+ task.tag(path=file_path, tags=[Tag.frame(), Tag.input()])
+
+
  @pytest.fixture()
  def fake_constants_db() -> dict:
  """
@@ -19,10 +19,15 @@ from astropy.io.fits import CompImageHDU
  from astropy.io.fits import HDUList
  from astropy.io.fits import Header
  from astropy.io.fits import PrimaryHDU
+ from pydantic import BaseModel
+ from pydantic import create_model
+ from pydantic import Field

  from dkist_processing_common.codecs.asdf import asdf_decoder
  from dkist_processing_common.codecs.asdf import asdf_encoder
  from dkist_processing_common.codecs.asdf import asdf_fileobj_encoder
+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.codecs.basemodel import basemodel_encoder
  from dkist_processing_common.codecs.bytes import bytes_decoder
  from dkist_processing_common.codecs.bytes import bytes_encoder
  from dkist_processing_common.codecs.fits import fits_access_decoder
@@ -100,6 +105,14 @@ def path_to_json(dictionary, tmp_file) -> Path:
  return tmp_file


+ @pytest.fixture
+ def pydantic_basemodel() -> BaseModel:
+ class Foo(BaseModel):
+ bar: int
+
+ return Foo(bar=123)
+
+
  @pytest.fixture
  def string() -> str:
  return "string"
@@ -356,6 +369,7 @@ class DummyFitsAccess(FitsAccessBase):
  pytest.param("primary_hdu_list", fits_hdulist_encoder, id="fits uncompressed HDUList"),
  pytest.param("compressed_hdu_list", fits_hdulist_encoder, id="fits compressed HDUList"),
  pytest.param("dictionary", json_encoder, id="json"),
+ pytest.param("pydantic_basemodel", basemodel_encoder, id="pydantic basemodel"),
  pytest.param("string", str_encoder, id="str"),
  pytest.param("asdf_tree", asdf_encoder, id="asdf"),
  pytest.param("asdf_obj", asdf_fileobj_encoder, id="asdf_obj"),
@@ -600,6 +614,30 @@ def test_json_encoder_invalid(python_object: Any, expected_exception_type: type[
  json_encoder(python_object)


+ def test_basemodel_decoder(valid_json_codec, path_to_text_file):
+ """
+ Given: a python object that can be validated to a Pydantic BaseModel object is written to file as json
+ When: basemodel decoding is applied to the json file
+ Then: the string gets decoded to the correct Pydantic BaseModel object
+ """
+ # write python object to file as json string
+ python_object = valid_json_codec["python_object"]
+ path = path_to_text_file(json.dumps({"foo": python_object}))
+
+ # create basemodel on the fly
+ DynamicBaseModel = create_model(
+ "DynamicBaseModel", foo=(Any, Field(default_factory=type(python_object)))
+ )
+
+ # get the same object via the basemodel decoder
+ decoded_obj = basemodel_decoder(path, model=DynamicBaseModel)
+ if python_object is nan:
+ # By definition, nan != nan
+ assert isnan(decoded_obj.foo)
+ else:
+ assert decoded_obj.foo == python_object
+
+
  def test_quality_data_encoder_valid(valid_quality_codec):
  """
  Given: a python object that can be encoded as a json string
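Together these hunks introduce a symmetric codec pair for pydantic models. A round-trip sketch under stated assumptions: `basemodel_decoder(path, model=...)` matches the signature shown in the tests above, while `basemodel_encoder` returning bytes ready to write is an assumption based on the other codecs in this package; `Foo` mirrors the test fixture.

```python
from pathlib import Path

from pydantic import BaseModel

from dkist_processing_common.codecs.basemodel import basemodel_decoder
from dkist_processing_common.codecs.basemodel import basemodel_encoder


class Foo(BaseModel):
    bar: int


path = Path("foo.json")
# Assumption: encoders in this package return bytes ready to write to disk
path.write_bytes(basemodel_encoder(Foo(bar=123)))
decoded = basemodel_decoder(path, model=Foo)  # signature confirmed by the tests
assert decoded == Foo(bar=123)
```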
@@ -1,17 +1,13 @@
  import json
- from copy import copy
  from datetime import datetime
- from pathlib import Path
  from typing import Any
  from uuid import uuid4

  import pytest

+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
  from dkist_processing_common.models.tags import Tag
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
- from dkist_processing_common.tests.conftest import create_parameter_files
- from dkist_processing_common.tests.conftest import InputDatasetTask


  def input_dataset_frames_part_factory(bucket_count: int = 1) -> list[dict]:
@@ -47,7 +43,9 @@ def input_dataset_parameters_part_factory(
  ]
  if has_date:
  for data in result:
- data["parameterValueStartDate"] = datetime(2022, 9, 14).isoformat()[:10]
+ param_list = data["parameterValues"]
+ for item in param_list:
+ item["parameterValueStartDate"] = datetime(2022, 9, 14).isoformat()
  if has_file:
  for data in result:
  param_list = data["parameterValues"]
@@ -66,99 +64,42 @@ def input_dataset_parameters_part_factory(
  @pytest.mark.parametrize(
  "input_dataset_parts",
  [
- pytest.param((None, Tag.input_dataset_observe_frames()), id="empty"),
  pytest.param(
  (input_dataset_frames_part_factory(), Tag.input_dataset_observe_frames()),
- id="single_bucket",
+ id="observe_single_bucket",
  ),
  pytest.param(
  (input_dataset_frames_part_factory(bucket_count=2), Tag.input_dataset_observe_frames()),
- id="multi_bucket",
+ id="observe_multi_bucket",
  ),
- ],
- )
- def test_input_dataset_observe_frames_part_document(
- task_with_input_dataset, input_dataset_parts: tuple[Any, str]
- ):
- """
- Given: A task with an input dataset observe frames part document tagged as such
- When: Accessing the document via the InputDatasetMixIn
- Then: The contents of the file are returned
- """
- doc_part, _ = input_dataset_parts
- task = task_with_input_dataset
- assert task.input_dataset_observe_frames_part_document == doc_part
-
-
- @pytest.mark.parametrize(
- "input_dataset_parts",
- [
- pytest.param((None, Tag.input_dataset_calibration_frames()), id="empty"),
  pytest.param(
  (input_dataset_frames_part_factory(), Tag.input_dataset_calibration_frames()),
- id="single_bucket",
+ id="calib_single_bucket",
  ),
  pytest.param(
  (
  input_dataset_frames_part_factory(bucket_count=2),
  Tag.input_dataset_calibration_frames(),
  ),
- id="multi_bucket",
- ),
- ],
- )
- def test_input_dataset_calibration_frames_part_document(
- task_with_input_dataset, input_dataset_parts: tuple[Any, str]
- ):
- """
- Given: A task with an input dataset calibration frames part document tagged as such
- When: Accessing the document via the InputDatasetMixIn
- Then: The contents of the file are returned
- """
- doc_part, _ = input_dataset_parts
- task = task_with_input_dataset
- assert task.input_dataset_calibration_frames_part_document == doc_part
-
-
- @pytest.mark.parametrize(
- "input_dataset_parts",
- [
- pytest.param((None, Tag.input_dataset_parameters()), id="empty"),
- pytest.param(
- (input_dataset_parameters_part_factory(), Tag.input_dataset_parameters()),
- id="single_param_no_date",
- ),
- pytest.param(
- (
- input_dataset_parameters_part_factory(parameter_count=2),
- Tag.input_dataset_parameters(),
- ),
- id="multi_param_no_date",
- ),
- pytest.param(
- (input_dataset_parameters_part_factory(has_date=True), Tag.input_dataset_parameters()),
- id="single_param_with_date",
- ),
- pytest.param(
- (
- input_dataset_parameters_part_factory(parameter_count=2, has_date=True),
- Tag.input_dataset_parameters(),
- ),
- id="multi_param_with_date",
+ id="calib_multi_bucket",
  ),
  ],
  )
- def test_input_dataset_parameters_part_document(
+ def test_input_dataset_frames_part_document(
  task_with_input_dataset, input_dataset_parts: tuple[Any, str]
  ):
  """
- Given: A task with an input dataset parameters part document tagged as such
- When: Accessing the document via the InputDatasetMixIn
- Then: The contents of the file are returned
+ Given: A task with an input dataset frames part document already written to file
+ When: Reading the file into a validated model
+ Then: The correct contents of the file are loaded
  """
- doc_part, _ = input_dataset_parts
+ doc_part, tag = input_dataset_parts
  task = task_with_input_dataset
- assert task.input_dataset_parameters_part_document == doc_part
+ doc_from_file = next(
+ task.read(tags=tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList)
+ )
+ frames = [frames.model_dump() for frames in doc_from_file.doc_list]
+ assert frames == doc_part


  @pytest.mark.parametrize(
@@ -174,24 +115,15 @@ def test_input_dataset_parameters_part_document(
  pytest.param(
  [
  (input_dataset_frames_part_factory(), Tag.input_dataset_observe_frames()),
- (None, Tag.input_dataset_calibration_frames()),
  ],
  id="observe1_cal0_single_bucket",
  ),
  pytest.param(
  [
- (None, Tag.input_dataset_observe_frames()),
  (input_dataset_frames_part_factory(), Tag.input_dataset_calibration_frames()),
  ],
  id="observe0_cal1_single_bucket",
  ),
- pytest.param(
- [
- (None, Tag.input_dataset_observe_frames()),
- (None, Tag.input_dataset_calibration_frames()),
- ],
- id="observe0_cal0_single_bucket",
- ),
  pytest.param(
  [
  (
@@ -211,13 +143,11 @@ def test_input_dataset_parameters_part_document(
  input_dataset_frames_part_factory(bucket_count=2),
  Tag.input_dataset_observe_frames(),
  ),
- (None, Tag.input_dataset_calibration_frames()),
  ],
  id="observe1_cal0_multi_bucket",
  ),
  pytest.param(
  [
- (None, Tag.input_dataset_observe_frames()),
  (
  input_dataset_frames_part_factory(bucket_count=2),
  Tag.input_dataset_calibration_frames(),
@@ -225,21 +155,17 @@ def test_input_dataset_parameters_part_document(
  ],
  id="observe0_cal1_multi_bucket",
  ),
- pytest.param(
- [
- (None, Tag.input_dataset_observe_frames()),
- (None, Tag.input_dataset_calibration_frames()),
- ],
- id="observe0_cal0_multi_bucket",
- ),
  ],
  )
- def test_input_dataset_frames(task_with_input_dataset, input_dataset_parts: list[tuple[Any, str]]):
+ def test_input_dataset_frames_combination(
+ task_with_input_dataset, input_dataset_parts: list[tuple[Any, str]]
+ ):
  """
- Given: a task with the InputDatasetMixin
- When: getting the frames in the input dataset
- Then: it matches the frames used to create the input dataset
+ Given: A task with both types of input dataset frame documents written to files
+ When: Reading the file and validating into models
+ Then: The correct files are returned by the input_dataset_objects method of InputDatasetFrames
  """
+ # Given
  doc_parts = [part for part, _ in input_dataset_parts]
  task = task_with_input_dataset
  expected = []
@@ -247,7 +173,29 @@ def test_input_dataset_frames(task_with_input_dataset, input_dataset_parts: list
  if part:
  expected.extend(flatten_frame_parts(part))
  expected_set = set(expected)
- actual = [(frame.bucket, frame.object_key) for frame in task.input_dataset_frames]
+ # When
+ frames = []
+ observe_frames = next(
+ task.read(
+ tags=Tag.input_dataset_observe_frames(),
+ decoder=basemodel_decoder,
+ model=InputDatasetPartDocumentList,
+ ),
+ None,
+ )
+ frames += observe_frames.doc_list if observe_frames else []
+ calibration_frames = next(
+ task.read(
+ tags=Tag.input_dataset_calibration_frames(),
+ decoder=basemodel_decoder,
+ model=InputDatasetPartDocumentList,
+ ),
+ None,
+ )
+ frames += calibration_frames.doc_list if calibration_frames else []
+ # Then
+ frames_objects = sum([f.input_dataset_objects for f in frames], [])
+ actual = [(frame.bucket, frame.object_key) for frame in frames_objects]
  actual_set = set(actual)
  assert len(actual) == len(actual_set)
  assert actual_set.difference(expected_set) == set()
@@ -256,114 +204,45 @@ def test_input_dataset_frames(task_with_input_dataset, input_dataset_parts: list
  @pytest.mark.parametrize(
  "input_dataset_parts",
  [
- pytest.param((None, Tag.input_dataset_parameters()), id="empty"),
  pytest.param(
  (input_dataset_parameters_part_factory(), Tag.input_dataset_parameters()),
  id="single_param_no_date_no_file",
  ),
  pytest.param(
  (input_dataset_parameters_part_factory(has_file=True), Tag.input_dataset_parameters()),
- id="single_param_no_date",
- ),
- pytest.param(
- (
- input_dataset_parameters_part_factory(parameter_count=2, has_file=True),
- Tag.input_dataset_parameters(),
- ),
- id="multi_param_no_date",
+ id="single_param_no_date_with_file",
  ),
  pytest.param(
- (
- input_dataset_parameters_part_factory(parameter_value_count=2, has_file=True),
- Tag.input_dataset_parameters(),
- ),
- id="multi_param_values_no_date",
+ (input_dataset_parameters_part_factory(has_date=True), Tag.input_dataset_parameters()),
+ id="single_param_with_date_no_file",
  ),
  pytest.param(
  (
  input_dataset_parameters_part_factory(has_date=True, has_file=True),
  Tag.input_dataset_parameters(),
  ),
- id="single_param_with_date",
+ id="single_param_with_date_with_file",
  ),
  pytest.param(
  (
- input_dataset_parameters_part_factory(
- parameter_count=2, has_date=True, has_file=True
- ),
- Tag.input_dataset_parameters(),
- ),
- id="multi_param_with_date",
- ),
- ],
- )
- def test_input_dataset_parameters(
- task_with_input_dataset, input_dataset_parts: list[tuple[Any, str]]
- ):
- """
- Given: a task with the InputDatasetMixin
- When: getting the parameters in the input dataset
- Then: the names of the parameters match the keys in the returned dictionary
- """
- task = task_with_input_dataset
- doc_part, _ = input_dataset_parts
- doc_part = doc_part or [] # None case parsing of expected values
- create_parameter_files(task, doc_part)
- expected_parameters = {item["parameterName"]: item["parameterValues"] for item in doc_part}
- for key, values in task.input_dataset_parameters.items():
- assert key in expected_parameters
- expected_values = expected_parameters[key]
- # Iterate through multiple values if they exist
- for value in values:
- # Find the matching expected value for this value object
- expected_value = [
- item
- for item in expected_values
- if value.parameter_value_id == item["parameterValueId"]
- ]
- # Make sure there's only one value
- assert len(expected_value) == 1
- # Now check the value
- expected_value = expected_value[0]
- assert value.parameter_value == json.loads(
- expected_value["parameterValue"], object_hook=task._decode_parameter_value
- )
- expected_date = expected_value.get("parameterValueStartDate", datetime(1, 1, 1))
- assert value.parameter_value_start_date == expected_date
-
-
- @pytest.mark.parametrize(
- "input_dataset_parts",
- [
- pytest.param((None, Tag.input_dataset_parameters()), id="empty"),
- pytest.param(
- (input_dataset_parameters_part_factory(), Tag.input_dataset_parameters()),
- id="single_param_no_date_no_file",
- ),
- pytest.param(
- (input_dataset_parameters_part_factory(has_file=True), Tag.input_dataset_parameters()),
- id="single_param_no_date",
- ),
- pytest.param(
- (
- input_dataset_parameters_part_factory(parameter_count=2, has_file=True),
+ input_dataset_parameters_part_factory(parameter_count=2),
  Tag.input_dataset_parameters(),
  ),
- id="multi_param_no_date",
+ id="multi_param_no_date_no_file",
  ),
  pytest.param(
  (
- input_dataset_parameters_part_factory(parameter_value_count=2, has_file=True),
+ input_dataset_parameters_part_factory(parameter_count=2, has_date=True),
  Tag.input_dataset_parameters(),
  ),
- id="multi_param_values_no_date",
+ id="multi_param_with_date_no_file",
  ),
  pytest.param(
  (
- input_dataset_parameters_part_factory(has_date=True, has_file=True),
+ input_dataset_parameters_part_factory(parameter_count=2, has_file=True),
  Tag.input_dataset_parameters(),
  ),
- id="single_param_with_date",
+ id="multi_param_no_date_with_file",
  ),
  pytest.param(
  (
@@ -372,138 +251,30 @@ def test_input_dataset_parameters(
  ),
  Tag.input_dataset_parameters(),
  ),
- id="multi_param_with_date",
+ id="multi_param_with_date_with_file",
  ),
  ],
  )
- def test_input_dataset_parameter_objects(
- task_with_input_dataset, input_dataset_parts: list[tuple[Any, str]]
- ):
+ def test_input_dataset_parameters(task_with_input_dataset, input_dataset_parts: tuple[Any, str]):
  """
- Given: a task with the InputDatasetMixin
- When: getting the parameters objects in the input dataset
- Then: the InputDatsetObjects returned by the task method match the objects defined by the input
- dataset doc part
+ Given: A task with an input dataset parameters part document written to file
+ When: Reading the file and validating into models
+ Then: The correct contents of the file, including file parameters, are loaded
  """
+ doc_part, tag = input_dataset_parts
  task = task_with_input_dataset
- doc_part, _ = input_dataset_parts
- doc_part = doc_part or [] # None case parsing of expected values
-
- # Create a list of InputDatasetObjects from the input dataset doc part
- expected_parameters = list()
- for param_item in doc_part:
- param_values_list = param_item["parameterValues"]
- for param_value_dict in param_values_list:
- if "__file__" in param_value_dict["parameterValue"]:
- file_dict = json.loads(
- param_value_dict["parameterValue"], object_hook=task._decode_parameter_value
- )
- expected_parameters.append(
- InputDatasetObject(
- bucket=file_dict["bucket"], object_key=file_dict["objectKey"]
- )
- )
- # Check that each InputDatasetObject returned by the task is in the list of expected parameters
- input_dataset_parameter_objects = task.input_dataset_parameter_objects
- assert len(input_dataset_parameter_objects) == len(expected_parameters)
- for input_dataset_object in input_dataset_parameter_objects:
- assert input_dataset_object in expected_parameters
+ doc_from_file = next(
+ task.read(tags=tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList)
+ )

-
- @pytest.mark.parametrize(
- "input_parameter_dict",
- [
- {"bucket": "data", "objectKey": "parameters/805c46/714ff939158b4253859cde5e5d6f62c3.dat"},
- {
- "__file__": {
- "bucket": "data",
- "objectKey": "parameters/805c46/714ff939158b4253859cde5e5d6f62c3.dat",
- }
- },
- {"key_name_1": "value_1", "key_name_2": "value_2", "key_name_3": "value_3"},
- ],
- )
- def test_convert_parameter_file_to_path(recipe_run_id, input_parameter_dict: dict):
- """
- Given: a parameter value field to be json decoded
- When: passing the parameter value string to the json decoder hook
- Then: the hook passes non-file parameter strings without change and modifies file parameter strings
- by replacing the __file__ dict in the value string with a bucket field, an objectKey field
- and adds a param_path field and an is_file field
- """
- # Initial test with no tags
- with InputDatasetTask(
- recipe_run_id=recipe_run_id,
- workflow_name="workflow_name",
- workflow_version="workflow_version",
- ) as task:
- # Test with no tags...
- input_dict = input_parameter_dict
- output_dict = task._decode_parameter_value(input_dict)
- if "__file__" not in input_dict:
- assert input_dict == output_dict
- else:
- value_dict = input_dict["__file__"]
- assert output_dict["bucket"] == value_dict["bucket"]
- assert output_dict["objectKey"] == value_dict["objectKey"]
- assert output_dict["is_file"]
- assert output_dict["param_path"] is None
- # Test with tags
- if "__file__" not in input_dict:
- output_dict = task._decode_parameter_value(input_dict)
- assert input_dict == output_dict
- else:
- # Create the destination path
- param_path = input_dict["__file__"]["objectKey"]
- destination_path = task.scratch.absolute_path(param_path)
- if not destination_path.parent.exists():
- destination_path.parent.mkdir(parents=True, exist_ok=True)
- destination_path.write_text(data="")
- task.tag(path=destination_path, tags=Tag.parameter(destination_path.name))
- output_dict = task._decode_parameter_value(input_dict)
- value_dict = input_dict["__file__"]
- assert output_dict["bucket"] == value_dict["bucket"]
- assert output_dict["objectKey"] == value_dict["objectKey"]
- assert output_dict["is_file"]
- assert output_dict["param_path"] == destination_path
-
-
- @pytest.mark.parametrize(
- "input_dataset_parts",
- [
- pytest.param(
- [
- (input_dataset_frames_part_factory(), Tag.input_dataset_observe_frames()),
- (input_dataset_frames_part_factory(), Tag.input_dataset_observe_frames()),
- ],
- id="observe",
- ),
- pytest.param(
- [
- (input_dataset_frames_part_factory(), Tag.input_dataset_calibration_frames()),
- (input_dataset_frames_part_factory(), Tag.input_dataset_calibration_frames()),
- ],
- id="calibration",
- ),
- pytest.param(
- [
- (input_dataset_frames_part_factory(), Tag.input_dataset_parameters()),
- (input_dataset_frames_part_factory(), Tag.input_dataset_parameters()),
- ],
- id="params",
- ),
- ],
- )
- def test_multiple_input_dataset_parts(
- task_with_input_dataset, input_dataset_parts: list[tuple[Any, str]]
- ):
- """
- Given: a task with the InputDatasetMixin and multiple tagged input datasets
- When: reading the input dataset document
- Then: an error is raised
- """
- task = task_with_input_dataset
- with pytest.raises(ValueError):
- task.input_dataset_parameters_part_document
- task.input_dataset_observe_frames_part_document
- task.input_dataset_calibration_frames_part_document
+ params = [params.model_dump() for params in doc_from_file.doc_list]
+ assert params == doc_part
+ expected_files = []
+ for item in doc_part or []:
+ for val in item["parameterValues"]:
+ if "__file__" in val["parameterValue"]:
+ file_dict = json.loads(val["parameterValue"])
+ expected_files.append(file_dict["__file__"])
+ file_objects = sum([d.input_dataset_objects for d in doc_from_file.doc_list], [])
+ file_objects_dump = [f.model_dump() for f in file_objects]
+ assert file_objects_dump == expected_files
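For parameter parts, the same read pattern applies, and file-backed parameter values surface through each document's `input_dataset_objects` property, as the final test above shows. A hedged sketch of collecting those objects; the attribute names (`doc_list`, `input_dataset_objects`, `bucket`, `object_key`) come from this diff, while the surrounding `task` setup is assumed.

```python
doc = next(
    task.read(
        tags=Tag.input_dataset_parameters(),
        decoder=basemodel_decoder,
        model=InputDatasetPartDocumentList,
    )
)
# Flatten the file-backed objects declared by each parameter document
file_objects = sum([d.input_dataset_objects for d in doc.doc_list], [])
for obj in file_objects:
    print(obj.bucket, obj.object_key)
```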