dkist-processing-common 10.8.2__py3-none-any.whl → 10.8.4rc1__py3-none-any.whl

This diff reflects the changes between package versions as published to a supported public registry and is provided for informational purposes only.
Files changed (30)
  1. changelog/235.feature.rst +3 -0
  2. changelog/235.misc.1.rst +2 -0
  3. changelog/235.misc.rst +1 -0
  4. dkist_processing_common/codecs/array.py +19 -0
  5. dkist_processing_common/codecs/basemodel.py +21 -0
  6. dkist_processing_common/codecs/fits.py +12 -6
  7. dkist_processing_common/manual.py +3 -5
  8. dkist_processing_common/models/fried_parameter.py +41 -0
  9. dkist_processing_common/models/graphql.py +13 -3
  10. dkist_processing_common/models/input_dataset.py +113 -0
  11. dkist_processing_common/models/parameters.py +65 -28
  12. dkist_processing_common/parsers/quality.py +1 -0
  13. dkist_processing_common/tasks/mixin/metadata_store.py +7 -4
  14. dkist_processing_common/tasks/mixin/quality/_metrics.py +19 -14
  15. dkist_processing_common/tasks/quality_metrics.py +1 -1
  16. dkist_processing_common/tasks/transfer_input_data.py +61 -70
  17. dkist_processing_common/tasks/write_l1.py +9 -2
  18. dkist_processing_common/tests/conftest.py +24 -7
  19. dkist_processing_common/tests/test_codecs.py +38 -0
  20. dkist_processing_common/tests/test_fried_parameter.py +27 -0
  21. dkist_processing_common/tests/test_input_dataset.py +79 -308
  22. dkist_processing_common/tests/test_parameters.py +71 -22
  23. dkist_processing_common/tests/test_quality_mixin.py +32 -22
  24. dkist_processing_common/tests/test_transfer_input_data.py +131 -45
  25. dkist_processing_common/tests/test_write_l1.py +35 -10
  26. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/METADATA +1 -1
  27. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/RECORD +29 -22
  28. dkist_processing_common/tasks/mixin/input_dataset.py +0 -166
  29. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/WHEEL +0 -0
  30. {dkist_processing_common-10.8.2.dist-info → dkist_processing_common-10.8.4rc1.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/transfer_input_data.py

@@ -2,35 +2,44 @@
  import logging
  from pathlib import Path

- from dkist_processing_common.codecs.json import json_encoder
+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.codecs.basemodel import basemodel_encoder
+ from dkist_processing_common.models.input_dataset import InputDatasetObject
+ from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.tasks.base import WorkflowTaskBase
  from dkist_processing_common.tasks.mixin.globus import GlobusMixin
  from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject
+

  __all__ = ["TransferL0Data"]

  logger = logging.getLogger(__name__)


- class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
+ class TransferL0Data(WorkflowTaskBase, GlobusMixin):
      """Transfers Level 0 data and required parameter files to the scratch store."""

      def download_input_dataset(self):
-         """Get the input dataset document parts and save it to scratch with the appropriate tags."""
-         if observe_frames := self.metadata_store_input_dataset_observe_frames:
-             observe_doc = observe_frames.inputDatasetPartDocument
-             self.write(observe_doc, tags=Tag.input_dataset_observe_frames(), encoder=json_encoder)
-         if calibration_frames := self.metadata_store_input_dataset_calibration_frames:
-             calibration_doc = calibration_frames.inputDatasetPartDocument
+         """Write the input dataset part documents to scratch with appropriate tags."""
+         if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
+             doc = observe_frames_part.inputDatasetPartDocument
+             self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
+         if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
+             doc = calibration_frames_part.inputDatasetPartDocument
              self.write(
-                 calibration_doc, tags=Tag.input_dataset_calibration_frames(), encoder=json_encoder
+                 data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
              )
-         if parameters := self.metadata_store_input_dataset_parameters:
-             parameters_doc = parameters.inputDatasetPartDocument
-             self.write(parameters_doc, tags=Tag.input_dataset_parameters(), encoder=json_encoder)
+         if parameters_part := self.metadata_store_input_dataset_parameters:
+             doc = parameters_part.inputDatasetPartDocument
+             self.add_file_tags_to_parameters_doc(param_doc=doc)
+             self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
+
+     def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
+         """Update the input dataset document with the location of the file parameters."""
+         for doc_item in param_doc.doc_list:
+             for obj in doc_item.input_dataset_objects:
+                 obj.tag = Tag.parameter(Path(obj.object_key).name)

      def format_transfer_items(
          self, input_dataset_objects: list[InputDatasetObject]
@@ -49,77 +58,59 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
              )
          return transfer_items

-     def format_frame_transfer_items(self) -> list[GlobusTransferItem]:
+     def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
          """Format the list of frames as transfer items to be used by globus."""
-         return self.format_transfer_items(self.input_dataset_frames)
-
-     def format_parameter_transfer_items(self) -> list[GlobusTransferItem]:
-         """Format the list of parameter objects as transfer items to be used by globus."""
-         return self.format_transfer_items(self.input_dataset_parameter_objects)
-
-     def tag_input_frames(self, transfer_items: list[GlobusTransferItem]) -> None:
-         """
-         Tag all the input files with 'frame' and 'input' tags.
-
-         Parameters
-         ----------
-         transfer_items
-             List of items to be tagged
-
-         Returns
-         -------
-         None
-         """
-         scratch_items = [
-             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-         ]
-         for si in scratch_items:
-             self.tag(si, tags=[Tag.input(), Tag.frame()])
-
-     def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
-         """
-         Tag all the parameter files with 'parameter'.
-
-         Parameters
-         ----------
-         transfer_items
-             List of items to be tagged
-
-         Returns
-         -------
-         None
-         """
-         scratch_items = [
-             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
-         ]
-         for si in scratch_items:
-             self.tag(si, tags=[Tag.parameter(si.name)])
+         doc = next(
+             self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
+             None,
+         )
+         doc_list = doc.doc_list if doc else []
+         input_dataset_objects = []
+         for doc_item in doc_list:
+             input_dataset_objects += doc_item.input_dataset_objects
+         return input_dataset_objects
+
+     def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
+         """Tag all the transferred input files."""
+         for obj in input_dataset_objects:
+             obj_path = self.scratch.absolute_path(obj.object_key)
+             if obj.tag:
+                 self.tag(obj_path, tags=obj.tag)
+             else:
+                 self.tag(obj_path, tags=[Tag.input(), Tag.frame()])

      def run(self) -> None:
          """Execute the data transfer."""
          with self.apm_task_step("Change Status to InProgress"):
              self.metadata_store_change_recipe_run_to_inprogress()

-         with self.apm_task_step("Download Input Dataset"):
+         with self.apm_task_step("Download Input Dataset Documents"):
              self.download_input_dataset()

-         with self.apm_task_step("Format Frame Transfer Items"):
-             frame_transfer_items = self.format_frame_transfer_items()
-             if not frame_transfer_items:
-                 raise ValueError("No input dataset frames found")
-
-         with self.apm_task_step("Format Parameter Transfer Items"):
-             parameter_transfer_items = self.format_parameter_transfer_items()
+         with self.apm_task_step("Build Input Dataset Transfer List"):
+             observe_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_observe_frames()
+             )
+             calibration_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_calibration_frames()
+             )
+             parameter_transfer_objects = self.build_transfer_list(
+                 doc_tag=Tag.input_dataset_parameters()
+             )
+             transfer_objects = (
+                 observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
+             )
+             if len(observe_transfer_objects + calibration_transfer_objects) == 0:
+                 raise ValueError("No input dataset frames found to transfer")

          with self.apm_task_step("Transfer Input Frames and Parameter Files via Globus"):
              self.globus_transfer_object_store_to_scratch(
-                 transfer_items=frame_transfer_items + parameter_transfer_items,
-                 label=f"Transfer Inputs for Recipe Run {self.recipe_run_id}",
+                 transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
+                 label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
              )

          with self.apm_processing_step("Tag Input Frames and Parameter Files"):
-             self.tag_input_frames(transfer_items=frame_transfer_items)
-             self.tag_parameter_objects(transfer_items=parameter_transfer_items)
+             self.tag_transfer_objects(input_dataset_objects=transfer_objects)

      def rollback(self):
          """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
dkist_processing_common/tasks/write_l1.py

@@ -29,6 +29,7 @@ from sunpy.coordinates import Helioprojective

  from dkist_processing_common.codecs.fits import fits_access_decoder
  from dkist_processing_common.codecs.fits import fits_hdulist_encoder
+ from dkist_processing_common.models.fried_parameter import r0_valid
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.models.wavelength import WavelengthRange
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
@@ -373,8 +374,14 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
          """
          # Replace header values in place
          header = self.replace_header_values(header=header, data=data)
-         # Remove r0 value if AO not locked
-         header = self.remove_invalid_r0_values(header=header)
+         # Remove r0 value if r0 conditions are not met
+         r0_is_valid = r0_valid(
+             r0=header["ATMOS_R0"],
+             ao_lock=header.get("AO_LOCK", None),
+             num_out_of_bounds_ao_values=header.get("OOBSHIFT", None),
+         )
+         if not r0_is_valid:
+             header.pop("ATMOS_R0", None)
          # Add the stats table
          header = self.add_stats_headers(header=header, data=data)
          # Add the datacenter table
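r0_valid comes from the new dkist_processing_common/models/fried_parameter.py (+41 lines), which this diff does not show. The call above and the parametrized cases in dkist_processing_common/tests/test_fried_parameter.py (end of this diff) imply that r0 is kept only when AO is locked, the r0 value itself is plausible, and the out-of-bounds AO shift count, when present, is small. A hedged sketch of that logic follows; the threshold constants are placeholders chosen only to satisfy the test values (0.2 vs 1 for r0, 17 vs 150 for OOBSHIFT), not the released cutoffs.

# Sketch only: thresholds are placeholders, not the values shipped in fried_parameter.py.
MAX_PLAUSIBLE_R0 = 0.3  # 0.2 passes, 1 fails in the tests
MAX_OUT_OF_BOUNDS_AO_VALUES = 100  # 17 passes, 150 fails in the tests


def r0_valid(
    r0: float,
    ao_lock: bool | None,
    num_out_of_bounds_ao_values: int | None,
) -> bool:
    """Return True only when the Fried parameter (ATMOS_R0) can be trusted."""
    if not ao_lock:  # missing (None) or False: AO was not locked, so r0 is meaningless
        return False
    if r0 >= MAX_PLAUSIBLE_R0:  # implausibly large r0
        return False
    if (
        num_out_of_bounds_ao_values is not None
        and num_out_of_bounds_ao_values >= MAX_OUT_OF_BOUNDS_AO_VALUES
    ):
        return False
    return True

Whatever the real thresholds are, WriteL1Frame simply pops ATMOS_R0 from the header whenever this check fails, so downstream users never see an untrustworthy Fried parameter.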
dkist_processing_common/tests/conftest.py

@@ -45,7 +45,6 @@ from dkist_processing_common.models.graphql import RecipeRunStatusResponse
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
  from dkist_processing_common.tasks import WorkflowTaskBase
- from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin

  TILE_SIZE = 64

@@ -359,7 +358,7 @@ class FakeGQLClient:
          {
              "parameterValueId": 1,
              "parameterValue": json.dumps([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          }
      ],
  },

@@ -376,7 +375,7 @@
                      }
                  }
              ),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          },
          {
              "parameterValueId": 3,

@@ -388,7 +387,7 @@
                      }
                  }
              ),
-             "parameterValueStartDate": "2000-01-02",
+             "parameterValueStartDate": datetime(2000, 1, 2).isoformat(),
          },
      ],
  },

@@ -400,7 +399,7 @@
              "parameterValue": json.dumps(
                  {"a": 1, "b": 3.14159, "c": "foo", "d": [1, 2, 3]}
              ),
-             "parameterValueStartDate": "2000-01-01",
+             "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
          }
      ],
  },

@@ -796,7 +795,7 @@ def post_fit_polcal_fitter(
      return fitter


- class InputDatasetTask(WorkflowTaskBase, InputDatasetMixin):
+ class InputDatasetTask(WorkflowTaskBase):
      def run(self):
          pass

@@ -824,7 +823,7 @@ def task_with_input_dataset(
      task.scratch.workflow_base_path = tmp_path / str(recipe_run_id)
      for part, tag in input_dataset_parts:
          file_path = task.scratch.workflow_base_path / Path(f"{uuid4().hex[:6]}.ext")
-         file_path.write_text(data=json.dumps(part))
+         file_path.write_text(data=json.dumps({"doc_list": part}))
          task.tag(path=file_path, tags=tag)
      yield task

@@ -851,6 +850,24 @@ def create_parameter_files(
          task.tag(path=file_path, tags=Tag.parameter(param_path))


+ def create_input_frames(
+     task: WorkflowTaskBase,
+     input_frame_docs: list[dict] = FakeGQLClient.observe_frames_doc_object
+     + FakeGQLClient.calibration_frames_doc_object,
+ ):
+     """
+     Create the observe and calibration frame files specified in the input dataset documents
+     returned by the metadata store.
+     """
+     for frame in input_frame_docs:
+         for object_key in frame["object_keys"]:
+             file_path = task.scratch.workflow_base_path / Path(object_key)
+             if not file_path.parent.exists():
+                 file_path.parent.mkdir(parents=True, exist_ok=True)
+             file_path.write_text(data="")
+             task.tag(path=file_path, tags=[Tag.frame(), Tag.input()])
+
+
  @pytest.fixture()
  def fake_constants_db() -> dict:
      """
dkist_processing_common/tests/test_codecs.py

@@ -19,10 +19,15 @@ from astropy.io.fits import CompImageHDU
  from astropy.io.fits import HDUList
  from astropy.io.fits import Header
  from astropy.io.fits import PrimaryHDU
+ from pydantic import BaseModel
+ from pydantic import create_model
+ from pydantic import Field

  from dkist_processing_common.codecs.asdf import asdf_decoder
  from dkist_processing_common.codecs.asdf import asdf_encoder
  from dkist_processing_common.codecs.asdf import asdf_fileobj_encoder
+ from dkist_processing_common.codecs.basemodel import basemodel_decoder
+ from dkist_processing_common.codecs.basemodel import basemodel_encoder
  from dkist_processing_common.codecs.bytes import bytes_decoder
  from dkist_processing_common.codecs.bytes import bytes_encoder
  from dkist_processing_common.codecs.fits import fits_access_decoder

@@ -100,6 +105,14 @@ def path_to_json(dictionary, tmp_file) -> Path:
      return tmp_file


+ @pytest.fixture
+ def pydantic_basemodel() -> BaseModel:
+     class Foo(BaseModel):
+         bar: int
+
+     return Foo(bar=123)
+
+
  @pytest.fixture
  def string() -> str:
      return "string"

@@ -356,6 +369,7 @@ class DummyFitsAccess(FitsAccessBase):
          pytest.param("primary_hdu_list", fits_hdulist_encoder, id="fits uncompressed HDUList"),
          pytest.param("compressed_hdu_list", fits_hdulist_encoder, id="fits compressed HDUList"),
          pytest.param("dictionary", json_encoder, id="json"),
+         pytest.param("pydantic_basemodel", basemodel_encoder, id="pydantic basemodel"),
          pytest.param("string", str_encoder, id="str"),
          pytest.param("asdf_tree", asdf_encoder, id="asdf"),
          pytest.param("asdf_obj", asdf_fileobj_encoder, id="asdf_obj"),

@@ -600,6 +614,30 @@ def test_json_encoder_invalid(python_object: Any, expected_exception_type: type[
          json_encoder(python_object)


+ def test_basemodel_decoder(valid_json_codec, path_to_text_file):
+     """
+     Given: a python object that can be validated to a Pydantic BaseModel object is written to file as json
+     When: basemodel decoding is applied to the json file
+     Then: the string gets decoded to the correct Pydantic BaseModel object
+     """
+     # write python object to file as json string
+     python_object = valid_json_codec["python_object"]
+     path = path_to_text_file(json.dumps({"foo": python_object}))
+
+     # create basemodel on the fly
+     DynamicBaseModel = create_model(
+         "DynamicBaseModel", foo=(Any, Field(default_factory=type(python_object)))
+     )
+
+     # get the same object via the basemodel decoder
+     decoded_obj = basemodel_decoder(path, model=DynamicBaseModel)
+     if python_object is nan:
+         # By definition, nan != nan
+         assert isnan(decoded_obj.foo)
+     else:
+         assert decoded_obj.foo == python_object
+
+
  def test_quality_data_encoder_valid(valid_quality_codec):
      """
      Given: a python object that can be encoded as a json string
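The basemodel_encoder and basemodel_decoder exercised by this test live in the new dkist_processing_common/codecs/basemodel.py (+21 lines), which is not included in the diff. By analogy with the package's other codecs (encoders turn an object into bytes for self.write, decoders turn a file path back into an object for self.read), a plausible Pydantic v2 sketch is shown below; the exact signatures and keyword handling of the released codec are assumptions.

# Hypothetical sketch; the released codecs/basemodel.py may differ in signature details.
from pathlib import Path
from typing import TypeVar

from pydantic import BaseModel

ModelT = TypeVar("ModelT", bound=BaseModel)


def basemodel_encoder(data: BaseModel, **dump_kwargs) -> bytes:
    """Serialize a Pydantic model to UTF-8 JSON bytes for writing to scratch."""
    return data.model_dump_json(**dump_kwargs).encode("utf-8")


def basemodel_decoder(path: Path, model: type[ModelT], **validate_kwargs) -> ModelT:
    """Read a JSON file from scratch and validate it into the requested model."""
    return model.model_validate_json(path.read_text(), **validate_kwargs)

With this pairing, TransferL0Data can round-trip the part documents: download_input_dataset writes them with basemodel_encoder, and build_transfer_list later reads them back as InputDatasetPartDocumentList instances via basemodel_decoder.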
dkist_processing_common/tests/test_fried_parameter.py (new file)

@@ -0,0 +1,27 @@
+ import pytest
+
+ from dkist_processing_common.models.fried_parameter import r0_valid
+
+
+ @pytest.mark.parametrize(
+     "r0, ao_lock, oob_shift, should_r0_exist",
+     [
+         pytest.param(0.2, True, 17, True, id="AO_LOCK_True_good_R0_good_oob"),
+         pytest.param(1, True, 17, False, id="AO_LOCK_True_bad_R0_good_oob"),
+         pytest.param(0.2, False, 17, False, id="AO_LOCK_False_good_R0_good_oob"),
+         pytest.param(1, False, 17, False, id="AO_LOCK_False_bad_R0_good_oob"),
+         pytest.param(0.2, True, 150, False, id="AO_LOCK_True_good_R0_bad_oob"),
+         pytest.param(1, True, 150, False, id="AO_LOCK_True_bad_R0_bad_oob"),
+         pytest.param(0.2, False, 150, False, id="AO_LOCK_False_good_R0_bad_oob"),
+         pytest.param(1, False, 150, False, id="AO_LOCK_False_bad_R0_bad_oob"),
+         pytest.param(0.2, None, 17, False, id="AO_LOCK_missing"),
+         pytest.param(0.2, True, None, True, id="OOBSHIFT_missing"),
+     ],
+ )
+ def test_check_r0_valid(r0, ao_lock, oob_shift, should_r0_exist):
+     """
+     :Given: values for r0, the ao_lock status, and the ao out of bound shift value
+     :When: checking for a valid state to use r0
+     :Then: valid conditions are marked True, invalid conditions marked False
+     """
+     assert r0_valid(r0, ao_lock, oob_shift) == should_r0_exist