dkist-processing-common 10.8.1rc1__py3-none-any.whl → 10.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. dkist_processing_common/codecs/fits.py +6 -12
  2. dkist_processing_common/manual.py +5 -3
  3. dkist_processing_common/models/fried_parameter.py +41 -0
  4. dkist_processing_common/models/graphql.py +3 -13
  5. dkist_processing_common/models/parameters.py +28 -65
  6. dkist_processing_common/parsers/quality.py +1 -0
  7. dkist_processing_common/tasks/mixin/input_dataset.py +166 -0
  8. dkist_processing_common/tasks/mixin/metadata_store.py +4 -7
  9. dkist_processing_common/tasks/mixin/quality/_metrics.py +19 -14
  10. dkist_processing_common/tasks/quality_metrics.py +1 -1
  11. dkist_processing_common/tasks/transfer_input_data.py +70 -61
  12. dkist_processing_common/tasks/write_l1.py +29 -3
  13. dkist_processing_common/tests/conftest.py +7 -24
  14. dkist_processing_common/tests/test_codecs.py +0 -38
  15. dkist_processing_common/tests/test_fried_parameter.py +27 -0
  16. dkist_processing_common/tests/test_input_dataset.py +308 -79
  17. dkist_processing_common/tests/test_parameters.py +22 -71
  18. dkist_processing_common/tests/test_quality_mixin.py +32 -22
  19. dkist_processing_common/tests/test_transfer_input_data.py +45 -131
  20. dkist_processing_common/tests/test_write_l1.py +143 -10
  21. {dkist_processing_common-10.8.1rc1.dist-info → dkist_processing_common-10.8.3.dist-info}/METADATA +2 -2
  22. {dkist_processing_common-10.8.1rc1.dist-info → dkist_processing_common-10.8.3.dist-info}/RECORD +24 -27
  23. {dkist_processing_common-10.8.1rc1.dist-info → dkist_processing_common-10.8.3.dist-info}/WHEEL +1 -1
  24. changelog/235.feature.rst +0 -3
  25. changelog/235.misc.1.rst +0 -2
  26. changelog/235.misc.rst +0 -1
  27. dkist_processing_common/codecs/array.py +0 -19
  28. dkist_processing_common/codecs/basemodel.py +0 -21
  29. dkist_processing_common/models/input_dataset.py +0 -113
  30. {dkist_processing_common-10.8.1rc1.dist-info → dkist_processing_common-10.8.3.dist-info}/top_level.txt +0 -0
dkist_processing_common/tasks/transfer_input_data.py
@@ -2,44 +2,35 @@
  import logging
  from pathlib import Path

- from dkist_processing_common.codecs.basemodel import basemodel_decoder
- from dkist_processing_common.codecs.basemodel import basemodel_encoder
- from dkist_processing_common.models.input_dataset import InputDatasetObject
- from dkist_processing_common.models.input_dataset import InputDatasetPartDocumentList
+ from dkist_processing_common.codecs.json import json_encoder
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.tasks.base import WorkflowTaskBase
  from dkist_processing_common.tasks.mixin.globus import GlobusMixin
  from dkist_processing_common.tasks.mixin.globus import GlobusTransferItem
-
+ from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin
+ from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetObject

  __all__ = ["TransferL0Data"]

  logger = logging.getLogger(__name__)


- class TransferL0Data(WorkflowTaskBase, GlobusMixin):
+ class TransferL0Data(WorkflowTaskBase, GlobusMixin, InputDatasetMixin):
      """Transfers Level 0 data and required parameter files to the scratch store."""

      def download_input_dataset(self):
-         """Write the input dataset part documents to scratch with appropriate tags."""
-         if observe_frames_part := self.metadata_store_input_dataset_observe_frames:
-             doc = observe_frames_part.inputDatasetPartDocument
-             self.write(data=doc, tags=Tag.input_dataset_observe_frames(), encoder=basemodel_encoder)
-         if calibration_frames_part := self.metadata_store_input_dataset_calibration_frames:
-             doc = calibration_frames_part.inputDatasetPartDocument
+         """Get the input dataset document parts and save it to scratch with the appropriate tags."""
+         if observe_frames := self.metadata_store_input_dataset_observe_frames:
+             observe_doc = observe_frames.inputDatasetPartDocument
+             self.write(observe_doc, tags=Tag.input_dataset_observe_frames(), encoder=json_encoder)
+         if calibration_frames := self.metadata_store_input_dataset_calibration_frames:
+             calibration_doc = calibration_frames.inputDatasetPartDocument
              self.write(
-                 data=doc, tags=Tag.input_dataset_calibration_frames(), encoder=basemodel_encoder
+                 calibration_doc, tags=Tag.input_dataset_calibration_frames(), encoder=json_encoder
              )
-         if parameters_part := self.metadata_store_input_dataset_parameters:
-             doc = parameters_part.inputDatasetPartDocument
-             self.add_file_tags_to_parameters_doc(param_doc=doc)
-             self.write(data=doc, tags=Tag.input_dataset_parameters(), encoder=basemodel_encoder)
-
-     def add_file_tags_to_parameters_doc(self, param_doc: InputDatasetPartDocumentList):
-         """Update the input dataset document with the location of the file parameters."""
-         for doc_item in param_doc.doc_list:
-             for obj in doc_item.input_dataset_objects:
-                 obj.tag = Tag.parameter(Path(obj.object_key).name)
+         if parameters := self.metadata_store_input_dataset_parameters:
+             parameters_doc = parameters.inputDatasetPartDocument
+             self.write(parameters_doc, tags=Tag.input_dataset_parameters(), encoder=json_encoder)

      def format_transfer_items(
          self, input_dataset_objects: list[InputDatasetObject]
@@ -58,59 +49,77 @@ class TransferL0Data(WorkflowTaskBase, GlobusMixin):
          )
          return transfer_items

-     def build_transfer_list(self, doc_tag: str) -> list[InputDatasetObject]:
+     def format_frame_transfer_items(self) -> list[GlobusTransferItem]:
          """Format the list of frames as transfer items to be used by globus."""
-         doc = next(
-             self.read(tags=doc_tag, decoder=basemodel_decoder, model=InputDatasetPartDocumentList),
-             None,
-         )
-         doc_list = doc.doc_list if doc else []
-         input_dataset_objects = []
-         for doc_item in doc_list:
-             input_dataset_objects += doc_item.input_dataset_objects
-         return input_dataset_objects
-
-     def tag_transfer_objects(self, input_dataset_objects: list[InputDatasetObject]) -> None:
-         """Tag all the transferred input files."""
-         for obj in input_dataset_objects:
-             obj_path = self.scratch.absolute_path(obj.object_key)
-             if obj.tag:
-                 self.tag(obj_path, tags=obj.tag)
-             else:
-                 self.tag(obj_path, tags=[Tag.input(), Tag.frame()])
+         return self.format_transfer_items(self.input_dataset_frames)
+
+     def format_parameter_transfer_items(self) -> list[GlobusTransferItem]:
+         """Format the list of parameter objects as transfer items to be used by globus."""
+         return self.format_transfer_items(self.input_dataset_parameter_objects)
+
+     def tag_input_frames(self, transfer_items: list[GlobusTransferItem]) -> None:
+         """
+         Tag all the input files with 'frame' and 'input' tags.
+
+         Parameters
+         ----------
+         transfer_items
+             List of items to be tagged
+
+         Returns
+         -------
+         None
+         """
+         scratch_items = [
+             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
+         ]
+         for si in scratch_items:
+             self.tag(si, tags=[Tag.input(), Tag.frame()])
+
+     def tag_parameter_objects(self, transfer_items: list[GlobusTransferItem]) -> None:
+         """
+         Tag all the parameter files with 'parameter'.
+
+         Parameters
+         ----------
+         transfer_items
+             List of items to be tagged
+
+         Returns
+         -------
+         None
+         """
+         scratch_items = [
+             self.scratch.scratch_base_path / ti.destination_path for ti in transfer_items
+         ]
+         for si in scratch_items:
+             self.tag(si, tags=[Tag.parameter(si.name)])

      def run(self) -> None:
          """Execute the data transfer."""
          with self.apm_task_step("Change Status to InProgress"):
              self.metadata_store_change_recipe_run_to_inprogress()

-         with self.apm_task_step("Download Input Dataset Documents"):
+         with self.apm_task_step("Download Input Dataset"):
              self.download_input_dataset()

-         with self.apm_task_step("Build Input Dataset Transfer List"):
-             observe_transfer_objects = self.build_transfer_list(
-                 doc_tag=Tag.input_dataset_observe_frames()
-             )
-             calibration_transfer_objects = self.build_transfer_list(
-                 doc_tag=Tag.input_dataset_calibration_frames()
-             )
-             parameter_transfer_objects = self.build_transfer_list(
-                 doc_tag=Tag.input_dataset_parameters()
-             )
-             transfer_objects = (
-                 observe_transfer_objects + calibration_transfer_objects + parameter_transfer_objects
-             )
-             if len(observe_transfer_objects + calibration_transfer_objects) == 0:
-                 raise ValueError("No input dataset frames found to transfer")
+         with self.apm_task_step("Format Frame Transfer Items"):
+             frame_transfer_items = self.format_frame_transfer_items()
+             if not frame_transfer_items:
+                 raise ValueError("No input dataset frames found")
+
+         with self.apm_task_step("Format Parameter Transfer Items"):
+             parameter_transfer_items = self.format_parameter_transfer_items()

          with self.apm_task_step("Transfer Input Frames and Parameter Files via Globus"):
              self.globus_transfer_object_store_to_scratch(
-                 transfer_items=self.format_transfer_items(input_dataset_objects=transfer_objects),
-                 label=f"Transfer Input Objects for Recipe Run {self.recipe_run_id}",
+                 transfer_items=frame_transfer_items + parameter_transfer_items,
+                 label=f"Transfer Inputs for Recipe Run {self.recipe_run_id}",
              )

          with self.apm_processing_step("Tag Input Frames and Parameter Files"):
-             self.tag_transfer_objects(input_dataset_objects=transfer_objects)
+             self.tag_input_frames(transfer_items=frame_transfer_items)
+             self.tag_parameter_objects(transfer_items=parameter_transfer_items)

      def rollback(self):
          """Warn that depending on the progress of the task all data may not be removed because it hadn't been tagged."""
dkist_processing_common/tasks/write_l1.py
@@ -29,6 +29,7 @@ from sunpy.coordinates import Helioprojective

  from dkist_processing_common.codecs.fits import fits_access_decoder
  from dkist_processing_common.codecs.fits import fits_hdulist_encoder
+ from dkist_processing_common.models.fried_parameter import r0_valid
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.models.wavelength import WavelengthRange
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
@@ -295,10 +296,14 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
          header["SOLARRAD"] = self.calculate_solar_angular_radius(obstime=obstime)
          header["SPECSYS"] = "TOPOCENT"  # no wavelength correction made due to doppler velocity
          header["VELOSYS"] = 0.0  # no wavelength correction made due to doppler velocity
-         header["WAVEBAND"] = get_closest_spectral_line(wavelength=header["LINEWAV"] * u.nm).name
          wavelength_range = self.get_wavelength_range(header=header)
          header["WAVEMIN"] = wavelength_range.min.to_value(u.nm)
          header["WAVEMAX"] = wavelength_range.max.to_value(u.nm)
+         waveband: str | None = self.get_waveband(
+             wavelength=header["LINEWAV"] * u.nm, wavelength_range=wavelength_range
+         )
+         if waveband:
+             header["WAVEBAND"] = waveband
          return header

      def l1_filename(self, header: fits.Header, stokes: Literal["I", "Q", "U", "V"]):
@@ -369,8 +374,14 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
          """
          # Replace header values in place
          header = self.replace_header_values(header=header, data=data)
-         # Remove r0 value if AO not locked
-         header = self.remove_invalid_r0_values(header=header)
+         # Remove r0 value if r0 conditions are not met
+         r0_is_valid = r0_valid(
+             r0=header["ATMOS_R0"],
+             ao_lock=header.get("AO_LOCK", None),
+             num_out_of_bounds_ao_values=header.get("OOBSHIFT", None),
+         )
+         if not r0_is_valid:
+             header.pop("ATMOS_R0", None)
          # Add the stats table
          header = self.add_stats_headers(header=header, data=data)
          # Add the datacenter table
@@ -515,3 +526,18 @@ class WriteL1Frame(WorkflowTaskBase, MetadataStoreMixin, ABC):
          if header.get("AO_LOCK") is not True:
              header.pop("ATMOS_R0", None)
          return header
+
+     @staticmethod
+     def get_waveband(wavelength: u.Quantity, wavelength_range: WavelengthRange) -> str | None:
+         """
+         Get the spectral line information of the closest spectral line to the wavelength argument.
+
+         If the spectral line rest wavelength in air does not fall in the wavelength range of the data,
+         do not populate the keyword.
+         """
+         print(wavelength_range)
+         closest_line = get_closest_spectral_line(wavelength=wavelength)
+         rest_wavelength = closest_line.rest_wavelength_in_air
+         if rest_wavelength < wavelength_range.min or rest_wavelength > wavelength_range.max:
+             return None
+         return closest_line.name
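
The net effect of the WriteL1Frame hunks above is that WAVEBAND is now written only when the closest catalog line's air rest wavelength actually falls inside the data's [WAVEMIN, WAVEMAX] range. A toy illustration of that guard follows; SpectralLine and the two-entry LINES list are stand-ins, not the real catalog behind get_closest_spectral_line.

    # Toy illustration of the WAVEBAND guard; SpectralLine and LINES are
    # stand-ins, not the package's real spectral-line catalog.
    from dataclasses import dataclass

    import astropy.units as u

    @dataclass
    class SpectralLine:
        name: str
        rest_wavelength_in_air: u.Quantity

    LINES = [
        SpectralLine("Ca II K", 393.366 * u.nm),
        SpectralLine("H alpha", 656.281 * u.nm),
    ]

    def waveband_for(
        wavelength: u.Quantity, wave_min: u.Quantity, wave_max: u.Quantity
    ) -> str | None:
        # Closest catalog line to the observed central wavelength (LINEWAV).
        closest = min(LINES, key=lambda line: abs(line.rest_wavelength_in_air - wavelength))
        # Suppress WAVEBAND when that line lies outside the data's bandpass.
        if not (wave_min <= closest.rest_wavelength_in_air <= wave_max):
            return None
        return closest.name

    assert waveband_for(656.3 * u.nm, 655.0 * u.nm, 658.0 * u.nm) == "H alpha"
    assert waveband_for(500.0 * u.nm, 499.0 * u.nm, 501.0 * u.nm) is None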
dkist_processing_common/tests/conftest.py
@@ -45,6 +45,7 @@ from dkist_processing_common.models.graphql import RecipeRunStatusResponse
  from dkist_processing_common.models.tags import Tag
  from dkist_processing_common.parsers.l0_fits_access import L0FitsAccess
  from dkist_processing_common.tasks import WorkflowTaskBase
+ from dkist_processing_common.tasks.mixin.input_dataset import InputDatasetMixin

  TILE_SIZE = 64

@@ -358,7 +359,7 @@ class FakeGQLClient:
                  {
                      "parameterValueId": 1,
                      "parameterValue": json.dumps([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
-                     "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
+                     "parameterValueStartDate": "2000-01-01",
                  }
              ],
          },
@@ -375,7 +376,7 @@ class FakeGQLClient:
                          }
                      }
                  ),
-                 "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
+                 "parameterValueStartDate": "2000-01-01",
              },
              {
                  "parameterValueId": 3,
@@ -387,7 +388,7 @@ class FakeGQLClient:
                          }
                      }
                  ),
-                 "parameterValueStartDate": datetime(2000, 1, 2).isoformat(),
+                 "parameterValueStartDate": "2000-01-02",
              },
          ],
      },
@@ -399,7 +400,7 @@ class FakeGQLClient:
                      "parameterValue": json.dumps(
                          {"a": 1, "b": 3.14159, "c": "foo", "d": [1, 2, 3]}
                      ),
-                     "parameterValueStartDate": datetime(2000, 1, 1).isoformat(),
+                     "parameterValueStartDate": "2000-01-01",
                  }
              ],
          },
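
The FakeGQLClient hunks above all make the same change: the fixture parameter start dates lose their time component, since datetime.isoformat() yields a full timestamp while the replacement is a bare date string. A quick check of the two forms:

    from datetime import datetime

    # What the 10.8.1rc1 fixtures produced:
    assert datetime(2000, 1, 1).isoformat() == "2000-01-01T00:00:00"
    # What the 10.8.3 fixtures use instead:
    assert datetime(2000, 1, 1).date().isoformat() == "2000-01-01"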
@@ -795,7 +796,7 @@ def post_fit_polcal_fitter(
      return fitter


- class InputDatasetTask(WorkflowTaskBase):
+ class InputDatasetTask(WorkflowTaskBase, InputDatasetMixin):
      def run(self):
          pass

@@ -823,7 +824,7 @@ def task_with_input_dataset(
      task.scratch.workflow_base_path = tmp_path / str(recipe_run_id)
      for part, tag in input_dataset_parts:
          file_path = task.scratch.workflow_base_path / Path(f"{uuid4().hex[:6]}.ext")
-         file_path.write_text(data=json.dumps({"doc_list": part}))
+         file_path.write_text(data=json.dumps(part))
          task.tag(path=file_path, tags=tag)
      yield task

@@ -850,24 +851,6 @@ def create_parameter_files(
          task.tag(path=file_path, tags=Tag.parameter(param_path))


- def create_input_frames(
-     task: WorkflowTaskBase,
-     input_frame_docs: list[dict] = FakeGQLClient.observe_frames_doc_object
-     + FakeGQLClient.calibration_frames_doc_object,
- ):
-     """
-     Create the observe and calibration frame files specified in the input dataset documents
-     returned by the metadata store.
-     """
-     for frame in input_frame_docs:
-         for object_key in frame["object_keys"]:
-             file_path = task.scratch.workflow_base_path / Path(object_key)
-             if not file_path.parent.exists():
-                 file_path.parent.mkdir(parents=True, exist_ok=True)
-             file_path.write_text(data="")
-             task.tag(path=file_path, tags=[Tag.frame(), Tag.input()])
-
-
  @pytest.fixture()
  def fake_constants_db() -> dict:
      """
dkist_processing_common/tests/test_codecs.py
@@ -19,15 +19,10 @@ from astropy.io.fits import CompImageHDU
  from astropy.io.fits import HDUList
  from astropy.io.fits import Header
  from astropy.io.fits import PrimaryHDU
- from pydantic import BaseModel
- from pydantic import create_model
- from pydantic import Field

  from dkist_processing_common.codecs.asdf import asdf_decoder
  from dkist_processing_common.codecs.asdf import asdf_encoder
  from dkist_processing_common.codecs.asdf import asdf_fileobj_encoder
- from dkist_processing_common.codecs.basemodel import basemodel_decoder
- from dkist_processing_common.codecs.basemodel import basemodel_encoder
  from dkist_processing_common.codecs.bytes import bytes_decoder
  from dkist_processing_common.codecs.bytes import bytes_encoder
  from dkist_processing_common.codecs.fits import fits_access_decoder
@@ -105,14 +100,6 @@ def path_to_json(dictionary, tmp_file) -> Path:
      return tmp_file


- @pytest.fixture
- def pydantic_basemodel() -> BaseModel:
-     class Foo(BaseModel):
-         bar: int
-
-     return Foo(bar=123)
-
-
  @pytest.fixture
  def string() -> str:
      return "string"
@@ -369,7 +356,6 @@ class DummyFitsAccess(FitsAccessBase):
          pytest.param("primary_hdu_list", fits_hdulist_encoder, id="fits uncompressed HDUList"),
          pytest.param("compressed_hdu_list", fits_hdulist_encoder, id="fits compressed HDUList"),
          pytest.param("dictionary", json_encoder, id="json"),
-         pytest.param("pydantic_basemodel", basemodel_encoder, id="pydantic basemodel"),
          pytest.param("string", str_encoder, id="str"),
          pytest.param("asdf_tree", asdf_encoder, id="asdf"),
          pytest.param("asdf_obj", asdf_fileobj_encoder, id="asdf_obj"),
@@ -614,30 +600,6 @@ def test_json_encoder_invalid(python_object: Any, expected_exception_type: type[
      json_encoder(python_object)


- def test_basemodel_decoder(valid_json_codec, path_to_text_file):
-     """
-     Given: a python object that can be validated to a Pydantic BaseModel object is written to file as json
-     When: basemodel decoding is applied to the json file
-     Then: the string gets decoded to the correct Pydantic BaseModel object
-     """
-     # write python object to file as json string
-     python_object = valid_json_codec["python_object"]
-     path = path_to_text_file(json.dumps({"foo": python_object}))
-
-     # create basemodel on the fly
-     DynamicBaseModel = create_model(
-         "DynamicBaseModel", foo=(Any, Field(default_factory=type(python_object)))
-     )
-
-     # get the same object via the basemodel decoder
-     decoded_obj = basemodel_decoder(path, model=DynamicBaseModel)
-     if python_object is nan:
-         # By definition, nan != nan
-         assert isnan(decoded_obj.foo)
-     else:
-         assert decoded_obj.foo == python_object
-
-
  def test_quality_data_encoder_valid(valid_quality_codec):
      """
      Given: a python object that can be encoded as a json string
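
The removed test above also documents the shape of the dropped codec API: basemodel_decoder(path, model=SomeModel) validated a JSON file into a Pydantic model, and basemodel_encoder serialized one for writing. A minimal sketch of equivalent helpers, assuming Pydantic v2's model_validate_json/model_dump_json rather than the package's actual implementation:

    # Sketch of the removed basemodel codec pair, assuming Pydantic v2 APIs.
    from pathlib import Path
    from typing import TypeVar

    from pydantic import BaseModel

    T = TypeVar("T", bound=BaseModel)

    def basemodel_encoder(data: BaseModel) -> bytes:
        # Serialize the model to JSON bytes for writing to scratch.
        return data.model_dump_json().encode()

    def basemodel_decoder(path: Path, model: type[T]) -> T:
        # Validate the JSON file contents into the requested model type.
        return model.model_validate_json(path.read_text())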
dkist_processing_common/tests/test_fried_parameter.py (new file)
@@ -0,0 +1,27 @@
+ import pytest
+
+ from dkist_processing_common.models.fried_parameter import r0_valid
+
+
+ @pytest.mark.parametrize(
+     "r0, ao_lock, oob_shift, should_r0_exist",
+     [
+         pytest.param(0.2, True, 17, True, id="AO_LOCK_True_good_R0_good_oob"),
+         pytest.param(1, True, 17, False, id="AO_LOCK_True_bad_R0_good_oob"),
+         pytest.param(0.2, False, 17, False, id="AO_LOCK_False_good_R0_good_oob"),
+         pytest.param(1, False, 17, False, id="AO_LOCK_False_bad_R0_good_oob"),
+         pytest.param(0.2, True, 150, False, id="AO_LOCK_True_good_R0_bad_oob"),
+         pytest.param(1, True, 150, False, id="AO_LOCK_True_bad_R0_bad_oob"),
+         pytest.param(0.2, False, 150, False, id="AO_LOCK_False_good_R0_bad_oob"),
+         pytest.param(1, False, 150, False, id="AO_LOCK_False_bad_R0_bad_oob"),
+         pytest.param(0.2, None, 17, False, id="AO_LOCK_missing"),
+         pytest.param(0.2, True, None, True, id="OOBSHIFT_missing"),
+     ],
+ )
+ def test_check_r0_valid(r0, ao_lock, oob_shift, should_r0_exist):
+     """
+     :Given: values for r0, the ao_lock status, and the ao out of bound shift value
+     :When: checking for a valid state to use r0
+     :Then: valid conditions are marked True, invalid conditions marked False
+     """
+     assert r0_valid(r0, ao_lock, oob_shift) == should_r0_exist
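
dkist_processing_common/models/fried_parameter.py itself is not shown in this diff, but the parametrize table above pins down the behavior of r0_valid: AO must be locked, too many out-of-bounds AO shift measurements invalidate r0, a missing OOBSHIFT keyword does not, and implausibly large r0 values are rejected. The sketch below is consistent with every case in that table; the exact cutoff values are assumptions, since the tests only bound them (somewhere between 0.2 and 1 for r0, and between 17 and 150 for the shift count).

    # Hypothetical reconstruction of r0_valid from the test table above; the
    # threshold constants are assumed, not taken from the released module.
    MAX_VALID_R0 = 0.3  # must lie between the "good" 0.2 and "bad" 1 test cases
    MAX_OOB_SHIFTS = 100  # must lie between the "good" 17 and "bad" 150 test cases

    def r0_valid(
        r0: float,
        ao_lock: bool | None,
        num_out_of_bounds_ao_values: int | None,
    ) -> bool:
        """Return True only when the Fried parameter value can be trusted."""
        # A missing or unlocked AO system invalidates r0 outright.
        if ao_lock is not True:
            return False
        # Too many out-of-bounds AO shifts also invalidate r0, but a missing
        # OOBSHIFT keyword does not (see the OOBSHIFT_missing case).
        if num_out_of_bounds_ao_values is not None and num_out_of_bounds_ao_values > MAX_OOB_SHIFTS:
            return False
        # Implausibly large r0 values are rejected.
        return r0 <= MAX_VALID_R0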