imap-processing 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of imap-processing might be problematic.

Files changed (99)
  1. imap_processing/_version.py +2 -2
  2. imap_processing/ccsds/excel_to_xtce.py +2 -0
  3. imap_processing/cdf/config/imap_hi_variable_attrs.yaml +100 -1
  4. imap_processing/cdf/config/imap_hit_global_cdf_attrs.yaml +14 -0
  5. imap_processing/cdf/config/imap_hit_l1a_variable_attrs.yaml +63 -1
  6. imap_processing/cdf/config/imap_idex_global_cdf_attrs.yaml +7 -0
  7. imap_processing/cdf/config/imap_idex_l1a_variable_attrs.yaml +574 -231
  8. imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml +326 -0
  9. imap_processing/cdf/config/imap_lo_l1a_variable_attrs.yaml +33 -23
  10. imap_processing/cdf/config/imap_ultra_l1b_variable_attrs.yaml +7 -4
  11. imap_processing/cdf/utils.py +3 -5
  12. imap_processing/cli.py +13 -4
  13. imap_processing/codice/codice_l1a.py +5 -5
  14. imap_processing/codice/constants.py +9 -9
  15. imap_processing/codice/decompress.py +6 -2
  16. imap_processing/glows/l1a/glows_l1a.py +1 -2
  17. imap_processing/hi/l1a/hi_l1a.py +4 -4
  18. imap_processing/hi/l1a/histogram.py +106 -108
  19. imap_processing/hi/l1a/science_direct_event.py +91 -224
  20. imap_processing/hi/packet_definitions/TLM_HI_COMBINED_SCI.xml +3994 -0
  21. imap_processing/hit/l0/constants.py +2 -2
  22. imap_processing/hit/l0/decom_hit.py +12 -101
  23. imap_processing/hit/l1a/hit_l1a.py +164 -23
  24. imap_processing/ialirt/l0/process_codicelo.py +153 -0
  25. imap_processing/ialirt/l0/process_hit.py +5 -5
  26. imap_processing/ialirt/packet_definitions/ialirt_codicelo.xml +281 -0
  27. imap_processing/ialirt/process_ephemeris.py +212 -0
  28. imap_processing/idex/idex_l1a.py +55 -75
  29. imap_processing/idex/idex_l1b.py +192 -0
  30. imap_processing/idex/idex_variable_unpacking_and_eu_conversion.csv +33 -0
  31. imap_processing/idex/packet_definitions/idex_packet_definition.xml +97 -595
  32. imap_processing/lo/l0/decompression_tables/decompression_tables.py +16 -0
  33. imap_processing/lo/l0/lo_science.py +44 -12
  34. imap_processing/lo/l1a/lo_l1a.py +76 -8
  35. imap_processing/lo/packet_definitions/lo_xtce.xml +9877 -87
  36. imap_processing/mag/l1a/mag_l1a.py +1 -2
  37. imap_processing/mag/l1a/mag_l1a_data.py +1 -2
  38. imap_processing/mag/l1b/mag_l1b.py +2 -1
  39. imap_processing/spice/geometry.py +37 -19
  40. imap_processing/spice/time.py +144 -2
  41. imap_processing/swapi/l1/swapi_l1.py +3 -3
  42. imap_processing/swapi/packet_definitions/swapi_packet_definition.xml +1535 -446
  43. imap_processing/swe/l2/swe_l2.py +134 -17
  44. imap_processing/tests/ccsds/test_data/expected_output.xml +1 -1
  45. imap_processing/tests/codice/test_codice_l1a.py +8 -8
  46. imap_processing/tests/codice/test_decompress.py +4 -4
  47. imap_processing/tests/conftest.py +46 -43
  48. imap_processing/tests/hi/test_data/l0/H90_NHK_20241104.bin +0 -0
  49. imap_processing/tests/hi/test_data/l0/H90_sci_cnt_20241104.bin +0 -0
  50. imap_processing/tests/hi/test_data/l0/H90_sci_de_20241104.bin +0 -0
  51. imap_processing/tests/hi/test_hi_l1b.py +2 -2
  52. imap_processing/tests/hi/test_l1a.py +31 -58
  53. imap_processing/tests/hi/test_science_direct_event.py +58 -0
  54. imap_processing/tests/hit/test_data/sci_sample1.ccsds +0 -0
  55. imap_processing/tests/hit/test_decom_hit.py +60 -50
  56. imap_processing/tests/hit/test_hit_l1a.py +327 -12
  57. imap_processing/tests/hit/test_hit_l1b.py +76 -0
  58. imap_processing/tests/hit/validation_data/hskp_sample_eu.csv +89 -0
  59. imap_processing/tests/hit/validation_data/sci_sample_raw1.csv +29 -0
  60. imap_processing/tests/ialirt/test_data/l0/apid01152.tlm +0 -0
  61. imap_processing/tests/ialirt/test_data/l0/imap_codice_l1a_lo-ialirt_20241110193700_v0.0.0.cdf +0 -0
  62. imap_processing/tests/ialirt/unit/test_process_codicelo.py +106 -0
  63. imap_processing/tests/ialirt/unit/test_process_ephemeris.py +109 -0
  64. imap_processing/tests/ialirt/unit/test_process_hit.py +9 -6
  65. imap_processing/tests/idex/conftest.py +1 -1
  66. imap_processing/tests/idex/test_idex_l0.py +1 -1
  67. imap_processing/tests/idex/test_idex_l1a.py +7 -1
  68. imap_processing/tests/idex/test_idex_l1b.py +126 -0
  69. imap_processing/tests/lo/test_lo_l1a.py +7 -16
  70. imap_processing/tests/lo/test_lo_science.py +67 -3
  71. imap_processing/tests/lo/test_pkts/imap_lo_l0_raw_20240803_v002.pkts +0 -0
  72. imap_processing/tests/lo/validation_data/Instrument_FM1_T104_R129_20240803_ILO_SCI_DE_dec_DN_with_fills.csv +1999 -0
  73. imap_processing/tests/mag/test_mag_l1b.py +39 -5
  74. imap_processing/tests/spice/test_geometry.py +32 -6
  75. imap_processing/tests/spice/test_time.py +135 -6
  76. imap_processing/tests/swapi/test_swapi_decom.py +75 -69
  77. imap_processing/tests/swapi/test_swapi_l1.py +4 -4
  78. imap_processing/tests/swe/test_swe_l2.py +64 -8
  79. imap_processing/tests/test_utils.py +1 -1
  80. imap_processing/tests/ultra/test_data/l0/ultra45_raw_sc_ultrarawimg_withFSWcalcs_FM45_40P_Phi28p5_BeamCal_LinearScan_phi2850_theta-000_20240207T102740.csv +3314 -3314
  81. imap_processing/tests/ultra/unit/test_de.py +8 -3
  82. imap_processing/tests/ultra/unit/test_spatial_utils.py +125 -0
  83. imap_processing/tests/ultra/unit/test_ultra_l1b_extended.py +39 -29
  84. imap_processing/tests/ultra/unit/test_ultra_l1c_pset_bins.py +2 -25
  85. imap_processing/ultra/constants.py +4 -0
  86. imap_processing/ultra/l1b/de.py +8 -14
  87. imap_processing/ultra/l1b/ultra_l1b_extended.py +29 -70
  88. imap_processing/ultra/l1c/ultra_l1c_pset_bins.py +1 -36
  89. imap_processing/ultra/utils/spatial_utils.py +221 -0
  90. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/METADATA +1 -1
  91. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/RECORD +94 -76
  92. imap_processing/hi/l0/__init__.py +0 -0
  93. imap_processing/hi/l0/decom_hi.py +0 -24
  94. imap_processing/hi/packet_definitions/hi_packet_definition.xml +0 -482
  95. imap_processing/tests/hi/test_decom.py +0 -55
  96. imap_processing/tests/hi/test_l1a_sci_de.py +0 -72
  97. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/LICENSE +0 -0
  98. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/WHEEL +0 -0
  99. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/entry_points.txt +0 -0

imap_processing/hit/l0/constants.py

@@ -9,8 +9,8 @@ MOD_10_MAPPING = {
     0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
     1: {"species": "H", "energy_min": 4, "energy_max": 6},
     2: {"species": "H", "energy_min": 6, "energy_max": 10},
-    3: {"species": "4He", "energy_min": 4, "energy_max": 6},
-    4: {"species": "4He", "energy_min": 6, "energy_max": 12},
+    3: {"species": "He4", "energy_min": 4, "energy_max": 6},
+    4: {"species": "He4", "energy_min": 6, "energy_max": 12},
     5: {"species": "CNO", "energy_min": 4, "energy_max": 6},
     6: {"species": "CNO", "energy_min": 6, "energy_max": 12},
     7: {"species": "NeMgSi", "energy_min": 4, "energy_max": 6},

imap_processing/hit/l0/decom_hit.py

@@ -9,106 +9,10 @@ from imap_processing.hit.l0.constants import (
     FLAG_PATTERN,
     FRAME_SIZE,
     MANTISSA_BITS,
-    MOD_10_MAPPING,
 )
 from imap_processing.utils import convert_to_binary_string
 
 
-def subcom_sectorates(sci_dataset: xr.Dataset) -> None:
-    """
-    Subcommutate sectorates data.
-
-    Sector rates data contains rates for 5 species and 10
-    energy ranges. This function subcommutates the sector
-    rates data by organizing the rates by species. Which
-    species and energy range the data belongs to is determined
-    by taking the mod 10 value of the corresponding header
-    minute count value in the dataset. A mapping of mod 10
-    values to species and energy ranges is provided in constants.py.
-
-    MOD_10_MAPPING = {
-        0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
-        1: {"species": "H", "energy_min": 4, "energy_max": 6},
-        2: {"species": "H", "energy_min": 6, "energy_max": 10},
-        3: {"species": "4He", "energy_min": 4, "energy_max": 6},
-        ...
-        9: {"species": "Fe", "energy_min": 4, "energy_max": 12}}
-
-    The data is added to the dataset as new data fields named
-    according to their species. They have 4 dimensions: epoch
-    energy index, declination, and azimuth. The energy index
-    dimension is used to distinguish between the different energy
-    ranges the data belongs to. The energy min and max values for
-    each species are also added to the dataset as new data fields.
-
-    Parameters
-    ----------
-    sci_dataset : xr.Dataset
-        Xarray dataset containing parsed HIT science data.
-    """
-    # TODO:
-    #  - Update to use fill values defined in attribute manager which
-    #    isn't passed into this module nor defined for L1A sci data yet
-    #  - Determine naming convention for species data fields in dataset
-    #    (i.e. h, H, hydrogen, Hydrogen, etc.)
-    #  - Remove raw "sectorates" data from dataset after processing is complete?
-    #  - consider moving this function to hit_l1a.py
-
-    # Calculate mod 10 values
-    hdr_min_count_mod_10 = sci_dataset.hdr_minute_cnt.values % 10
-
-    # Reference mod 10 mapping to initialize data structure for species and
-    # energy ranges and add 8x15 arrays with fill values for each science frame.
-    num_frames = len(hdr_min_count_mod_10)
-    data_by_species_and_energy_range = {
-        key: {**value, "rates": np.full((num_frames, 8, 15), fill_value=np.nan)}
-        for key, value in MOD_10_MAPPING.items()
-    }
-
-    # Update rates for science frames where data is available
-    for i, mod_10 in enumerate(hdr_min_count_mod_10):
-        data_by_species_and_energy_range[mod_10]["rates"][i] = sci_dataset[
-            "sectorates"
-        ].values[i]
-
-    # H has 3 energy ranges, 4He, CNO, NeMgSi have 2, and Fe has 1.
-    # Aggregate sector rates and energy min/max values for each species.
-    # First, initialize dictionaries to store rates and min/max energy values by species
-    data_by_species: dict = {
-        value["species"]: {"rates": [], "energy_min": [], "energy_max": []}
-        for value in data_by_species_and_energy_range.values()
-    }
-
-    for value in data_by_species_and_energy_range.values():
-        species = value["species"]
-        data_by_species[species]["rates"].append(value["rates"])
-        data_by_species[species]["energy_min"].append(value["energy_min"])
-        data_by_species[species]["energy_max"].append(value["energy_max"])
-
-    # Add sector rates by species to the dataset
-    for species, data in data_by_species.items():
-        # Rates data has shape: energy_index, epoch, declination, azimuth
-        # Convert rates to numpy array and transpose axes to get
-        # shape: epoch, energy_index, declination, azimuth
-        rates_data = np.transpose(np.array(data["rates"]), axes=(1, 0, 2, 3))
-
-        sci_dataset[species] = xr.DataArray(
-            data=rates_data,
-            dims=["epoch", f"{species}_energy_index", "declination", "azimuth"],
-            name=species,
-        )
-        sci_dataset[f"{species}_energy_min"] = xr.DataArray(
-            data=np.array(data["energy_min"]),
-            dims=[f"{species}_energy_index"],
-            name=f"{species}_energy_min",
-        )
-        sci_dataset[f"{species}_energy_max"] = xr.DataArray(
-            data=np.array(data["energy_max"]),
-            dims=[f"{species}_energy_index"],
-            name=f"{species}_energy_max",
-        )
-
-
 def parse_data(bin_str: str, bits_per_index: int, start: int, end: int) -> list:
     """
     Parse binary data.

@@ -197,6 +101,16 @@ def parse_count_rates(sci_dataset: xr.Dataset) -> None:
             dims = ["epoch"]
 
         sci_dataset[field] = xr.DataArray(parsed_data, dims=dims, name=field)
+        # Add dimensions to coordinates
+        # TODO: confirm that dtype int16 is correct
+        for dim in dims:
+            if dim not in sci_dataset.coords:
+                sci_dataset.coords[dim] = xr.DataArray(
+                    np.arange(sci_dataset.sizes[dim], dtype=np.int16),
+                    dims=[dim],
+                    name=dim,
+                )
+
         # increment the start of the next section of data to parse
         section_start += field_meta.section_length
 

@@ -495,10 +409,7 @@ def decom_hit(sci_dataset: xr.Dataset) -> xr.Dataset:
     # Parse count rates data from binary and add to dataset
     parse_count_rates(sci_dataset)
 
-    # Further organize sector rates by species type
-    subcom_sectorates(sci_dataset)
-
-    # TODO:
-    # -clean up dataset - remove raw binary data, raw sectorates? Any other fields?
+    # Remove raw binary data and unused spare bits from dataset
+    sci_dataset = sci_dataset.drop_vars(["count_rates_raw", "science_data", "spare"])
 
     return sci_dataset
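
Note: the coordinate-assignment step added to parse_count_rates gives any dimension without a coordinate a plain int16 index. A minimal sketch of that pattern on a toy dataset (variable names are illustrative, not the real HIT fields):

    import numpy as np
    import xarray as xr

    # Toy dataset with an "epoch" dimension but no coordinate attached to it.
    ds = xr.Dataset({"counts": ("epoch", np.array([3, 1, 4, 1, 5]))})

    # Assign a simple int16 index to any dimension lacking a coordinate,
    # mirroring the loop added above.
    for dim in ds.dims:
        if dim not in ds.coords:
            ds.coords[dim] = xr.DataArray(
                np.arange(ds.sizes[dim], dtype=np.int16), dims=[dim], name=dim
            )

    print(ds.coords["epoch"].values)  # [0 1 2 3 4]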

imap_processing/hit/l1a/hit_l1a.py

@@ -2,6 +2,7 @@
 
 import logging
 
+import numpy as np
 import xarray as xr
 
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes

@@ -11,6 +12,7 @@ from imap_processing.hit.hit_utils import (
     get_datasets_by_apid,
     process_housekeeping_data,
 )
+from imap_processing.hit.l0.constants import MOD_10_MAPPING
 from imap_processing.hit.l0.decom_hit import decom_hit
 
 logger = logging.getLogger(__name__)

@@ -40,26 +42,131 @@ def hit_l1a(packet_file: str, data_version: str) -> list[xr.Dataset]:
     # Create the attribute manager for this data level
     attr_mgr = get_attribute_manager(data_version, "l1a")
 
+    l1a_datasets = []
+
     # Process l1a data products
     if HitAPID.HIT_HSKP in datasets_by_apid:
         logger.info("Creating HIT L1A housekeeping dataset")
-        datasets_by_apid[HitAPID.HIT_HSKP] = process_housekeeping_data(
-            datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
+        l1a_datasets.append(
+            process_housekeeping_data(
+                datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
+            )
         )
-
     if HitAPID.HIT_SCIENCE in datasets_by_apid:
-        # TODO complete science data processing
-        print("Skipping science data for now")
-        datasets_by_apid[HitAPID.HIT_SCIENCE] = process_science(
-            datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr
+        l1a_datasets.extend(
+            process_science(datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr)
         )
+    return l1a_datasets
+
+
+def subcom_sectorates(sci_dataset: xr.Dataset) -> None:
+    """
+    Subcommutate sectorates data.
+
+    Sector rates data contains rates for 5 species and 10
+    energy ranges. This function subcommutates the sector
+    rates data by organizing the rates by species. Which
+    species and energy range the data belongs to is determined
+    by taking the mod 10 value of the corresponding header
+    minute count value in the dataset. A mapping of mod 10
+    values to species and energy ranges is provided in constants.py.
+
+    MOD_10_MAPPING = {
+        0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
+        1: {"species": "H", "energy_min": 4, "energy_max": 6},
+        2: {"species": "H", "energy_min": 6, "energy_max": 10},
+        3: {"species": "4He", "energy_min": 4, "energy_max": 6},
+        ...
+        9: {"species": "Fe", "energy_min": 4, "energy_max": 12}}
+
+    The data is added to the dataset as new data fields named
+    according to their species. They have 4 dimensions: epoch
+    energy index, declination, and azimuth. The energy index
+    dimension is used to distinguish between the different energy
+    ranges the data belongs to. The energy min and max values for
+    each species are also added to the dataset as new data fields.
 
-    return list(datasets_by_apid.values())
+    Parameters
+    ----------
+    sci_dataset : xarray.Dataset
+        Xarray dataset containing parsed HIT science data.
+    """
+    # TODO:
+    #  - Update to use fill values defined in attribute manager which
+    #    isn't defined for L1A science data yet
+    #  - fix issues with fe_counts_sectored. The array has shape
+    #    (epoch: 28, fe_energy_index: 1, declination: 8, azimuth: 15),
+    #    but cdflib drops second dimension of size 1 and recognizes
+    #    only 3 total dimensions. Are dimensions of 1 ignored?
+
+    # Calculate mod 10 values
+    hdr_min_count_mod_10 = sci_dataset.hdr_minute_cnt.values % 10
+
+    # Reference mod 10 mapping to initialize data structure for species and
+    # energy ranges and add 8x15 arrays with fill values for each science frame.
+    num_frames = len(hdr_min_count_mod_10)
+    # TODO: add more specific dtype for rates (ex. int16) once this is defined by HIT
+    data_by_species_and_energy_range = {
+        key: {**value, "rates": np.full((num_frames, 8, 15), fill_value=-1, dtype=int)}
+        for key, value in MOD_10_MAPPING.items()
+    }
+
+    # Update rates for science frames where data is available
+    for i, mod_10 in enumerate(hdr_min_count_mod_10):
+        data_by_species_and_energy_range[mod_10]["rates"][i] = sci_dataset[
+            "sectorates"
+        ].values[i]
+
+    # H has 3 energy ranges, 4He, CNO, NeMgSi have 2, and Fe has 1.
+    # Aggregate sector rates and energy min/max values for each species.
+    # First, initialize dictionaries to store rates and min/max energy values by species
+    data_by_species: dict = {
+        value["species"]: {"rates": [], "energy_min": [], "energy_max": []}
+        for value in data_by_species_and_energy_range.values()
+    }
+
+    for value in data_by_species_and_energy_range.values():
+        species = value["species"]
+        data_by_species[species]["rates"].append(value["rates"])
+        data_by_species[species]["energy_min"].append(value["energy_min"])
+        data_by_species[species]["energy_max"].append(value["energy_max"])
+
+    # Add sector rates by species to the dataset
+    for species_type, data in data_by_species.items():
+        # Rates data has shape: energy_index, epoch, declination, azimuth
+        # Convert rates to numpy array and transpose axes to get
+        # shape: epoch, energy_index, declination, azimuth
+        rates_data = np.transpose(np.array(data["rates"]), axes=(1, 0, 2, 3))
+
+        species = species_type.lower()
+        sci_dataset[f"{species}_counts_sectored"] = xr.DataArray(
+            data=rates_data,
+            dims=["epoch", f"{species}_energy_index", "declination", "azimuth"],
+            name=f"{species}_counts_sectored",
+        )
+        sci_dataset[f"{species}_energy_min"] = xr.DataArray(
+            data=np.array(data["energy_min"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_min",
+        )
+        sci_dataset[f"{species}_energy_max"] = xr.DataArray(
+            data=np.array(data["energy_max"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_max",
+        )
+        # add energy index coordinate to the dataset
+        sci_dataset.coords[f"{species}_energy_index"] = xr.DataArray(
+            np.arange(sci_dataset.sizes[f"{species}_energy_index"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_index",
+        )
 
 
-def process_science(dataset: xr.Dataset, attr_mgr: ImapCdfAttributes) -> xr.Dataset:
+def process_science(
+    dataset: xr.Dataset, attr_mgr: ImapCdfAttributes
+) -> list[xr.Dataset]:
     """
-    Will process science dataset for CDF product.
+    Will process science datasets for CDF products.
 
     Process binary science data for CDF creation. The data is
     grouped into science frames, decommutated and decompressed,
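
Note: the aggregation in subcom_sectorates ends with a stack-and-transpose that turns a per-energy-range list of (epoch, declination, azimuth) arrays into one (epoch, energy_index, declination, azimuth) array. A shape-only sketch with toy sizes (not real HIT data):

    import numpy as np

    num_frames = 4  # toy epoch count
    # e.g. three H energy ranges, each with shape (epoch, declination, azimuth)
    rates_per_energy = [np.full((num_frames, 8, 15), fill_value=i) for i in range(3)]

    # Stacking gives (energy_index, epoch, declination, azimuth);
    # transposing moves epoch to the front, matching the CDF layout.
    stacked = np.array(rates_per_energy)                   # (3, 4, 8, 15)
    rates_data = np.transpose(stacked, axes=(1, 0, 2, 3))  # (4, 3, 8, 15)
    print(stacked.shape, rates_data.shape)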

@@ -70,30 +177,64 @@ def process_science(dataset: xr.Dataset, attr_mgr: ImapCdfAttributes) -> xr.Dataset:
     Parameters
     ----------
     dataset : xarray.Dataset
-        Dataset containing HIT science data.
+        A dataset containing HIT science data.
 
     attr_mgr : ImapCdfAttributes
         Attribute manager used to get the data product field's attributes.
 
     Returns
     -------
-    dataset : xarray.Dataset
-        An updated dataset ready for CDF conversion.
+    dataset : list
+        A list of science datasets ready for CDF conversion.
     """
     logger.info("Creating HIT L1A science datasets")
 
-    # Logical sources for the two products.
-    # logical_sources = ["imap_hit_l1a_count-rates", "imap_hit_l1a_pulse-height-event"]
-
     # Decommutate and decompress the science data
     sci_dataset = decom_hit(dataset)
 
-    # TODO: Complete this function
-    #  - split the science data into count rates and event datasets
-    #  - update dimensions and add attributes to the dataset and data arrays
-    #  - return list of two datasets (count rates and events)?
+    # Organize sector rates by species type
+    subcom_sectorates(sci_dataset)
 
-    # logger.info("HIT L1A event dataset created")
-    # logger.info("HIT L1A count rates dataset created")
+    # Split the science data into count rates and event datasets
+    pha_raw_dataset = xr.Dataset(
+        {"pha_raw": sci_dataset["pha_raw"]}, coords={"epoch": sci_dataset["epoch"]}
+    )
+    count_rates_dataset = sci_dataset.drop_vars("pha_raw")
 
-    return sci_dataset
+    # Logical sources for the two products.
+    logical_sources = ["imap_hit_l1a_count-rates", "imap_hit_l1a_pulse-height-events"]
+
+    datasets = []
+    # Update attributes and dimensions
+    for dataset, logical_source in zip(
+        [count_rates_dataset, pha_raw_dataset], logical_sources
+    ):
+        dataset.attrs = attr_mgr.get_global_attributes(logical_source)
+
+        # TODO: Add CDF attributes to yaml once they're defined for L1A science data
+        # Assign attributes and dimensions to each data array in the Dataset
+        for field in dataset.data_vars.keys():
+            try:
+                # Create a dict of dimensions using the DEPEND_I keys in the
+                # attributes
+                dims = {
+                    key: value
+                    for key, value in attr_mgr.get_variable_attributes(field).items()
+                    if "DEPEND" in key
+                }
+                dataset[field].attrs = attr_mgr.get_variable_attributes(field)
+                dataset[field].assign_coords(dims)
+            except KeyError:
+                print(f"Field {field} not found in attribute manager.")
+                logger.warning(f"Field {field} not found in attribute manager.")
+
+        dataset.epoch.attrs = attr_mgr.get_variable_attributes("epoch")
+        # Remove DEPEND_0 attribute from epoch variable added by attr_mgr.
+        # Not required for epoch
+        del dataset["epoch"].attrs["DEPEND_0"]
+
+        datasets.append(dataset)
+
+        logger.info(f"HIT L1A dataset created for {logical_source}")
+
+    return datasets
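
Note: process_science now returns two products, with the pulse-height events pulled into their own Dataset and everything else kept as the count-rates product, and per-variable dimensions are taken from DEPEND_* attribute keys. A toy illustration of both steps (placeholder variable names, not the real HIT fields):

    import numpy as np
    import xarray as xr

    # Toy stand-in for the decommutated science dataset.
    sci = xr.Dataset(
        {
            "pha_raw": ("epoch", np.zeros(3)),
            "h_counts_sectored": ("epoch", np.ones(3)),
        },
        coords={"epoch": np.arange(3)},
    )

    # One product keeps only the raw pulse-height events...
    pha_raw_dataset = xr.Dataset(
        {"pha_raw": sci["pha_raw"]}, coords={"epoch": sci["epoch"]}
    )
    # ...the other keeps everything except them.
    count_rates_dataset = sci.drop_vars("pha_raw")

    # Filtering DEPEND_* keys out of a variable's attribute dict, as done per field.
    var_attrs = {"CATDESC": "example", "DEPEND_0": "epoch", "DEPEND_1": "azimuth"}
    dims = {key: value for key, value in var_attrs.items() if "DEPEND" in key}
    print(list(count_rates_dataset.data_vars), dims)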

imap_processing/ialirt/l0/process_codicelo.py (new file)

@@ -0,0 +1,153 @@
+"""Functions to support I-ALiRT CoDICE Lo processing."""
+
+import logging
+from typing import Any
+
+import numpy as np
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+
+def find_groups(data: xr.Dataset) -> xr.Dataset:
+    """
+    Find all occurrences of the sequential set of 233 values 0-232.
+
+    If a value is missing, or we are starting/ending
+    in the middle of a sequence we do not count that as a valid group.
+
+    Parameters
+    ----------
+    data : xr.Dataset
+        CoDICE Lo Dataset.
+
+    Returns
+    -------
+    grouped_data : xr.Dataset
+        Grouped data.
+    """
+    subcom_range = (0, 232)
+
+    data = data.sortby("cod_lo_acq", ascending=True)
+
+    # Use cod_lo_counter == 0 to define the beginning of the group.
+    # Find cod_lo_acq at this index and use it as the beginning time for the group.
+    start_sc_ticks = data["cod_lo_acq"][(data["cod_lo_counter"] == subcom_range[0])]
+    start_sc_tick = start_sc_ticks.min()
+    # Use cod_lo_counter == 232 to define the end of the group.
+    last_sc_ticks = data["cod_lo_acq"][
+        ([data["cod_lo_counter"] == subcom_range[-1]][-1])
+    ]
+    last_sc_tick = last_sc_ticks.max()
+
+    # Filter out data before the first cod_lo_counter=0 and
+    # after the last cod_lo_counter=232.
+    grouped_data = data.where(
+        (data["cod_lo_acq"] >= start_sc_tick) & (data["cod_lo_acq"] <= last_sc_tick),
+        drop=True,
+    )
+
+    # Assign labels based on the cod_lo_acq times.
+    group_labels = np.searchsorted(
+        start_sc_ticks, grouped_data["cod_lo_acq"], side="right"
+    )
+    # Example:
+    # grouped_data.coords
+    # Coordinates:
+    #   * epoch  (epoch) int64 7kB 315922822184000000 ... 315923721184000000
+    #   * group  (group) int64 7kB 1 1 1 1 1 1 1 1 1 ... 15 15 15 15 15 15 15 15 15
+    grouped_data["group"] = ("group", group_labels)
+
+    return grouped_data
+
+
+def append_cod_lo_data(dataset: xr.Dataset) -> xr.Dataset:
+    """
+    Append the cod_lo_## data values and create an xarray.
+
+    Parameters
+    ----------
+    dataset : xr.Dataset
+        Original dataset of group.
+
+    Returns
+    -------
+    appended_dataset : xr.Dataset
+        Dataset with cod_lo_## stacked.
+    """
+    # Number of codice lo data rows
+    num_cod_lo_rows = 15
+    cod_lo_data = np.stack(
+        [dataset[f"cod_lo_data_{i:02}"].values for i in range(num_cod_lo_rows)], axis=1
+    )
+
+    repeated_data = {
+        var: np.repeat(dataset[var].values, num_cod_lo_rows)
+        for var in dataset.data_vars
+        if not var.startswith("cod_lo_data_")
+    }
+
+    repeated_data["cod_lo_appended"] = cod_lo_data.flatten()
+    repeated_epoch = np.repeat(dataset["epoch"].values, num_cod_lo_rows)
+
+    appended_dataset = xr.Dataset(
+        data_vars={name: ("epoch", values) for name, values in repeated_data.items()},
+        coords={"epoch": repeated_epoch},
+    )
+
+    return appended_dataset
+
+
+def process_codicelo(xarray_data: xr.Dataset) -> list[dict]:
+    """
+    Create final data products.
+
+    Parameters
+    ----------
+    xarray_data : xr.Dataset
+        Parsed data.
+
+    Returns
+    -------
+    codicelo_data : list[dict]
+        Dictionary of final data product.
+
+    Notes
+    -----
+    This function is incomplete and will need to be updated to include the
+    necessary calculations and data products.
+    - Calculate species counts (pg 27 of Algorithm Document)
+    - Calculate rates (assume 4 minutes per group)
+    - Calculate L2 CoDICE pseudodensities (pg 37 of Algorithm Document)
+    - Calculate the public data products
+    """
+    grouped_data = find_groups(xarray_data)
+    unique_groups = np.unique(grouped_data["group"])
+    codicelo_data: list[dict[str, Any]] = [{}]
+
+    for group in unique_groups:
+        # cod_lo_counter values for the group should be 0-232 with no duplicates.
+        subcom_values = grouped_data["cod_lo_counter"][
+            (grouped_data["group"] == group).values
+        ]
+
+        # Ensure no duplicates and all values from 0 to 232 are present
+        if not np.array_equal(subcom_values, np.arange(233)):
+            logger.warning(
+                f"Group {group} does not contain all values from 0 to "
+                f"232 without duplicates."
+            )
+            continue
+
+        mask = grouped_data["group"] == group
+        filtered_indices = np.where(mask)[0]
+        group_data = grouped_data.isel(epoch=filtered_indices)
+
+        append_cod_lo_data(group_data)
+
+        # TODO: calculate species counts
+        # TODO: calculate rates
+        # TODO: calculate L2 CoDICE pseudodensities
+        # TODO: calculate the public data products
+
+    return codicelo_data
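
Note: find_groups labels each packet with np.searchsorted against the group start ticks, so every packet is tagged with the most recent cod_lo_counter == 0 boundary. A small numeric sketch of that labeling with toy tick values (not real spacecraft times):

    import numpy as np

    # Toy acquisition ticks for eight packets, already sorted.
    cod_lo_acq = np.array([10, 11, 12, 20, 21, 22, 30, 31])

    # Ticks at which cod_lo_counter == 0, i.e. where each group starts.
    start_sc_ticks = np.array([10, 20, 30])

    # side="right" assigns every packet at or after a start tick to that group,
    # so group labels come out as 1, 2, 3, ...
    group_labels = np.searchsorted(start_sc_ticks, cod_lo_acq, side="right")
    print(group_labels)  # [1 1 1 2 2 2 3 3]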

imap_processing/ialirt/l0/process_hit.py

@@ -161,13 +161,12 @@ def process_hit(xarray_data: xr.Dataset) -> list[dict]:
 
     Parameters
     ----------
-    xarray_data : dict(xr.Dataset)
-        Dictionary of xarray data including a single
-        set for processing.
+    xarray_data : xr.Dataset
+        Parsed data.
 
     Returns
     -------
-    hit_data : dict
+    hit_data : list[dict]
         Dictionary final data product.
     """
     hit_data = []

@@ -182,10 +181,11 @@ def process_hit(xarray_data: xr.Dataset) -> list[dict]:
 
         # Ensure no duplicates and all values from 0 to 59 are present
         if not np.array_equal(subcom_values, np.arange(60)):
-            raise ValueError(
+            logger.warning(
                 f"Group {group} does not contain all values from 0 to "
                 f"59 without duplicates."
            )
+            continue
 
         fast_rate_1 = grouped_data["hit_fast_rate_1"][
             (grouped_data["group"] == group).values