imap-processing 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (104)
  1. imap_processing/_version.py +2 -2
  2. imap_processing/ancillary/ancillary_dataset_combiner.py +161 -1
  3. imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +301 -274
  4. imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +28 -28
  5. imap_processing/cdf/config/imap_codice_l2_variable_attrs.yaml +1044 -203
  6. imap_processing/cdf/config/imap_constant_attrs.yaml +4 -2
  7. imap_processing/cdf/config/imap_glows_l1b_variable_attrs.yaml +12 -0
  8. imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml +5 -0
  9. imap_processing/cdf/config/imap_hit_global_cdf_attrs.yaml +10 -4
  10. imap_processing/cdf/config/imap_idex_l2a_variable_attrs.yaml +33 -4
  11. imap_processing/cdf/config/imap_idex_l2b_variable_attrs.yaml +8 -91
  12. imap_processing/cdf/config/imap_idex_l2c_variable_attrs.yaml +106 -16
  13. imap_processing/cdf/config/imap_lo_l1a_variable_attrs.yaml +4 -15
  14. imap_processing/cdf/config/imap_lo_l1c_variable_attrs.yaml +189 -98
  15. imap_processing/cdf/config/imap_mag_global_cdf_attrs.yaml +85 -2
  16. imap_processing/cdf/config/imap_mag_l1c_variable_attrs.yaml +24 -1
  17. imap_processing/cdf/config/imap_ultra_l1b_variable_attrs.yaml +12 -4
  18. imap_processing/cdf/config/imap_ultra_l1c_variable_attrs.yaml +50 -7
  19. imap_processing/cli.py +95 -41
  20. imap_processing/codice/codice_l1a.py +131 -31
  21. imap_processing/codice/codice_l2.py +118 -10
  22. imap_processing/codice/constants.py +740 -595
  23. imap_processing/decom.py +1 -4
  24. imap_processing/ena_maps/ena_maps.py +32 -25
  25. imap_processing/ena_maps/utils/naming.py +8 -2
  26. imap_processing/glows/ancillary/imap_glows_exclusions-by-instr-team_20250923_v002.dat +10 -0
  27. imap_processing/glows/ancillary/imap_glows_map-of-excluded-regions_20250923_v002.dat +393 -0
  28. imap_processing/glows/ancillary/imap_glows_map-of-uv-sources_20250923_v002.dat +593 -0
  29. imap_processing/glows/ancillary/imap_glows_pipeline_settings_20250923_v002.json +54 -0
  30. imap_processing/glows/ancillary/imap_glows_suspected-transients_20250923_v002.dat +10 -0
  31. imap_processing/glows/l1b/glows_l1b.py +99 -9
  32. imap_processing/glows/l1b/glows_l1b_data.py +350 -38
  33. imap_processing/glows/l2/glows_l2.py +11 -0
  34. imap_processing/hi/hi_l1a.py +124 -3
  35. imap_processing/hi/hi_l1b.py +154 -71
  36. imap_processing/hi/hi_l2.py +84 -51
  37. imap_processing/hi/utils.py +153 -8
  38. imap_processing/hit/l0/constants.py +3 -0
  39. imap_processing/hit/l0/decom_hit.py +3 -6
  40. imap_processing/hit/l1a/hit_l1a.py +311 -21
  41. imap_processing/hit/l1b/hit_l1b.py +54 -126
  42. imap_processing/hit/l2/hit_l2.py +6 -6
  43. imap_processing/ialirt/calculate_ingest.py +219 -0
  44. imap_processing/ialirt/constants.py +12 -2
  45. imap_processing/ialirt/generate_coverage.py +15 -2
  46. imap_processing/ialirt/l0/ialirt_spice.py +5 -2
  47. imap_processing/ialirt/l0/parse_mag.py +293 -42
  48. imap_processing/ialirt/l0/process_hit.py +5 -3
  49. imap_processing/ialirt/l0/process_swapi.py +41 -25
  50. imap_processing/ialirt/process_ephemeris.py +70 -14
  51. imap_processing/idex/idex_l0.py +2 -2
  52. imap_processing/idex/idex_l1a.py +2 -3
  53. imap_processing/idex/idex_l1b.py +2 -3
  54. imap_processing/idex/idex_l2a.py +130 -4
  55. imap_processing/idex/idex_l2b.py +158 -143
  56. imap_processing/idex/idex_utils.py +1 -3
  57. imap_processing/lo/l0/lo_science.py +25 -24
  58. imap_processing/lo/l1b/lo_l1b.py +3 -3
  59. imap_processing/lo/l1c/lo_l1c.py +116 -50
  60. imap_processing/lo/l2/lo_l2.py +29 -29
  61. imap_processing/lo/lo_ancillary.py +55 -0
  62. imap_processing/mag/l1a/mag_l1a.py +1 -0
  63. imap_processing/mag/l1a/mag_l1a_data.py +26 -0
  64. imap_processing/mag/l1b/mag_l1b.py +3 -2
  65. imap_processing/mag/l1c/interpolation_methods.py +14 -15
  66. imap_processing/mag/l1c/mag_l1c.py +23 -6
  67. imap_processing/mag/l1d/mag_l1d.py +57 -14
  68. imap_processing/mag/l1d/mag_l1d_data.py +167 -30
  69. imap_processing/mag/l2/mag_l2_data.py +10 -2
  70. imap_processing/quality_flags.py +9 -1
  71. imap_processing/spice/geometry.py +76 -33
  72. imap_processing/spice/pointing_frame.py +0 -6
  73. imap_processing/spice/repoint.py +29 -2
  74. imap_processing/spice/spin.py +28 -8
  75. imap_processing/spice/time.py +12 -22
  76. imap_processing/swapi/l1/swapi_l1.py +10 -4
  77. imap_processing/swapi/l2/swapi_l2.py +15 -17
  78. imap_processing/swe/l1b/swe_l1b.py +1 -2
  79. imap_processing/ultra/constants.py +1 -24
  80. imap_processing/ultra/l0/ultra_utils.py +9 -11
  81. imap_processing/ultra/l1a/ultra_l1a.py +1 -2
  82. imap_processing/ultra/l1b/cullingmask.py +6 -3
  83. imap_processing/ultra/l1b/de.py +81 -23
  84. imap_processing/ultra/l1b/extendedspin.py +13 -10
  85. imap_processing/ultra/l1b/lookup_utils.py +281 -28
  86. imap_processing/ultra/l1b/quality_flag_filters.py +10 -1
  87. imap_processing/ultra/l1b/ultra_l1b_culling.py +161 -3
  88. imap_processing/ultra/l1b/ultra_l1b_extended.py +253 -47
  89. imap_processing/ultra/l1c/helio_pset.py +97 -24
  90. imap_processing/ultra/l1c/l1c_lookup_utils.py +256 -0
  91. imap_processing/ultra/l1c/spacecraft_pset.py +83 -16
  92. imap_processing/ultra/l1c/ultra_l1c.py +6 -2
  93. imap_processing/ultra/l1c/ultra_l1c_culling.py +85 -0
  94. imap_processing/ultra/l1c/ultra_l1c_pset_bins.py +385 -277
  95. imap_processing/ultra/l2/ultra_l2.py +0 -1
  96. imap_processing/ultra/utils/ultra_l1_utils.py +28 -3
  97. imap_processing/utils.py +3 -4
  98. {imap_processing-0.18.0.dist-info → imap_processing-0.19.0.dist-info}/METADATA +2 -2
  99. {imap_processing-0.18.0.dist-info → imap_processing-0.19.0.dist-info}/RECORD +102 -95
  100. imap_processing/idex/idex_l2c.py +0 -84
  101. imap_processing/spice/kernels.py +0 -187
  102. {imap_processing-0.18.0.dist-info → imap_processing-0.19.0.dist-info}/LICENSE +0 -0
  103. {imap_processing-0.18.0.dist-info → imap_processing-0.19.0.dist-info}/WHEEL +0 -0
  104. {imap_processing-0.18.0.dist-info → imap_processing-0.19.0.dist-info}/entry_points.txt +0 -0
imap_processing/hi/utils.py
@@ -1,12 +1,12 @@
  """IMAP-Hi utils functions."""

  import re
- from collections.abc import Sequence
+ from collections.abc import Iterable, Sequence
  from dataclasses import dataclass
  from enum import IntEnum
- from typing import Optional, Union

  import numpy as np
+ import pandas as pd
  import xarray as xr

  from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
@@ -15,11 +15,13 @@ from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes
  class HIAPID(IntEnum):
      """Create ENUM for apid."""

+     H45_MEMDMP = 740
      H45_APP_NHK = 754
      H45_SCI_CNT = 769
      H45_SCI_DE = 770
      H45_DIAG_FEE = 772

+     H90_MEMDMP = 804
      H90_APP_NHK = 818
      H90_SCI_CNT = 833
      H90_SCI_DE = 834
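
The two new memory-dump APIDs slot into the existing IntEnum, so packets can still be routed by their integer APID. A minimal sketch of the lookup, using only values shown in this hunk:

    from imap_processing.hi.utils import HIAPID

    apid = 740                   # APID read from a CCSDS packet header
    HIAPID(apid).name            # -> "H45_MEMDMP"
    HIAPID.H90_MEMDMP.value      # -> 804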
@@ -100,9 +102,9 @@ def parse_sensor_number(full_string: str) -> int:
  def full_dataarray(
      name: str,
      attrs: dict,
-     coords: Optional[dict[str, xr.DataArray]] = None,
-     shape: Optional[Union[int, Sequence[int]]] = None,
-     fill_value: Optional[float] = None,
+     coords: dict[str, xr.DataArray] | None = None,
+     shape: int | Sequence[int] | None = None,
+     fill_value: float | None = None,
  ) -> xr.DataArray:
      """
      Generate an empty xarray.DataArray with appropriate attributes.
@@ -158,9 +160,9 @@ def full_dataarray(

  def create_dataset_variables(
      variable_names: list[str],
-     variable_shape: Optional[Union[int, Sequence[int]]] = None,
-     coords: Optional[dict[str, xr.DataArray]] = None,
-     fill_value: Optional[float] = None,
+     variable_shape: int | Sequence[int] | None = None,
+     coords: dict[str, xr.DataArray] | None = None,
+     fill_value: float | None = None,
      att_manager_lookup_str: str = "{0}",
  ) -> dict[str, xr.DataArray]:
      """
@@ -247,3 +249,146 @@ class CoincidenceBitmap(IntEnum):
          matches = re.findall(pattern, detector_hit_str)
          # Sum the integer value assigned to the detector name for each match
          return sum(CoincidenceBitmap[m] for m in matches)
+
+
+ class EsaEnergyStepLookupTable:
+     """Class for holding a esa_step to esa_energy lookup table."""
+
+     def __init__(self) -> None:
+         self.df = pd.DataFrame(
+             columns=["start_met", "end_met", "esa_step", "esa_energy_step"]
+         )
+         self._indexed = False
+
+         # Get the FILLVAL from the CDF attribute manager that will be returned
+         # for queries without matches
+         attr_mgr = ImapCdfAttributes()
+         attr_mgr.add_instrument_global_attrs("hi")
+         attr_mgr.add_instrument_variable_attrs(instrument="hi", level=None)
+         var_attrs = attr_mgr.get_variable_attributes(
+             "hi_de_esa_energy_step", check_schema=False
+         )
+         self._fillval = var_attrs["FILLVAL"]
+         self._esa_energy_step_dtype = var_attrs["dtype"]
+
+     def add_entry(
+         self, start_met: float, end_met: float, esa_step: int, esa_energy_step: int
+     ) -> None:
+         """
+         Add a single entry to the lookup table.
+
+         Parameters
+         ----------
+         start_met : float
+             Start mission elapsed time of the time range.
+         end_met : float
+             End mission elapsed time of the time range.
+         esa_step : int
+             ESA step value.
+         esa_energy_step : int
+             ESA energy step value to be stored.
+         """
+         new_row = pd.DataFrame(
+             {
+                 "start_met": [start_met],
+                 "end_met": [end_met],
+                 "esa_step": [esa_step],
+                 "esa_energy_step": [esa_energy_step],
+             }
+         )
+         self.df = pd.concat([self.df, new_row], ignore_index=True)
+         self._indexed = False
+
+     def _ensure_indexed(self) -> None:
+         """
+         Create index for faster queries if not already done.
+
+         Notes
+         -----
+         This method sorts the internal DataFrame by start_met and esa_step
+         for improved query performance.
+         """
+         if not self._indexed:
+             # Sort by start_met and esa_step for better query performance
+             self.df = self.df.sort_values(["start_met", "esa_step"]).reset_index(
+                 drop=True
+             )
+             self._indexed = True
+
+     def query(
+         self,
+         query_met: float | Iterable[float],
+         esa_step: int | Iterable[float],
+     ) -> float | np.ndarray:
+         """
+         Query MET(s) and esa_step(s) to retrieve esa_energy_step(s).
+
+         Parameters
+         ----------
+         query_met : float or array_like
+             Mission elapsed time value(s) to query.
+             Can be a single float or array-like of floats.
+         esa_step : int or array_like
+             ESA step value(s) to match. Can be a single int or array-like of ints.
+             Must be same type (scalar or array-like) as query_met.
+
+         Returns
+         -------
+         float or numpy.ndarray
+             - If inputs are scalars: returns float (esa_energy_step)
+             - If inputs are array-like: returns numpy array of esa_energy_steps
+               with same length as inputs.
+             Contains FILLVAL for queries with no matches.
+
+         Raises
+         ------
+         ValueError
+             If one input is scalar and the other is array-like, or if both are
+             array-like but have different lengths.
+
+         Notes
+         -----
+         If multiple entries match a query, returns the first match found.
+         """
+         self._ensure_indexed()
+
+         # Check if inputs are scalars
+         is_scalar_met = np.isscalar(query_met)
+         is_scalar_step = np.isscalar(esa_step)
+
+         # Check for mismatched input types
+         if is_scalar_met != is_scalar_step:
+             raise ValueError(
+                 "query_met and esa_step must both be scalars or both be array-like"
+             )
+
+         # Convert to arrays for uniform processing
+         query_mets = np.atleast_1d(query_met)
+         esa_steps = np.atleast_1d(esa_step)
+
+         # Ensure both arrays have the same shape
+         if query_mets.shape != esa_steps.shape:
+             raise ValueError(
+                 "query_met and esa_step must have the same "
+                 "length when both are array-like"
+             )
+
+         results = np.full_like(query_mets, self._fillval)
+
+         # Lookup esa_energy_steps for queries
+         for i, (qm, es) in enumerate(zip(query_mets, esa_steps, strict=False)):
+             mask = (
+                 (self.df["start_met"] <= qm)
+                 & (self.df["end_met"] >= qm)
+                 & (self.df["esa_step"] == es)
+             )
+
+             matches = self.df[mask]
+             if not matches.empty:
+                 results[i] = matches["esa_energy_step"].iloc[0]
+
+         # Return scalar for scalar inputs, array for array inputs
+         if is_scalar_met and is_scalar_step:
+             return results.astype(self._esa_energy_step_dtype)[0]
+         else:
+             return results.astype(self._esa_energy_step_dtype)
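
A hedged sketch of how the new lookup table is meant to be used (the MET ranges and step values below are invented; constructing the table also requires the packaged CDF attribute files, since __init__ reads FILLVAL through ImapCdfAttributes):

    lut = EsaEnergyStepLookupTable()
    lut.add_entry(start_met=1000.0, end_met=2000.0, esa_step=1, esa_energy_step=5)
    lut.add_entry(start_met=2000.0, end_met=3000.0, esa_step=1, esa_energy_step=6)

    lut.query(1500.0, 1)                            # scalar in, scalar out -> 5
    lut.query([1500.0, 2500.0, 9999.0], [1, 1, 1])
    # array in, array out -> [5, 6, FILLVAL]; 9999.0 falls in no MET range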
imap_processing/hit/l0/constants.py
@@ -114,6 +114,9 @@ FLAG_PATTERN = np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  # Define size of science frame (num of packets)
  FRAME_SIZE = len(FLAG_PATTERN)

+ # Mod 10 pattern
+ MOD_10_PATTERN = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
  # Define the number of bits in the mantissa and exponent for
  # decompressing data
  MANTISSA_BITS = 12
imap_processing/hit/l0/decom_hit.py
@@ -260,12 +260,9 @@ def assemble_science_frames(sci_dataset: xr.Dataset) -> xr.Dataset:
          height event data per valid science frame added as new
          data variables.
      """
-     # TODO: Figure out how to handle partial science frames at the
-     # beginning and end of CCSDS files. These science frames are split
-     # across CCSDS files and still need to be processed with packets
-     # from the previous file. Only discard incomplete science frames
-     # in the middle of the CCSDS file. The code currently skips all
-     # incomplete science frames.
+     # TODO: The code currently skips all incomplete science frames.
+     # Only discard incomplete science frames in the middle of the CCSDS file or
+     # use fill values?

      # Convert sequence flags and counters to NumPy arrays for vectorized operations
      seq_flgs = sci_dataset.seq_flgs.values
imap_processing/hit/l1a/hit_l1a.py
@@ -1,6 +1,8 @@
  """Decommutate HIT CCSDS data and create L1a data products."""

  import logging
+ from datetime import datetime
+ from pathlib import Path

  import numpy as np
  import xarray as xr
@@ -19,6 +21,11 @@ from imap_processing.hit.l0.constants import (
      ZENITH_ANGLES,
  )
  from imap_processing.hit.l0.decom_hit import decom_hit
+ from imap_processing.spice.time import (
+     et_to_datetime64,
+     met_to_datetime64,
+     ttj2000ns_to_et,
+ )

  logger = logging.getLogger(__name__)

@@ -28,7 +35,7 @@ logger = logging.getLogger(__name__)
  fillval = -9223372036854775808


- def hit_l1a(packet_file: str) -> list[xr.Dataset]:
+ def hit_l1a(packet_file: Path, packet_date: str | None) -> list[xr.Dataset]:
      """
      Will process HIT L0 data into L1A data products.

@@ -36,31 +43,42 @@ def hit_l1a(packet_file: str) -> list[xr.Dataset]:
      ----------
      packet_file : str
          Path to the CCSDS data packet file.
+     packet_date : str
+         The date of the packet data in 'YYYYMMDD' format. This is used to filter
+         data to the correct processing day since L0 will have a 20-minute
+         buffer before and after the processing day.

      Returns
      -------
      processed_data : list[xarray.Dataset]
          List of Datasets of L1A processed data.
      """
+     if not packet_date:
+         raise ValueError("Packet date is required for processing L1A data.")
+
      # Unpack ccsds file to xarray datasets
-     datasets_by_apid = get_datasets_by_apid(packet_file)
+     datasets_by_apid = get_datasets_by_apid(str(packet_file))

      # Create the attribute manager for this data level
      attr_mgr = get_attribute_manager("l1a")

-     l1a_datasets = []
-
      # Process l1a data products
+     l1a_datasets = []
      if HitAPID.HIT_HSKP in datasets_by_apid:
          logger.info("Creating HIT L1A housekeeping dataset")
-         l1a_datasets.append(
-             process_housekeeping_data(
-                 datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
-             )
+         hk_dataset = process_housekeeping_data(
+             datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
+         )
+         # filter the housekeeping dataset to the processing day
+         hk_dataset = filter_dataset_to_processing_day(
+             hk_dataset, str(packet_date), epoch_vals=hk_dataset["epoch"].values
          )
+         l1a_datasets.append(hk_dataset)
      if HitAPID.HIT_SCIENCE in datasets_by_apid:
          l1a_datasets.extend(
-             process_science(datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr)
+             process_science(
+                 datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr, str(packet_date)
+             )
          )
      return l1a_datasets

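With this change the L1A entry point needs the processing day alongside the packet file, and it raises ValueError when the date is missing. A hedged sketch of the new call (the file name and date are illustrative only):

    from pathlib import Path

    from imap_processing.hit.l1a.hit_l1a import hit_l1a

    # Hypothetical packet file; packet_date selects the processing day ('YYYYMMDD').
    datasets = hit_l1a(Path("imap_hit_l0_sci_20250101_v001.pkts"), "20250101")
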
@@ -102,7 +120,15 @@ def subcom_sectorates(sci_dataset: xr.Dataset) -> xr.Dataset:
      sci_dataset : xarray.Dataset
          Xarray dataset with sectored rates data organized by species.
      """
-     updated_dataset = sci_dataset.copy()
+     # Initialize the dataset with the required variables
+     updated_dataset = sci_dataset[
+         [
+             "sectorates",
+             "hdr_minute_cnt",
+             "livetime_counter",
+             "hdr_dynamic_threshold_state",
+         ]
+     ].copy(deep=True)

      # Calculate mod 10 values
      hdr_min_count_mod_10 = updated_dataset.hdr_minute_cnt.values % 10
@@ -308,25 +334,272 @@ def add_cdf_attributes(
      return dataset


+ def find_complete_mod10_sets(mod_vals: np.ndarray) -> np.ndarray:
+     """
+     Find start indices where mod values match [0,1,2,3,4,5,6,7,8,9] pattern.
+
+     Parameters
+     ----------
+     mod_vals : np.ndarray
+         1D array of mod 10 values from the hdr_minute_cnt field in the L1A counts data.
+
+     Returns
+     -------
+     np.ndarray
+         Indices in mod_vals where the complete pattern [0, 1, ..., 9] starts.
+     """
+     # The pattern to match is an array from 0-9
+     window_size = 10
+
+     if mod_vals.size < window_size:
+         logger.warning(
+             "Mod 10 array is smaller than the required window size for "
+             "pattern matching."
+         )
+         return np.array([], dtype=int)
+     # Use sliding windows to find pattern matches
+     sw_view = np.lib.stride_tricks.sliding_window_view(mod_vals, window_size)
+     matches = np.all(sw_view == np.arange(window_size), axis=1)
+     return np.where(matches)[0]
+
+
+ def subset_sectored_counts(
+     sectored_counts_dataset: xr.Dataset, packet_date: str
+ ) -> xr.Dataset:
+     """
+     Subset data for complete sets of sectored counts and corresponding livetime values.
+
+     A set of sectored data starts with hydrogen and ends with iron and correspond to
+     the mod 10 values 0-9. The livetime values from the previous 10 minutes are used
+     to calculate the rates for each set since those counts are transmitted 10 minutes
+     after they were collected. Therefore, only complete sets of sectored counts where
+     livetime from the previous 10 minutes are available are included in the output.
+
+     Parameters
+     ----------
+     sectored_counts_dataset : xarray.Dataset
+         The sectored counts dataset.
+
+     packet_date : str
+         The date of the packet data in 'YYYYMMDD' format, used for filtering.
+
+     Returns
+     -------
+     xarray.Dataset
+         A dataset of complete sectored counts and corresponding livetime values
+         for the processing day.
+     """
+     # TODO: Update to use fill values for partial frames rather than drop them
+
+     # Modify livetime_counter to use a new epoch coordinate
+     # that is aligned with the original epoch dimension. This
+     # ensures that livetime doesn't get filtered when the original
+     # epoch dimension is filtered for complete sets.
+     sectored_counts_dataset = update_livetime_coord(sectored_counts_dataset)
+
+     # Identify 10-minute intervals of complete sectored counts
+     # by using the mod 10 values of the header minute counts.
+     # Mod 10 determines the species and energy bin the data belongs
+     # to. A mapping of mod 10 values to species and energy bins is
+     # provided in l0/constants.py for reference.
+     bin_size = 10
+     mod_10: np.ndarray = sectored_counts_dataset.hdr_minute_cnt.values % 10
+     start_indices = find_complete_mod10_sets(mod_10)
+
+     # Filter out start indices that are less than or equal to the bin size
+     # since the previous 10 minutes are needed for calculating rates
+     if start_indices.size == 0:
+         raise ValueError(
+             "No data to process - valid start indices not found for "
+             "complete sectored counts."
+         )
+     else:
+         start_indices = start_indices[start_indices >= bin_size]
+
+     # Subset data for complete sets of sectored counts.
+     # Each set of sectored counts is 10 minutes long, so we take the indices
+     # starting from the start indices and extending to the bin size of 10.
+     # This creates a 1D array of indices that correspond to the complete
+     # sets of sectored counts which is used to filter the L1A dataset and
+     # create the L1B sectored rates dataset.
+     data_indices = np.concatenate(
+         [np.arange(idx, idx + bin_size) for idx in start_indices]
+     )
+     complete_sectored_counts_dataset = sectored_counts_dataset.isel(epoch=data_indices)
+
+     epoch_per_complete_set = np.repeat(
+         [
+             complete_sectored_counts_dataset.epoch[idx : idx + bin_size].mean().item()
+             for idx in range(0, len(complete_sectored_counts_dataset.epoch), 10)
+         ],
+         bin_size,
+     )
+
+     # Filter dataset for data in the processing day
+
+     # Trim the sectored data to epoch_per_complete_set values in the processing day
+     filtered_dataset = filter_dataset_to_processing_day(
+         complete_sectored_counts_dataset, packet_date, epoch_vals=epoch_per_complete_set
+     )
+
+     # Trim livetime to the size of the sectored data but shifted 10 minutes earlier.
+     filtered_dataset = subset_livetime(filtered_dataset)
+
+     return filtered_dataset
+
+
+ def update_livetime_coord(sectored_dataset: xr.Dataset) -> xr.Dataset:
+     """
+     Update livetime_counter to use a new epoch coordinate.
+
+     Assign a new epoch coordinate to the `livetime_counter` variable.
+     This new coordinate is aligned with the original `epoch` dimension,
+     ensuring that `livetime_counter` remains unaffected when the original
+     `epoch` dimension is filtered for complete sets in `subset_sectored_counts`
+     function.
+
+     Parameters
+     ----------
+     sectored_dataset : xarray.Dataset
+         The dataset containing sectored counts and livetime_counter data.
+
+     Returns
+     -------
+     xarray.Dataset
+         The updated dataset with modified livetime_counter.
+     """
+     epoch_values = sectored_dataset.epoch.values
+     sectored_dataset = sectored_dataset.assign_coords(
+         {
+             "epoch_livetime": ("epoch", epoch_values),
+         }
+     )
+     da = sectored_dataset["livetime_counter"]
+     da = da.assign_coords(epoch_livetime=("epoch", epoch_values))
+
+     # Swap the dimension from 'epoch' to 'epoch_livetime'
+     da = da.swap_dims({"epoch": "epoch_livetime"})
+
+     # Update the dataset with the modified variable
+     sectored_dataset["livetime_counter"] = da
+
+     return sectored_dataset
+
+
+ def subset_livetime(dataset: xr.Dataset) -> xr.Dataset:
+     """
+     Trim livetime to values shifted 10 minutes earlier than sectored data.
+
+     This ensures that the livetime values correspond to the sectored counts correctly
+     since sectored data is collected 10 minutes before it's transmitted.
+
+     Parameters
+     ----------
+     dataset : xarray.Dataset
+         The dataset containing sectored counts and livetime data.
+
+     Returns
+     -------
+     xarray.Dataset
+         The updated dataset with trimmed livetime data.
+     """
+     # epoch values are per science frame which is 1 minute
+     epoch_vals = dataset["epoch"].values
+     epoch_livetime_vals = dataset["epoch_livetime"].values
+
+     if not epoch_vals.size:
+         raise ValueError(
+             "Epoch values are empty. Cannot proceed with livetime subsetting."
+         )
+
+     # Get index positions of epoch[0] and epoch[-1] in epoch_livetime
+     start_idx = np.where(epoch_livetime_vals == epoch_vals[0])[0][0]
+     end_idx = np.where(epoch_livetime_vals == epoch_vals[-1])[0][0]
+
+     if start_idx < 10:
+         raise ValueError(
+             "Start index for livetime is less than 10. This indicates that the "
+             "dataset is too small to shift livetime correctly."
+         )
+
+     # Compute shifted indices by 10 minutes
+     start_trimmed = max(start_idx - 10, 0)
+     end_trimmed = max(end_idx - 10, 0)
+
+     return dataset.isel(epoch_livetime=slice(start_trimmed, end_trimmed + 1))
+
+
+ def filter_dataset_to_processing_day(
+     dataset: xr.Dataset,
+     packet_date: str,
+     epoch_vals: np.ndarray,
+     sc_tick: bool = False,
+ ) -> xr.Dataset:
+     """
+     Filter the dataset for data within the processing day.
+
+     Parameters
+     ----------
+     dataset : xarray.Dataset
+         The dataset to filter.
+     packet_date : str
+         The date of the packet data in 'YYYYMMDD' format.
+     epoch_vals : np.ndarray
+         An array of epoch values. Used to identify indices of data that
+         belong in the processing day. For sectored counts data, an
+         array of mean epoch values for major frames (10 min. intervals)
+         is used to filter the dataset to ensure that major frames that span
+         midnight, but belong to the processing day, are included. For other
+         datasets, the dataset's epoch coordinate values will be used.
+     sc_tick : bool
+         If true, the dataset's sc_tick will be used to filter data as well.
+         This ensures that the ccsds headers that use sc_tick as a coordinate,
+         instead of epoch, also corresponds to the processing day.
+
+     Returns
+     -------
+     xarray.Dataset
+         The filtered dataset containing data within the processing day.
+     """
+     processing_day = datetime.strptime(packet_date, "%Y%m%d").strftime("%Y-%m-%d")
+
+     # Filter dataset by epoch indices in processing day
+     epoch_dt = et_to_datetime64(ttj2000ns_to_et(epoch_vals))
+     epoch_indices_in_processing_day = np.where(
+         epoch_dt.astype("datetime64[D]") == np.datetime64(processing_day)
+     )[0]
+     dataset = dataset.isel(epoch=epoch_indices_in_processing_day)
+
+     # If sc_tick is provided (coord for ccsds headers), filter by sc_tick too
+     if sc_tick:
+         sc_tick_dt = met_to_datetime64(dataset["sc_tick"].values)
+         indices_in_processing_day = np.where(
+             sc_tick_dt.astype("datetime64[D]") == np.datetime64(processing_day)
+         )[0]
+         dataset = dataset.isel(sc_tick=indices_in_processing_day)
+     return dataset
+
+
  def process_science(
-     dataset: xr.Dataset, attr_mgr: ImapCdfAttributes
+     dataset: xr.Dataset, attr_mgr: ImapCdfAttributes, packet_date: str
  ) -> list[xr.Dataset]:
      """
      Will process science datasets for CDF products.

-     Process binary science data for CDF creation. The data is
-     grouped into science frames, decommutated and decompressed,
-     and split into count rates and event datasets. Updates the
-     dataset attributes and coordinates and data variable
-     dimensions according to specifications in a cdf yaml file.
+     The function processes binary science data for CDF creation.
+     The data is decommutated, decompressed, grouped into science frames,
+     and split into count rates, sectored count rates, and event datasets.
+     It also updates the dataset attributes according to specifications
+     in a cdf yaml file.

      Parameters
      ----------
      dataset : xarray.Dataset
          A dataset containing HIT science data.
-
      attr_mgr : ImapCdfAttributes
          Attribute manager used to get the data product field's attributes.
+     packet_date : str
+         The date of the packet data, used for processing.

      Returns
      -------
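
The heart of find_complete_mod10_sets is a sliding-window comparison against [0..9]; a small self-contained sketch of the same idea on toy data (the input values are made up for illustration):

    import numpy as np

    # Toy hdr_minute_cnt values: a partial set, then one complete 0-9 set.
    hdr_minute_cnt = np.array([7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    mod_vals = hdr_minute_cnt % 10

    windows = np.lib.stride_tricks.sliding_window_view(mod_vals, 10)
    starts = np.where(np.all(windows == np.arange(10), axis=1))[0]
    print(starts)  # [3] -> the hydrogen-to-iron set begins at index 3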
@@ -338,20 +611,37 @@ def process_science(
      # Decommutate and decompress the science data
      sci_dataset = decom_hit(dataset)

-     # Organize sectored rates by species type
-     sci_dataset = subcom_sectorates(sci_dataset)
+     # Create dataset for sectored data organized by species type
+     sectored_dataset = subcom_sectorates(sci_dataset)
+
+     # Subset sectored data for complete sets (10 min intervals covering all species)
+     sectored_dataset = subset_sectored_counts(sectored_dataset, packet_date)
+
+     # TODO:
+     # - headers are values per packet rather than per frame. Do these need to align
+     # with the science frames?
+     # For instance, the mean epoch for a frame that spans midnight might contain
+     # packets from the previous day but filtering sc_tick by processing day will
+     # exclude those packets. Is this an issue?
+
+     # Filter the science dataset to only include data from the processing day
+     sci_dataset = filter_dataset_to_processing_day(
+         sci_dataset, packet_date, epoch_vals=sci_dataset["epoch"].values, sc_tick=True
+     )

      # Split the science data into count rates and event datasets
      pha_raw_dataset = xr.Dataset(
          {"pha_raw": sci_dataset["pha_raw"]}, coords={"epoch": sci_dataset["epoch"]}
      )
-     count_rates_dataset = sci_dataset.drop_vars("pha_raw")
+     count_rates_dataset = sci_dataset.drop_vars(["pha_raw", "sectorates"])

      # Calculate uncertainties for count rates
      count_rates_dataset = calculate_uncertainties(count_rates_dataset)
+     sectored_count_rates_dataset = calculate_uncertainties(sectored_dataset)

      l1a_datasets: dict = {
-         "imap_hit_l1a_counts": count_rates_dataset,
+         "imap_hit_l1a_counts-standard": count_rates_dataset,
+         "imap_hit_l1a_counts-sectored": sectored_count_rates_dataset,
          "imap_hit_l1a_direct-events": pha_raw_dataset,
      }