imap-processing 0.19.3__py3-none-any.whl → 0.19.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of imap-processing might be problematic.

Files changed (33)
  1. imap_processing/_version.py +2 -2
  2. imap_processing/cdf/config/imap_codice_l1a_variable_attrs.yaml +90 -91
  3. imap_processing/cdf/config/imap_codice_l1b_variable_attrs.yaml +6 -6
  4. imap_processing/cdf/config/imap_enamaps_l2-common_variable_attrs.yaml +18 -23
  5. imap_processing/cdf/config/imap_hi_global_cdf_attrs.yaml +1 -2
  6. imap_processing/cdf/config/imap_hi_variable_attrs.yaml +1 -0
  7. imap_processing/cdf/config/imap_ultra_l1c_variable_attrs.yaml +8 -6
  8. imap_processing/cdf/utils.py +5 -0
  9. imap_processing/cli.py +72 -54
  10. imap_processing/codice/codice_l1a.py +44 -6
  11. imap_processing/codice/codice_l1b.py +35 -6
  12. imap_processing/codice/constants.py +10 -6
  13. imap_processing/ena_maps/ena_maps.py +2 -7
  14. imap_processing/glows/l1b/glows_l1b.py +29 -21
  15. imap_processing/hi/hi_l1a.py +49 -29
  16. imap_processing/hi/hi_l1b.py +34 -0
  17. imap_processing/hi/hi_l1c.py +23 -17
  18. imap_processing/hi/hi_l2.py +225 -81
  19. imap_processing/ialirt/utils/create_xarray.py +11 -1
  20. imap_processing/lo/l1b/lo_l1b.py +111 -77
  21. imap_processing/lo/l1c/lo_l1c.py +10 -11
  22. imap_processing/lo/l2/lo_l2.py +43 -22
  23. imap_processing/mag/l1c/interpolation_methods.py +9 -1
  24. imap_processing/mag/l1c/mag_l1c.py +99 -45
  25. imap_processing/ultra/l1c/helio_pset.py +2 -2
  26. imap_processing/ultra/l1c/spacecraft_pset.py +7 -4
  27. imap_processing/ultra/l2/ultra_l2.py +51 -24
  28. imap_processing/ultra/utils/ultra_l1_utils.py +4 -4
  29. {imap_processing-0.19.3.dist-info → imap_processing-0.19.4.dist-info}/METADATA +1 -1
  30. {imap_processing-0.19.3.dist-info → imap_processing-0.19.4.dist-info}/RECORD +33 -33
  31. {imap_processing-0.19.3.dist-info → imap_processing-0.19.4.dist-info}/LICENSE +0 -0
  32. {imap_processing-0.19.3.dist-info → imap_processing-0.19.4.dist-info}/WHEEL +0 -0
  33. {imap_processing-0.19.3.dist-info → imap_processing-0.19.4.dist-info}/entry_points.txt +0 -0
imap_processing/hi/hi_l1c.py

@@ -327,14 +327,18 @@ def pset_counts(
         fill_value=0,
     )
 
-    # Convert list of DEs to pandas dataframe for ease indexing/filtering
-    de_df = l1b_de_dataset.drop_dims("epoch").to_pandas()
+    # Drop events with FILLVAL for trigger_id. This should only occur for a
+    # pointing with no events that gets a single fill event
+    de_ds = l1b_de_dataset.drop_dims("epoch")
 
+    # Remove DEs with invalid trigger_id. This should only occur for a
+    # pointing with no events that gets a single fill event
+    good_mask = de_ds["trigger_id"].data != de_ds["trigger_id"].attrs["FILLVAL"]
     # Remove DEs not in Goodtimes/angles
-    good_mask = good_time_and_phase_mask(
+    good_mask &= good_time_and_phase_mask(
         l1b_de_dataset.event_met.values, l1b_de_dataset.spin_phase.values
     )
-    de_df = de_df[good_mask]
+    de_ds = de_ds.isel(event_met=good_mask)
 
     # The calibration product configuration potentially has different coincidence
     # types for each ESA and different TOF windows for each calibration product,
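Note on the pattern above: the refactor drops the pandas round-trip and filters the xarray Dataset directly, passing a boolean array to isel. A minimal standalone sketch of that idiom (toy data and a hypothetical fill value, not taken from the package):

    import numpy as np
    import xarray as xr

    FILLVAL = 65535  # hypothetical; the real value comes from the CDF variable attrs
    de_ds = xr.Dataset(
        {"trigger_id": ("event_met", np.array([1, 2, FILLVAL]))},
        coords={"event_met": [10.0, 11.0, 12.0]},
    )
    good_mask = de_ds["trigger_id"].data != FILLVAL
    filtered = de_ds.isel(event_met=good_mask)  # keeps the first two events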
@@ -346,17 +350,17 @@ def pset_counts(
     # esa_energy_step is recorded for each packet rather than for each DE,
     # so we use ccsds_index to get the esa_energy_step for each DE
     esa_mask = (
-        l1b_de_dataset["esa_energy_step"].data[de_df["ccsds_index"].to_numpy()]
+        l1b_de_dataset["esa_energy_step"].data[de_ds["ccsds_index"].data]
         == esa_energy
     )
     # Now loop over the calibration products for the current ESA energy
     for config_row in esa_df.itertuples():
         # Remove DEs that are not at the current ESA energy and in the list
         # of coincidence types for the current calibration product
-        type_mask = de_df["coincidence_type"].isin(
+        type_mask = de_ds["coincidence_type"].isin(
             config_row.coincidence_type_values
         )
-        filtered_de_df = de_df[(esa_mask & type_mask)]
+        filtered_de_ds = de_ds.isel(event_met=(esa_mask & type_mask))
 
         # Use the TOF window mask to remove DEs with TOFs outside the allowed range
         tof_fill_vals = {
@@ -366,17 +370,17 @@ def pset_counts(
             for detector_pair in CalibrationProductConfig.tof_detector_pairs
         }
         tof_in_window_mask = get_tof_window_mask(
-            filtered_de_df, config_row, tof_fill_vals
+            filtered_de_ds, config_row, tof_fill_vals
         )
-        filtered_de_df = filtered_de_df[tof_in_window_mask]
+        filtered_de_ds = filtered_de_ds.isel(event_met=tof_in_window_mask)
 
         # Bin remaining DEs into spin-bins
         i_esa = np.flatnonzero(pset_coords["esa_energy_step"].data == esa_energy)[0]
         # spin_phase is in the range [0, 1). Multiplying by N_SPIN_BINS and
         # truncating to an integer gives the correct bin index
-        spin_bin_indices = (
-            filtered_de_df["spin_phase"].to_numpy() * N_SPIN_BINS
-        ).astype(int)
+        spin_bin_indices = (filtered_de_ds["spin_phase"].data * N_SPIN_BINS).astype(
+            int
+        )
         # When iterating over rows of a dataframe, the names of the multi-index
         # are not preserved. Below, `config_row.Index[0]` gets the
         # calibration_prod value from the namedtuple representing the
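The spin-binning comment in the hunk above is easy to verify numerically. A toy check, assuming N_SPIN_BINS = 90 for illustration (the actual constant is defined elsewhere in the Hi code):

    import numpy as np

    N_SPIN_BINS = 90  # assumed value, for illustration only
    spin_phase = np.array([0.0, 0.011, 0.5, 0.999])  # valid range is [0, 1)
    spin_bin_indices = (spin_phase * N_SPIN_BINS).astype(int)
    # array([ 0,  0, 45, 89]) -- truncation maps every valid phase into a valid bin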
@@ -390,15 +394,15 @@ def pset_counts(
 
 
 def get_tof_window_mask(
-    de_df: pd.DataFrame, prod_config_row: NamedTuple, fill_vals: dict
+    de_ds: xr.Dataset, prod_config_row: NamedTuple, fill_vals: dict
 ) -> NDArray[bool]:
     """
     Generate a mask indicating which DEs to keep based on TOF windows.
 
     Parameters
     ----------
-    de_df : pandas.DataFrame
-        The Direct Event dataframe for the DEs to filter based on the TOF
+    de_ds : xarray.Dataset
+        The Direct Event Dataset for the DEs to filter based on the TOF
         windows.
     prod_config_row : NamedTuple
         A single row of the prod config dataframe represented as a named tuple.
@@ -415,11 +419,13 @@ def get_tof_window_mask(
         The mask is intended to directly filter the DE dataframe.
     """
     detector_pairs = CalibrationProductConfig.tof_detector_pairs
-    tof_in_window_mask = np.empty((len(detector_pairs), len(de_df)), dtype=bool)
+    tof_in_window_mask = np.empty(
+        (len(detector_pairs), len(de_ds["event_met"])), dtype=bool
+    )
     for i_pair, detector_pair in enumerate(detector_pairs):
         low_limit = getattr(prod_config_row, f"tof_{detector_pair}_low")
         high_limit = getattr(prod_config_row, f"tof_{detector_pair}_high")
-        tof_array = de_df[f"tof_{detector_pair}"].to_numpy()
+        tof_array = de_ds[f"tof_{detector_pair}"].data
         # The TOF in window mask contains True wherever the TOF is within
         # the configuration low/high bounds OR the FILLVAL is present. The
         # FILLVAL indicates that the detector pair was not hit. DEs with
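Per the comment closing that hunk, a detector pair passes when its TOF lies inside the configured window or equals that pair's FILLVAL (the pair was not hit). A standalone sketch with invented numbers; how the per-pair rows are reduced to one mask per event happens outside this hunk and is assumed here:

    import numpy as np

    tofs = np.array([5.0, 42.0, 120.0, -1.0e31])  # invented TOF values
    low, high, fillval = 10.0, 100.0, -1.0e31
    pair_ok = ((tofs >= low) & (tofs <= high)) | (tofs == fillval)
    # array([False,  True, False,  True])
    # Stacking one such row per detector pair gives the (n_pairs, n_events)
    # tof_in_window_mask allocated above; presumably an all() over the pair
    # axis yields the final keep/drop decision per event.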
imap_processing/hi/hi_l2.py

@@ -8,10 +8,10 @@ import pandas as pd
 import xarray as xr
 
 from imap_processing.ena_maps.ena_maps import (
-    AbstractSkyMap,
     HiPointingSet,
     RectangularSkyMap,
 )
+from imap_processing.ena_maps.utils.corrections import PowerLawFluxCorrector
 from imap_processing.ena_maps.utils.naming import MapDescriptor
 from imap_processing.hi.utils import CalibrationProductConfig
 
@@ -23,8 +23,7 @@ VARS_TO_EXPOSURE_TIME_AVERAGE = ["bg_rates", "bg_rates_unc", "obs_date"]
 
 def hi_l2(
     psets: list[str | Path],
-    geometric_factors_path: str | Path,
-    esa_energies_path: str | Path,
+    l2_ancillary_path_dict: dict[str, Path],
     descriptor: str,
 ) -> list[xr.Dataset]:
     """
@@ -34,10 +33,9 @@ def hi_l2(
     ----------
     psets : list of str or pathlib.Path
         List of input PSETs to make a map from.
-    geometric_factors_path : str or pathlib.Path
-        Where to get the geometric factors from.
-    esa_energies_path : str or pathlib.Path
-        Where to get the energies from.
+    l2_ancillary_path_dict : dict[str, pathlib.Path]
+        Mapping containing ancillary file descriptors as keys and file paths as
+        values. Required keys are: ["cal-prod", "esa-energies", "esa-eta-fit-factors"].
     descriptor : str
         Output filename descriptor. Contains full configuration for the options
         of how to generate the map.
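For callers, the two separate path arguments collapse into a single dict keyed by ancillary descriptor. A hedged usage sketch; the file names below are invented placeholders, and only the three keys come from the docstring:

    from pathlib import Path

    l2_ancillary_path_dict = {
        "cal-prod": Path("imap_hi_cal-prod_v001.csv"),
        "esa-energies": Path("imap_hi_esa-energies_v001.csv"),
        "esa-eta-fit-factors": Path("imap_hi_esa-eta-fit-factors_v001.csv"),
    }
    # l2_datasets = hi_l2(pset_paths, l2_ancillary_path_dict, descriptor)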
@@ -47,33 +45,27 @@ def hi_l2(
     l2_dataset : list[xarray.Dataset]
         Level 2 IMAP-Hi dataset ready to be written to a CDF file.
     """
-    cg_corrected = False
-    map_descriptor = MapDescriptor.from_string(descriptor)
-
-    sky_map = generate_hi_map(
-        psets,
-        geometric_factors_path,
-        esa_energies_path,
-        spin_phase=map_descriptor.spin_phase,
-        output_map=map_descriptor.to_empty_map(),
-        cg_corrected=cg_corrected,
+    logger.info(
+        f"Hi L2 processing running for descriptor: {descriptor} with "
+        f"{len(psets)} PSETs input."
     )
 
-    # Get the map dataset with variables/coordinates in the correct shape
-    # TODO get the correct descriptor and frame
-
-    if not isinstance(sky_map, RectangularSkyMap):
-        raise NotImplementedError("HEALPix map output not supported for Hi")
+    map_descriptor = MapDescriptor.from_string(descriptor)
     if not isinstance(map_descriptor.sensor, str):
         raise ValueError(
             "Invalid map_descriptor. Sensor attribute must be of type str "
             "and be either '45' or '90'"
         )
 
+    sky_map = generate_hi_map(
+        psets,
+        l2_ancillary_path_dict,
+        map_descriptor,
+    )
+
     l2_ds = sky_map.build_cdf_dataset(
         "hi",
         "l2",
-        map_descriptor.frame_descriptor,
         descriptor,
         sensor=map_descriptor.sensor,
     )
@@ -83,12 +75,9 @@ def hi_l2(
 
 
 def generate_hi_map(
     psets: list[str | Path],
-    geometric_factors_path: str | Path,
-    esa_energies_path: str | Path,
-    output_map: AbstractSkyMap,
-    cg_corrected: bool = False,
-    spin_phase: str = "full",
-) -> AbstractSkyMap:
+    l2_ancillary_path_dict: dict[str, Path],
+    descriptor: MapDescriptor,
+) -> RectangularSkyMap:
     """
     Project Hi PSET data into a sky map.
 
@@ -96,32 +85,30 @@ def generate_hi_map(
     ----------
     psets : list of str or pathlib.Path
         List of input PSETs to make a map from.
-    geometric_factors_path : str or pathlib.Path
-        Where to get the geometric factors from.
-    esa_energies_path : str or pathlib.Path
-        Where to get the energies from.
-    output_map : AbstractSkyMap
-        The map object to collect data into. Determines pixel spacing,
-        coordinate system, etc.
-    cg_corrected : bool, Optional
-        Whether to apply Compton-Getting correction to the energies. Defaults to
-        False.
-    spin_phase : str, Optional
-        Apply filtering to PSET data include ram or anti-ram or full spin data.
-        Defaults to "full".
+    l2_ancillary_path_dict : dict[str, pathlib.Path]
+        Mapping containing ancillary file descriptors as keys and file paths as
+        values. Required keys are: ["cal-prod", "esa-energies", "esa-eta-fit-factors"].
+    descriptor : imap_processing.ena_maps.utils.naming.MapDescriptor
+        Output filename descriptor. Contains full configuration for the options
+        of how to generate the map.
 
     Returns
     -------
-    sky_map : AbstractSkyMap
+    sky_map : RectangularSkyMap
         The sky map with all the PSET data projected into the map.
     """
+    output_map = descriptor.to_empty_map()
+
+    if not isinstance(output_map, RectangularSkyMap):
+        raise NotImplementedError("Healpix map output not supported for Hi")
+
     # TODO: Implement Compton-Getting correction
-    if cg_corrected:
-        raise NotImplementedError
+    if descriptor.frame_descriptor != "sf":
+        raise NotImplementedError("CG correction not implemented for Hi")
 
     for pset_path in psets:
         logger.info(f"Processing {pset_path}")
-        pset = HiPointingSet(pset_path, spin_phase=spin_phase)
+        pset = HiPointingSet(pset_path, spin_phase=descriptor.spin_phase)
 
         # Background rate and uncertainty are exposure time weighted means in
         # the map.
@@ -141,10 +128,8 @@ def generate_hi_map(
         output_map.data_1d[var] /= output_map.data_1d["exposure_factor"]
 
     output_map.data_1d.update(calculate_ena_signal_rates(output_map.data_1d))
-    output_map.data_1d.update(
-        calculate_ena_intensity(
-            output_map.data_1d, geometric_factors_path, esa_energies_path
-        )
+    output_map.data_1d = calculate_ena_intensity(
+        output_map.data_1d, l2_ancillary_path_dict, descriptor
     )
 
     output_map.data_1d["obs_date"].data = output_map.data_1d["obs_date"].data.astype(
@@ -155,7 +140,8 @@ def generate_hi_map(
 
     # Rename and convert coordinate from esa_energy_step energy
     esa_df = esa_energy_df(
-        esa_energies_path, output_map.data_1d["esa_energy_step"].data
+        l2_ancillary_path_dict["esa-energies"],
+        output_map.data_1d["esa_energy_step"].data,
     )
     output_map.data_1d = output_map.data_1d.rename({"esa_energy_step": "energy"})
     output_map.data_1d = output_map.data_1d.assign_coords(
@@ -222,9 +208,9 @@ def calculate_ena_signal_rates(map_ds: xr.Dataset) -> dict[str, xr.DataArray]:
 
 
 def calculate_ena_intensity(
     map_ds: xr.Dataset,
-    geometric_factors_path: str | Path,
-    esa_energies_path: str | Path,
-) -> dict[str, xr.DataArray]:
+    l2_ancillary_path_dict: dict[str, Path],
+    descriptor: MapDescriptor,
+) -> xr.Dataset:
     """
     Calculate the ena intensities.
 
@@ -232,52 +218,210 @@ def calculate_ena_intensity(
     ----------
     map_ds : xarray.Dataset
         Map dataset that has ena_signal_rate fields calculated.
-    geometric_factors_path : str or pathlib.Path
-        Where to get the geometric factors from.
-    esa_energies_path : str or pathlib.Path
-        Where to get the esa energies, energy deltas, and geometric factors.
+    l2_ancillary_path_dict : dict[str, pathlib.Path]
+        Mapping containing ancillary file descriptors as keys and file paths as
+        values. Required keys are: ["cal-prod", "esa-energies", "esa-eta-fit-factors"].
+    descriptor : imap_processing.ena_maps.utils.naming.MapDescriptor
+        Output filename descriptor. Contains full configuration for the options
+        of how to generate the map. For this function, the principal data string
+        is used to determine if a flux correction should be applied.
 
     Returns
     -------
-    intensity_vars : dict[str, xarray.DataArray]
-        ENA Intensity with statistical and systematic uncertainties.
+    map_ds : xarray.Dataset
+        Map dataset with new variables: ena_intensity, ena_intensity_stat_unc,
+        ena_intensity_sys_err.
     """
     # read calibration product configuration file
-    cal_prod_df = CalibrationProductConfig.from_csv(geometric_factors_path)
+    cal_prod_df = CalibrationProductConfig.from_csv(l2_ancillary_path_dict["cal-prod"])
     # reindex_like removes esa_energy_steps and calibration products not in the
     # map_ds esa_energy_step and calibration_product coordinates
     geometric_factor = cal_prod_df.to_xarray().reindex_like(map_ds)["geometric_factor"]
     geometric_factor = geometric_factor.transpose(
         *[coord for coord in map_ds.coords if coord in geometric_factor.coords]
     )
-    energy_df = esa_energy_df(esa_energies_path, map_ds["esa_energy_step"].data)
+    energy_df = esa_energy_df(
+        l2_ancillary_path_dict["esa-energies"], map_ds["esa_energy_step"].data
+    )
     esa_energy = energy_df.to_xarray()["nominal_central_energy"]
 
     # Convert ENA Signal Rate to Flux
     flux_conversion_divisor = geometric_factor * esa_energy
-    intensity_vars = {
-        "ena_intensity": map_ds["ena_signal_rates"] / flux_conversion_divisor,
-        "ena_intensity_stat_unc": map_ds["ena_signal_rate_stat_unc"]
-        / flux_conversion_divisor,
-        "ena_intensity_sys_err": map_ds["bg_rates_unc"] / flux_conversion_divisor,
-    }
-
-    # TODO: Correctly implement combining of calibration products. For now, just sum
-    # Hi groups direct events into distinct calibration products based on coincidence
-    # type. (See L1B processing and Hi Algorithm Document section 6.1.2) When adding
-    # together different calibration products, a different weighting must be used
-    # than exposure time. (See Hi Algorithm Document Section 3.1.2)
-    intensity_vars["ena_intensity"] = intensity_vars["ena_intensity"].sum(
-        dim="calibration_prod"
+    map_ds["ena_intensity"] = map_ds["ena_signal_rates"] / flux_conversion_divisor
+    map_ds["ena_intensity_stat_unc"] = (
+        map_ds["ena_signal_rate_stat_unc"] / flux_conversion_divisor
     )
-    intensity_vars["ena_intensity_stat_unc"] = np.sqrt(
-        (intensity_vars["ena_intensity_stat_unc"] ** 2).sum(dim="calibration_prod")
+    map_ds["ena_intensity_sys_err"] = map_ds["bg_rates_unc"] / flux_conversion_divisor
+
+    # Combine calibration products using proper weighted averaging
+    # as described in Hi Algorithm Document Section 3.1.2
+    map_ds = combine_calibration_products(
+        map_ds,
+        geometric_factor,
+        esa_energy,
     )
-    intensity_vars["ena_intensity_sys_err"] = np.sqrt(
-        (intensity_vars["ena_intensity_sys_err"] ** 2).sum(dim="calibration_prod")
+
+    if "raw" not in descriptor.principal_data:
+        # Flux correction
+        corrector = PowerLawFluxCorrector(l2_ancillary_path_dict["esa-eta-fit-factors"])
+        # FluxCorrector does not accept the size 1 epoch dimension. Remove that
+        # dimension by passing the zeroth element.
+        corrected_intensity, corrected_stat_unc = corrector.apply_flux_correction(
+            map_ds["ena_intensity"].values[0],
+            map_ds["ena_intensity_stat_unc"].values[0],
+            esa_energy.data,
+        )
+        # Add the size 1 epoch dimension back in to the corrected fluxes.
+        map_ds["ena_intensity"].data = corrected_intensity[np.newaxis, ...]
+        map_ds["ena_intensity_stat_unc"].data = corrected_stat_unc[np.newaxis, ...]
+
+    return map_ds
+
+
+def combine_calibration_products(
+    map_ds: xr.Dataset,
+    geometric_factors: xr.DataArray,
+    esa_energies: xr.DataArray,
+) -> xr.Dataset:
+    """
+    Combine calibration products using weighted averaging.
+
+    Implements the algorithm described in Hi Algorithm Document Section 3.1.2
+    for properly combining data from multiple calibration products.
+
+    Parameters
+    ----------
+    map_ds : xarray.Dataset
+        Map dataset that has preliminary intensity variables computed for each
+        calibration product.
+    geometric_factors : xarray.DataArray
+        Geometric factors for each calibration product and energy step.
+    esa_energies : xarray.DataArray
+        Central energies for each energy step.
+
+    Returns
+    -------
+    map_ds : xarray.Dataset
+        Map dataset with updated variables: ena_intensity, ena_intensity_stat_unc,
+        ena_intensity_sys_err now combined across calibration products at each
+        energy level.
+    """
+    ena_flux = map_ds["ena_intensity"]
+    sys_err = map_ds["ena_intensity_sys_err"]
+
+    # Calculate improved statistical variance estimates using geometric factor
+    # ratios to reduce bias from Poisson uncertainty estimation
+    improved_stat_variance = _calculate_improved_stat_variance(
+        map_ds, geometric_factors, esa_energies
+    )
+
+    # Calculate total variance
+    # Note that sys_err contains uncertainty, so it must be squared to get
+    # the systematic variance needed in this equation.
+    total_variance = improved_stat_variance + sys_err**2
+
+    # Perform inverse-variance weighted averaging
+    # Handle divide by zero and invalid values
+    with np.errstate(divide="ignore", invalid="ignore"):
+        # Calculate weights for statistical variance combination using only
+        # statistical variance
+        stat_weights = 1.0 / improved_stat_variance
+
+        # Combined statistical uncertainty from inverse-variance formula
+        combined_stat_unc = np.sqrt(1.0 / stat_weights.sum(dim="calibration_prod"))
+
+        # Use total variance weights for flux combination
+        flux_weights = 1.0 / total_variance
+        weighted_flux_sum = (ena_flux * flux_weights).sum(dim="calibration_prod")
+        combined_flux = weighted_flux_sum / flux_weights.sum(dim="calibration_prod")
+
+    map_ds["ena_intensity"] = combined_flux
+    map_ds["ena_intensity_stat_unc"] = combined_stat_unc
+    # For systematic error, just do quadrature sum over the systematic error for
+    # each calibration product.
+    map_ds["ena_intensity_sys_err"] = np.sqrt((sys_err**2).sum(dim="calibration_prod"))
+
+    return map_ds
+
+
+def _calculate_improved_stat_variance(
+    map_ds: xr.Dataset,
+    geometric_factors: xr.DataArray,
+    esa_energies: xr.DataArray,
+) -> xr.DataArray:
+    """
+    Calculate improved statistical variances using geometric factor ratios.
+
+    This implements the algorithm from Hi Algorithm Document Section 3.1.2:
+    For calibration product X, replace N_X in the uncertainty calculation with
+    an improved estimate using geometric factor ratios from all calibration products.
+
+    The key insight is that we can vectorize this by first computing a geometric
+    factor normalized signal rate, then scaling it back for each calibration product.
+
+    Parameters
+    ----------
+    map_ds : xarray.Dataset
+        Map dataset.
+    geometric_factors : xr.DataArray
+        Geometric factors for each calibration product.
+    esa_energies : xarray.DataArray
+        Central energies for each energy step.
+
+    Returns
+    -------
+    improved_variance : xr.DataArray
+        Improved statistical variance estimates.
+    """
+    n_calib_prods = map_ds["ena_intensity"].sizes.get("calibration_prod", 1)
+
+    if n_calib_prods <= 1:
+        # No improvement possible with single calibration product
+        return map_ds["ena_intensity_stat_unc"] ** 2
+
+    logger.debug("Computing geometric factor normalized signal rates")
+
+    # signal_rates = counts / exposure_factor - bg_rates
+    # signal_rates shape is: (n_epoch, n_energy, n_cal_prod, n_spatial_pixels)
+    signal_rates = map_ds["ena_signal_rates"]
+
+    # Compute geometric factor normalized signal rate (vectorized approach)
+    # This represents the weighted average signal rate per unit geometric factor
+    # geometric_factor_norm_signal_rates shape is: (n_epoch, n_energy, n_spatial_pixels)
+    geometric_factor_norm_signal_rates = signal_rates.sum(
+        dim="calibration_prod"
+    ) / geometric_factors.sum(dim="calibration_prod")
+
+    # For each calibration product, the averaged signal rate estimate is:
+    # averaged_signal_rate_i = geometric_factor_norm_signal_rates * geometric_factor_i
+    # averaged_signal_rates shape is: (n_epoch, n_energy, n_cal_prod, n_spatial_pixels)
+    averaged_signal_rates = geometric_factor_norm_signal_rates * geometric_factors
+
+    logger.debug("Including background rates in uncertainty calculation")
+    # Convert averaged signal rates back to flux uncertainties
+    # Total count rates for Poisson uncertainty calculation
+    total_count_rates_for_uncertainty = map_ds["bg_rates"] + averaged_signal_rates
+
+    # Ensure non-negative values for sqrt and minimum of 1 for uncertainty calculation
+    total_count_rates_for_uncertainty = xr.where(
+        total_count_rates_for_uncertainty < 1, 1, total_count_rates_for_uncertainty
+    )
+
+    logger.debug("Computing improved flux uncertainties")
+    # Statistical variance:
+    with np.errstate(divide="ignore", invalid="ignore"):
+        improved_variance = total_count_rates_for_uncertainty / (
+            map_ds["exposure_factor"] * (geometric_factors * esa_energies)
+        )
+
+    # Handle invalid cases by falling back to original uncertainties
+    improved_variance = xr.where(
+        ~np.isfinite(improved_variance) | (geometric_factors == 0),
+        map_ds["ena_intensity_stat_unc"],
+        improved_variance,
     )
 
-    return intensity_vars
+    return improved_variance
 
 
 def esa_energy_df(
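The combination step added in the hunk above is standard inverse-variance weighting: each calibration product i contributes with weight w_i = 1 / sigma_i^2 of its total variance, and the combined statistical uncertainty is sqrt(1 / sum_i(1 / sigma_stat,i^2)). A toy numeric check (two products, one pixel; all values invented):

    import numpy as np

    flux = np.array([1.0, 2.0])
    stat_var = np.array([0.16, 0.64])              # improved statistical variances
    total_var = stat_var + np.array([0.09, 0.36])  # plus sys_err**2 per product

    w = 1.0 / total_var                            # [4.0, 1.0]
    combined_flux = (flux * w).sum() / w.sum()     # 1.2
    combined_stat_unc = np.sqrt(1.0 / (1.0 / stat_var).sum())  # ~0.358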
imap_processing/ialirt/utils/create_xarray.py

@@ -144,7 +144,17 @@ def create_xarray_from_records(records: list[dict]) -> xr.Dataset:  # noqa: PLR0
     # Populate the dataset variables
     for i, record in enumerate(records):
         for key, val in record.items():
-            if key in ["apid", "met", "met_in_utc", "ttj2000ns", "last_modified"]:
+            if key in [
+                "apid",
+                "met",
+                "met_in_utc",
+                "ttj2000ns",
+                "last_modified",
+                "sc_position_GSM",
+                "sc_position_GSE",
+                "sc_velocity_GSM",
+                "sc_velocity_GSE",
+            ]:
                 continue
             elif key in ["mag_B_GSE", "mag_B_GSM", "mag_B_RTN"]:
                 dataset[key].data[i, :] = val