imap-processing 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of imap-processing might be problematic.

Files changed (99)
  1. imap_processing/_version.py +2 -2
  2. imap_processing/ccsds/excel_to_xtce.py +2 -0
  3. imap_processing/cdf/config/imap_hi_variable_attrs.yaml +100 -1
  4. imap_processing/cdf/config/imap_hit_global_cdf_attrs.yaml +14 -0
  5. imap_processing/cdf/config/imap_hit_l1a_variable_attrs.yaml +63 -1
  6. imap_processing/cdf/config/imap_idex_global_cdf_attrs.yaml +7 -0
  7. imap_processing/cdf/config/imap_idex_l1a_variable_attrs.yaml +574 -231
  8. imap_processing/cdf/config/imap_idex_l1b_variable_attrs.yaml +326 -0
  9. imap_processing/cdf/config/imap_lo_l1a_variable_attrs.yaml +33 -23
  10. imap_processing/cdf/config/imap_ultra_l1b_variable_attrs.yaml +7 -4
  11. imap_processing/cdf/utils.py +3 -5
  12. imap_processing/cli.py +13 -4
  13. imap_processing/codice/codice_l1a.py +5 -5
  14. imap_processing/codice/constants.py +9 -9
  15. imap_processing/codice/decompress.py +6 -2
  16. imap_processing/glows/l1a/glows_l1a.py +1 -2
  17. imap_processing/hi/l1a/hi_l1a.py +4 -4
  18. imap_processing/hi/l1a/histogram.py +106 -108
  19. imap_processing/hi/l1a/science_direct_event.py +91 -224
  20. imap_processing/hi/packet_definitions/TLM_HI_COMBINED_SCI.xml +3994 -0
  21. imap_processing/hit/l0/constants.py +2 -2
  22. imap_processing/hit/l0/decom_hit.py +12 -101
  23. imap_processing/hit/l1a/hit_l1a.py +164 -23
  24. imap_processing/ialirt/l0/process_codicelo.py +153 -0
  25. imap_processing/ialirt/l0/process_hit.py +5 -5
  26. imap_processing/ialirt/packet_definitions/ialirt_codicelo.xml +281 -0
  27. imap_processing/ialirt/process_ephemeris.py +212 -0
  28. imap_processing/idex/idex_l1a.py +55 -75
  29. imap_processing/idex/idex_l1b.py +192 -0
  30. imap_processing/idex/idex_variable_unpacking_and_eu_conversion.csv +33 -0
  31. imap_processing/idex/packet_definitions/idex_packet_definition.xml +97 -595
  32. imap_processing/lo/l0/decompression_tables/decompression_tables.py +16 -0
  33. imap_processing/lo/l0/lo_science.py +44 -12
  34. imap_processing/lo/l1a/lo_l1a.py +76 -8
  35. imap_processing/lo/packet_definitions/lo_xtce.xml +9877 -87
  36. imap_processing/mag/l1a/mag_l1a.py +1 -2
  37. imap_processing/mag/l1a/mag_l1a_data.py +1 -2
  38. imap_processing/mag/l1b/mag_l1b.py +2 -1
  39. imap_processing/spice/geometry.py +37 -19
  40. imap_processing/spice/time.py +144 -2
  41. imap_processing/swapi/l1/swapi_l1.py +3 -3
  42. imap_processing/swapi/packet_definitions/swapi_packet_definition.xml +1535 -446
  43. imap_processing/swe/l2/swe_l2.py +134 -17
  44. imap_processing/tests/ccsds/test_data/expected_output.xml +1 -1
  45. imap_processing/tests/codice/test_codice_l1a.py +8 -8
  46. imap_processing/tests/codice/test_decompress.py +4 -4
  47. imap_processing/tests/conftest.py +46 -43
  48. imap_processing/tests/hi/test_data/l0/H90_NHK_20241104.bin +0 -0
  49. imap_processing/tests/hi/test_data/l0/H90_sci_cnt_20241104.bin +0 -0
  50. imap_processing/tests/hi/test_data/l0/H90_sci_de_20241104.bin +0 -0
  51. imap_processing/tests/hi/test_hi_l1b.py +2 -2
  52. imap_processing/tests/hi/test_l1a.py +31 -58
  53. imap_processing/tests/hi/test_science_direct_event.py +58 -0
  54. imap_processing/tests/hit/test_data/sci_sample1.ccsds +0 -0
  55. imap_processing/tests/hit/test_decom_hit.py +60 -50
  56. imap_processing/tests/hit/test_hit_l1a.py +327 -12
  57. imap_processing/tests/hit/test_hit_l1b.py +76 -0
  58. imap_processing/tests/hit/validation_data/hskp_sample_eu.csv +89 -0
  59. imap_processing/tests/hit/validation_data/sci_sample_raw1.csv +29 -0
  60. imap_processing/tests/ialirt/test_data/l0/apid01152.tlm +0 -0
  61. imap_processing/tests/ialirt/test_data/l0/imap_codice_l1a_lo-ialirt_20241110193700_v0.0.0.cdf +0 -0
  62. imap_processing/tests/ialirt/unit/test_process_codicelo.py +106 -0
  63. imap_processing/tests/ialirt/unit/test_process_ephemeris.py +109 -0
  64. imap_processing/tests/ialirt/unit/test_process_hit.py +9 -6
  65. imap_processing/tests/idex/conftest.py +1 -1
  66. imap_processing/tests/idex/test_idex_l0.py +1 -1
  67. imap_processing/tests/idex/test_idex_l1a.py +7 -1
  68. imap_processing/tests/idex/test_idex_l1b.py +126 -0
  69. imap_processing/tests/lo/test_lo_l1a.py +7 -16
  70. imap_processing/tests/lo/test_lo_science.py +67 -3
  71. imap_processing/tests/lo/test_pkts/imap_lo_l0_raw_20240803_v002.pkts +0 -0
  72. imap_processing/tests/lo/validation_data/Instrument_FM1_T104_R129_20240803_ILO_SCI_DE_dec_DN_with_fills.csv +1999 -0
  73. imap_processing/tests/mag/test_mag_l1b.py +39 -5
  74. imap_processing/tests/spice/test_geometry.py +32 -6
  75. imap_processing/tests/spice/test_time.py +135 -6
  76. imap_processing/tests/swapi/test_swapi_decom.py +75 -69
  77. imap_processing/tests/swapi/test_swapi_l1.py +4 -4
  78. imap_processing/tests/swe/test_swe_l2.py +64 -8
  79. imap_processing/tests/test_utils.py +1 -1
  80. imap_processing/tests/ultra/test_data/l0/ultra45_raw_sc_ultrarawimg_withFSWcalcs_FM45_40P_Phi28p5_BeamCal_LinearScan_phi2850_theta-000_20240207T102740.csv +3314 -3314
  81. imap_processing/tests/ultra/unit/test_de.py +8 -3
  82. imap_processing/tests/ultra/unit/test_spatial_utils.py +125 -0
  83. imap_processing/tests/ultra/unit/test_ultra_l1b_extended.py +39 -29
  84. imap_processing/tests/ultra/unit/test_ultra_l1c_pset_bins.py +2 -25
  85. imap_processing/ultra/constants.py +4 -0
  86. imap_processing/ultra/l1b/de.py +8 -14
  87. imap_processing/ultra/l1b/ultra_l1b_extended.py +29 -70
  88. imap_processing/ultra/l1c/ultra_l1c_pset_bins.py +1 -36
  89. imap_processing/ultra/utils/spatial_utils.py +221 -0
  90. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/METADATA +1 -1
  91. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/RECORD +94 -76
  92. imap_processing/hi/l0/__init__.py +0 -0
  93. imap_processing/hi/l0/decom_hi.py +0 -24
  94. imap_processing/hi/packet_definitions/hi_packet_definition.xml +0 -482
  95. imap_processing/tests/hi/test_decom.py +0 -55
  96. imap_processing/tests/hi/test_l1a_sci_de.py +0 -72
  97. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/LICENSE +0 -0
  98. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/WHEEL +0 -0
  99. {imap_processing-0.8.0.dist-info → imap_processing-0.9.0.dist-info}/entry_points.txt +0 -0

imap_processing/hit/l0/constants.py

@@ -9,8 +9,8 @@ MOD_10_MAPPING = {
     0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
     1: {"species": "H", "energy_min": 4, "energy_max": 6},
     2: {"species": "H", "energy_min": 6, "energy_max": 10},
-    3: {"species": "4He", "energy_min": 4, "energy_max": 6},
-    4: {"species": "4He", "energy_min": 6, "energy_max": 12},
+    3: {"species": "He4", "energy_min": 4, "energy_max": 6},
+    4: {"species": "He4", "energy_min": 6, "energy_max": 12},
     5: {"species": "CNO", "energy_min": 4, "energy_max": 6},
     6: {"species": "CNO", "energy_min": 6, "energy_max": 12},
     7: {"species": "NeMgSi", "energy_min": 4, "energy_max": 6},

imap_processing/hit/l0/decom_hit.py

@@ -9,106 +9,10 @@ from imap_processing.hit.l0.constants import (
     FLAG_PATTERN,
     FRAME_SIZE,
     MANTISSA_BITS,
-    MOD_10_MAPPING,
 )
 from imap_processing.utils import convert_to_binary_string
 
 
-def subcom_sectorates(sci_dataset: xr.Dataset) -> None:
-    """
-    Subcommutate sectorates data.
-
-    Sector rates data contains rates for 5 species and 10
-    energy ranges. This function subcommutates the sector
-    rates data by organizing the rates by species. Which
-    species and energy range the data belongs to is determined
-    by taking the mod 10 value of the corresponding header
-    minute count value in the dataset. A mapping of mod 10
-    values to species and energy ranges is provided in constants.py.
-
-    MOD_10_MAPPING = {
-        0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
-        1: {"species": "H", "energy_min": 4, "energy_max": 6},
-        2: {"species": "H", "energy_min": 6, "energy_max": 10},
-        3: {"species": "4He", "energy_min": 4, "energy_max": 6},
-        ...
-        9: {"species": "Fe", "energy_min": 4, "energy_max": 12}}
-
-    The data is added to the dataset as new data fields named
-    according to their species. They have 4 dimensions: epoch
-    energy index, declination, and azimuth. The energy index
-    dimension is used to distinguish between the different energy
-    ranges the data belongs to. The energy min and max values for
-    each species are also added to the dataset as new data fields.
-
-    Parameters
-    ----------
-    sci_dataset : xr.Dataset
-        Xarray dataset containing parsed HIT science data.
-    """
-    # TODO:
-    #  - Update to use fill values defined in attribute manager which
-    #    isn't passed into this module nor defined for L1A sci data yet
-    #  - Determine naming convention for species data fields in dataset
-    #    (i.e. h, H, hydrogen, Hydrogen, etc.)
-    #  - Remove raw "sectorates" data from dataset after processing is complete?
-    #  - consider moving this function to hit_l1a.py
-
-    # Calculate mod 10 values
-    hdr_min_count_mod_10 = sci_dataset.hdr_minute_cnt.values % 10
-
-    # Reference mod 10 mapping to initialize data structure for species and
-    # energy ranges and add 8x15 arrays with fill values for each science frame.
-    num_frames = len(hdr_min_count_mod_10)
-    data_by_species_and_energy_range = {
-        key: {**value, "rates": np.full((num_frames, 8, 15), fill_value=np.nan)}
-        for key, value in MOD_10_MAPPING.items()
-    }
-
-    # Update rates for science frames where data is available
-    for i, mod_10 in enumerate(hdr_min_count_mod_10):
-        data_by_species_and_energy_range[mod_10]["rates"][i] = sci_dataset[
-            "sectorates"
-        ].values[i]
-
-    # H has 3 energy ranges, 4He, CNO, NeMgSi have 2, and Fe has 1.
-    # Aggregate sector rates and energy min/max values for each species.
-    # First, initialize dictionaries to store rates and min/max energy values by species
-    data_by_species: dict = {
-        value["species"]: {"rates": [], "energy_min": [], "energy_max": []}
-        for value in data_by_species_and_energy_range.values()
-    }
-
-    for value in data_by_species_and_energy_range.values():
-        species = value["species"]
-        data_by_species[species]["rates"].append(value["rates"])
-        data_by_species[species]["energy_min"].append(value["energy_min"])
-        data_by_species[species]["energy_max"].append(value["energy_max"])
-
-    # Add sector rates by species to the dataset
-    for species, data in data_by_species.items():
-        # Rates data has shape: energy_index, epoch, declination, azimuth
-        # Convert rates to numpy array and transpose axes to get
-        # shape: epoch, energy_index, declination, azimuth
-        rates_data = np.transpose(np.array(data["rates"]), axes=(1, 0, 2, 3))
-
-        sci_dataset[species] = xr.DataArray(
-            data=rates_data,
-            dims=["epoch", f"{species}_energy_index", "declination", "azimuth"],
-            name=species,
-        )
-        sci_dataset[f"{species}_energy_min"] = xr.DataArray(
-            data=np.array(data["energy_min"]),
-            dims=[f"{species}_energy_index"],
-            name=f"{species}_energy_min",
-        )
-        sci_dataset[f"{species}_energy_max"] = xr.DataArray(
-            data=np.array(data["energy_max"]),
-            dims=[f"{species}_energy_index"],
-            name=f"{species}_energy_max",
-        )
-
-
 def parse_data(bin_str: str, bits_per_index: int, start: int, end: int) -> list:
     """
     Parse binary data.

@@ -197,6 +101,16 @@ def parse_count_rates(sci_dataset: xr.Dataset) -> None:
             dims = ["epoch"]
 
         sci_dataset[field] = xr.DataArray(parsed_data, dims=dims, name=field)
+        # Add dimensions to coordinates
+        # TODO: confirm that dtype int16 is correct
+        for dim in dims:
+            if dim not in sci_dataset.coords:
+                sci_dataset.coords[dim] = xr.DataArray(
+                    np.arange(sci_dataset.sizes[dim], dtype=np.int16),
+                    dims=[dim],
+                    name=dim,
+                )
+
         # increment the start of the next section of data to parse
         section_start += field_meta.section_length
 

@@ -495,10 +409,7 @@ def decom_hit(sci_dataset: xr.Dataset) -> xr.Dataset:
     # Parse count rates data from binary and add to dataset
     parse_count_rates(sci_dataset)
 
-    # Further organize sector rates by species type
-    subcom_sectorates(sci_dataset)
-
-    # TODO:
-    # -clean up dataset - remove raw binary data, raw sectorates? Any other fields?
+    # Remove raw binary data and unused spare bits from dataset
+    sci_dataset = sci_dataset.drop_vars(["count_rates_raw", "science_data", "spare"])
 
     return sci_dataset
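
Note: the coordinate-assignment step added to parse_count_rates gives any dimension without a coordinate a plain int16 index. A minimal sketch of that pattern on a toy dataset (variable names are illustrative, not the real HIT fields):

    import numpy as np
    import xarray as xr

    # Toy dataset with an "epoch" dimension but no coordinate attached to it.
    ds = xr.Dataset({"counts": ("epoch", np.array([3, 1, 4, 1, 5]))})

    # Assign a simple int16 index to any dimension lacking a coordinate,
    # mirroring the loop added above.
    for dim in ds.dims:
        if dim not in ds.coords:
            ds.coords[dim] = xr.DataArray(
                np.arange(ds.sizes[dim], dtype=np.int16), dims=[dim], name=dim
            )

    print(ds.coords["epoch"].values)  # [0 1 2 3 4]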

imap_processing/hit/l1a/hit_l1a.py

@@ -2,6 +2,7 @@
 
 import logging
 
+import numpy as np
 import xarray as xr
 
 from imap_processing.cdf.imap_cdf_manager import ImapCdfAttributes

@@ -11,6 +12,7 @@ from imap_processing.hit.hit_utils import (
     get_datasets_by_apid,
     process_housekeeping_data,
 )
+from imap_processing.hit.l0.constants import MOD_10_MAPPING
 from imap_processing.hit.l0.decom_hit import decom_hit
 
 logger = logging.getLogger(__name__)

@@ -40,26 +42,131 @@ def hit_l1a(packet_file: str, data_version: str) -> list[xr.Dataset]:
     # Create the attribute manager for this data level
     attr_mgr = get_attribute_manager(data_version, "l1a")
 
+    l1a_datasets = []
+
     # Process l1a data products
     if HitAPID.HIT_HSKP in datasets_by_apid:
         logger.info("Creating HIT L1A housekeeping dataset")
-        datasets_by_apid[HitAPID.HIT_HSKP] = process_housekeeping_data(
-            datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
+        l1a_datasets.append(
+            process_housekeeping_data(
+                datasets_by_apid[HitAPID.HIT_HSKP], attr_mgr, "imap_hit_l1a_hk"
+            )
         )
-
     if HitAPID.HIT_SCIENCE in datasets_by_apid:
-        # TODO complete science data processing
-        print("Skipping science data for now")
-        datasets_by_apid[HitAPID.HIT_SCIENCE] = process_science(
-            datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr
+        l1a_datasets.extend(
+            process_science(datasets_by_apid[HitAPID.HIT_SCIENCE], attr_mgr)
         )
+    return l1a_datasets
+
+
+def subcom_sectorates(sci_dataset: xr.Dataset) -> None:
+    """
+    Subcommutate sectorates data.
+
+    Sector rates data contains rates for 5 species and 10
+    energy ranges. This function subcommutates the sector
+    rates data by organizing the rates by species. Which
+    species and energy range the data belongs to is determined
+    by taking the mod 10 value of the corresponding header
+    minute count value in the dataset. A mapping of mod 10
+    values to species and energy ranges is provided in constants.py.
+
+    MOD_10_MAPPING = {
+        0: {"species": "H", "energy_min": 1.8, "energy_max": 3.6},
+        1: {"species": "H", "energy_min": 4, "energy_max": 6},
+        2: {"species": "H", "energy_min": 6, "energy_max": 10},
+        3: {"species": "4He", "energy_min": 4, "energy_max": 6},
+        ...
+        9: {"species": "Fe", "energy_min": 4, "energy_max": 12}}
+
+    The data is added to the dataset as new data fields named
+    according to their species. They have 4 dimensions: epoch
+    energy index, declination, and azimuth. The energy index
+    dimension is used to distinguish between the different energy
+    ranges the data belongs to. The energy min and max values for
+    each species are also added to the dataset as new data fields.
 
-    return list(datasets_by_apid.values())
+    Parameters
+    ----------
+    sci_dataset : xarray.Dataset
+        Xarray dataset containing parsed HIT science data.
+    """
+    # TODO:
+    #  - Update to use fill values defined in attribute manager which
+    #    isn't defined for L1A science data yet
+    #  - fix issues with fe_counts_sectored. The array has shape
+    #    (epoch: 28, fe_energy_index: 1, declination: 8, azimuth: 15),
+    #    but cdflib drops second dimension of size 1 and recognizes
+    #    only 3 total dimensions. Are dimensions of 1 ignored?
+
+    # Calculate mod 10 values
+    hdr_min_count_mod_10 = sci_dataset.hdr_minute_cnt.values % 10
+
+    # Reference mod 10 mapping to initialize data structure for species and
+    # energy ranges and add 8x15 arrays with fill values for each science frame.
+    num_frames = len(hdr_min_count_mod_10)
+    # TODO: add more specific dtype for rates (ex. int16) once this is defined by HIT
+    data_by_species_and_energy_range = {
+        key: {**value, "rates": np.full((num_frames, 8, 15), fill_value=-1, dtype=int)}
+        for key, value in MOD_10_MAPPING.items()
+    }
+
+    # Update rates for science frames where data is available
+    for i, mod_10 in enumerate(hdr_min_count_mod_10):
+        data_by_species_and_energy_range[mod_10]["rates"][i] = sci_dataset[
+            "sectorates"
+        ].values[i]
+
+    # H has 3 energy ranges, 4He, CNO, NeMgSi have 2, and Fe has 1.
+    # Aggregate sector rates and energy min/max values for each species.
+    # First, initialize dictionaries to store rates and min/max energy values by species
+    data_by_species: dict = {
+        value["species"]: {"rates": [], "energy_min": [], "energy_max": []}
+        for value in data_by_species_and_energy_range.values()
+    }
+
+    for value in data_by_species_and_energy_range.values():
+        species = value["species"]
+        data_by_species[species]["rates"].append(value["rates"])
+        data_by_species[species]["energy_min"].append(value["energy_min"])
+        data_by_species[species]["energy_max"].append(value["energy_max"])
+
+    # Add sector rates by species to the dataset
+    for species_type, data in data_by_species.items():
+        # Rates data has shape: energy_index, epoch, declination, azimuth
+        # Convert rates to numpy array and transpose axes to get
+        # shape: epoch, energy_index, declination, azimuth
+        rates_data = np.transpose(np.array(data["rates"]), axes=(1, 0, 2, 3))
+
+        species = species_type.lower()
+        sci_dataset[f"{species}_counts_sectored"] = xr.DataArray(
+            data=rates_data,
+            dims=["epoch", f"{species}_energy_index", "declination", "azimuth"],
+            name=f"{species}_counts_sectored",
+        )
+        sci_dataset[f"{species}_energy_min"] = xr.DataArray(
+            data=np.array(data["energy_min"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_min",
+        )
+        sci_dataset[f"{species}_energy_max"] = xr.DataArray(
+            data=np.array(data["energy_max"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_max",
+        )
+        # add energy index coordinate to the dataset
+        sci_dataset.coords[f"{species}_energy_index"] = xr.DataArray(
+            np.arange(sci_dataset.sizes[f"{species}_energy_index"], dtype=np.int8),
+            dims=[f"{species}_energy_index"],
+            name=f"{species}_energy_index",
+        )
 
 
-def process_science(dataset: xr.Dataset, attr_mgr: ImapCdfAttributes) -> xr.Dataset:
+def process_science(
+    dataset: xr.Dataset, attr_mgr: ImapCdfAttributes
+) -> list[xr.Dataset]:
     """
-    Will process science dataset for CDF product.
+    Will process science datasets for CDF products.
 
     Process binary science data for CDF creation. The data is
     grouped into science frames, decommutated and decompressed,
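
Note: the aggregation in subcom_sectorates ends with a stack-and-transpose that turns a per-energy-range list of (epoch, declination, azimuth) arrays into one (epoch, energy_index, declination, azimuth) array. A shape-only sketch with toy sizes (not real HIT data):

    import numpy as np

    num_frames = 4  # toy epoch count
    # e.g. three H energy ranges, each with shape (epoch, declination, azimuth)
    rates_per_energy = [np.full((num_frames, 8, 15), fill_value=i) for i in range(3)]

    # Stacking gives (energy_index, epoch, declination, azimuth);
    # transposing moves epoch to the front, matching the CDF layout.
    stacked = np.array(rates_per_energy)                   # (3, 4, 8, 15)
    rates_data = np.transpose(stacked, axes=(1, 0, 2, 3))  # (4, 3, 8, 15)
    print(stacked.shape, rates_data.shape)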

@@ -70,30 +177,64 @@ def process_science(dataset: xr.Dataset, attr_mgr: ImapCdfAttributes) -> xr.Dataset:
     Parameters
     ----------
     dataset : xarray.Dataset
-        Dataset containing HIT science data.
+        A dataset containing HIT science data.
 
     attr_mgr : ImapCdfAttributes
         Attribute manager used to get the data product field's attributes.
 
     Returns
     -------
-    dataset : xarray.Dataset
-        An updated dataset ready for CDF conversion.
+    dataset : list
+        A list of science datasets ready for CDF conversion.
     """
     logger.info("Creating HIT L1A science datasets")
 
-    # Logical sources for the two products.
-    # logical_sources = ["imap_hit_l1a_count-rates", "imap_hit_l1a_pulse-height-event"]
-
     # Decommutate and decompress the science data
     sci_dataset = decom_hit(dataset)
 
-    # TODO: Complete this function
-    #  - split the science data into count rates and event datasets
-    #  - update dimensions and add attributes to the dataset and data arrays
-    #  - return list of two datasets (count rates and events)?
+    # Organize sector rates by species type
+    subcom_sectorates(sci_dataset)
 
-    # logger.info("HIT L1A event dataset created")
-    # logger.info("HIT L1A count rates dataset created")
+    # Split the science data into count rates and event datasets
+    pha_raw_dataset = xr.Dataset(
+        {"pha_raw": sci_dataset["pha_raw"]}, coords={"epoch": sci_dataset["epoch"]}
+    )
+    count_rates_dataset = sci_dataset.drop_vars("pha_raw")
 
-    return sci_dataset
+    # Logical sources for the two products.
+    logical_sources = ["imap_hit_l1a_count-rates", "imap_hit_l1a_pulse-height-events"]
+
+    datasets = []
+    # Update attributes and dimensions
+    for dataset, logical_source in zip(
+        [count_rates_dataset, pha_raw_dataset], logical_sources
+    ):
+        dataset.attrs = attr_mgr.get_global_attributes(logical_source)
+
+        # TODO: Add CDF attributes to yaml once they're defined for L1A science data
+        # Assign attributes and dimensions to each data array in the Dataset
+        for field in dataset.data_vars.keys():
+            try:
+                # Create a dict of dimensions using the DEPEND_I keys in the
+                # attributes
+                dims = {
+                    key: value
+                    for key, value in attr_mgr.get_variable_attributes(field).items()
+                    if "DEPEND" in key
+                }
+                dataset[field].attrs = attr_mgr.get_variable_attributes(field)
+                dataset[field].assign_coords(dims)
+            except KeyError:
+                print(f"Field {field} not found in attribute manager.")
+                logger.warning(f"Field {field} not found in attribute manager.")
+
+        dataset.epoch.attrs = attr_mgr.get_variable_attributes("epoch")
+        # Remove DEPEND_0 attribute from epoch variable added by attr_mgr.
+        # Not required for epoch
+        del dataset["epoch"].attrs["DEPEND_0"]
+
+        datasets.append(dataset)
+
+        logger.info(f"HIT L1A dataset created for {logical_source}")
+
+    return datasets
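
Note: process_science now returns two products, with the pulse-height events pulled into their own Dataset and everything else kept as the count-rates product, and per-variable dimensions are taken from DEPEND_* attribute keys. A toy illustration of both steps (placeholder variable names, not the real HIT fields):

    import numpy as np
    import xarray as xr

    # Toy stand-in for the decommutated science dataset.
    sci = xr.Dataset(
        {
            "pha_raw": ("epoch", np.zeros(3)),
            "h_counts_sectored": ("epoch", np.ones(3)),
        },
        coords={"epoch": np.arange(3)},
    )

    # One product keeps only the raw pulse-height events...
    pha_raw_dataset = xr.Dataset(
        {"pha_raw": sci["pha_raw"]}, coords={"epoch": sci["epoch"]}
    )
    # ...the other keeps everything except them.
    count_rates_dataset = sci.drop_vars("pha_raw")

    # Filtering DEPEND_* keys out of a variable's attribute dict, as done per field.
    var_attrs = {"CATDESC": "example", "DEPEND_0": "epoch", "DEPEND_1": "azimuth"}
    dims = {key: value for key, value in var_attrs.items() if "DEPEND" in key}
    print(list(count_rates_dataset.data_vars), dims)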

imap_processing/ialirt/l0/process_codicelo.py (new file)

@@ -0,0 +1,153 @@
+"""Functions to support I-ALiRT CoDICE Lo processing."""
+
+import logging
+from typing import Any
+
+import numpy as np
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+
+def find_groups(data: xr.Dataset) -> xr.Dataset:
+    """
+    Find all occurrences of the sequential set of 233 values 0-232.
+
+    If a value is missing, or we are starting/ending
+    in the middle of a sequence we do not count that as a valid group.
+
+    Parameters
+    ----------
+    data : xr.Dataset
+        CoDICE Lo Dataset.
+
+    Returns
+    -------
+    grouped_data : xr.Dataset
+        Grouped data.
+    """
+    subcom_range = (0, 232)
+
+    data = data.sortby("cod_lo_acq", ascending=True)
+
+    # Use cod_lo_counter == 0 to define the beginning of the group.
+    # Find cod_lo_acq at this index and use it as the beginning time for the group.
+    start_sc_ticks = data["cod_lo_acq"][(data["cod_lo_counter"] == subcom_range[0])]
+    start_sc_tick = start_sc_ticks.min()
+    # Use cod_lo_counter == 232 to define the end of the group.
+    last_sc_ticks = data["cod_lo_acq"][
+        ([data["cod_lo_counter"] == subcom_range[-1]][-1])
+    ]
+    last_sc_tick = last_sc_ticks.max()
+
+    # Filter out data before the first cod_lo_counter=0 and
+    # after the last cod_lo_counter=232.
+    grouped_data = data.where(
+        (data["cod_lo_acq"] >= start_sc_tick) & (data["cod_lo_acq"] <= last_sc_tick),
+        drop=True,
+    )
+
+    # Assign labels based on the cod_lo_acq times.
+    group_labels = np.searchsorted(
+        start_sc_ticks, grouped_data["cod_lo_acq"], side="right"
+    )
+    # Example:
+    # grouped_data.coords
+    # Coordinates:
+    #   * epoch  (epoch) int64 7kB 315922822184000000 ... 315923721184000000
+    #   * group  (group) int64 7kB 1 1 1 1 1 1 1 1 1 ... 15 15 15 15 15 15 15 15 15
+    grouped_data["group"] = ("group", group_labels)
+
+    return grouped_data
+
+
+def append_cod_lo_data(dataset: xr.Dataset) -> xr.Dataset:
+    """
+    Append the cod_lo_## data values and create an xarray.
+
+    Parameters
+    ----------
+    dataset : xr.Dataset
+        Original dataset of group.
+
+    Returns
+    -------
+    appended_dataset : xr.Dataset
+        Dataset with cod_lo_## stacked.
+    """
+    # Number of codice lo data rows
+    num_cod_lo_rows = 15
+    cod_lo_data = np.stack(
+        [dataset[f"cod_lo_data_{i:02}"].values for i in range(num_cod_lo_rows)], axis=1
+    )
+
+    repeated_data = {
+        var: np.repeat(dataset[var].values, num_cod_lo_rows)
+        for var in dataset.data_vars
+        if not var.startswith("cod_lo_data_")
+    }
+
+    repeated_data["cod_lo_appended"] = cod_lo_data.flatten()
+    repeated_epoch = np.repeat(dataset["epoch"].values, num_cod_lo_rows)
+
+    appended_dataset = xr.Dataset(
+        data_vars={name: ("epoch", values) for name, values in repeated_data.items()},
+        coords={"epoch": repeated_epoch},
+    )
+
+    return appended_dataset
+
+
+def process_codicelo(xarray_data: xr.Dataset) -> list[dict]:
+    """
+    Create final data products.
+
+    Parameters
+    ----------
+    xarray_data : xr.Dataset
+        Parsed data.
+
+    Returns
+    -------
+    codicelo_data : list[dict]
+        Dictionary of final data product.
+
+    Notes
+    -----
+    This function is incomplete and will need to be updated to include the
+    necessary calculations and data products.
+    - Calculate species counts (pg 27 of Algorithm Document)
+    - Calculate rates (assume 4 minutes per group)
+    - Calculate L2 CoDICE pseudodensities (pg 37 of Algorithm Document)
+    - Calculate the public data products
+    """
+    grouped_data = find_groups(xarray_data)
+    unique_groups = np.unique(grouped_data["group"])
+    codicelo_data: list[dict[str, Any]] = [{}]
+
+    for group in unique_groups:
+        # cod_lo_counter values for the group should be 0-232 with no duplicates.
+        subcom_values = grouped_data["cod_lo_counter"][
+            (grouped_data["group"] == group).values
+        ]
+
+        # Ensure no duplicates and all values from 0 to 232 are present
+        if not np.array_equal(subcom_values, np.arange(233)):
+            logger.warning(
+                f"Group {group} does not contain all values from 0 to "
+                f"232 without duplicates."
+            )
+            continue
+
+        mask = grouped_data["group"] == group
+        filtered_indices = np.where(mask)[0]
+        group_data = grouped_data.isel(epoch=filtered_indices)
+
+        append_cod_lo_data(group_data)
+
+        # TODO: calculate species counts
+        # TODO: calculate rates
+        # TODO: calculate L2 CoDICE pseudodensities
+        # TODO: calculate the public data products
+
+    return codicelo_data
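
Note: find_groups labels each packet with np.searchsorted against the group start ticks, so every packet is tagged with the most recent cod_lo_counter == 0 boundary. A small numeric sketch of that labeling with toy tick values (not real spacecraft times):

    import numpy as np

    # Toy acquisition ticks for eight packets, already sorted.
    cod_lo_acq = np.array([10, 11, 12, 20, 21, 22, 30, 31])

    # Ticks at which cod_lo_counter == 0, i.e. where each group starts.
    start_sc_ticks = np.array([10, 20, 30])

    # side="right" assigns every packet at or after a start tick to that group,
    # so group labels come out as 1, 2, 3, ...
    group_labels = np.searchsorted(start_sc_ticks, cod_lo_acq, side="right")
    print(group_labels)  # [1 1 1 2 2 2 3 3]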

imap_processing/ialirt/l0/process_hit.py

@@ -161,13 +161,12 @@ def process_hit(xarray_data: xr.Dataset) -> list[dict]:
 
     Parameters
     ----------
-    xarray_data : dict(xr.Dataset)
-        Dictionary of xarray data including a single
-        set for processing.
+    xarray_data : xr.Dataset
+        Parsed data.
 
     Returns
     -------
-    hit_data : dict
+    hit_data : list[dict]
         Dictionary final data product.
     """
     hit_data = []

@@ -182,10 +181,11 @@ def process_hit(xarray_data: xr.Dataset) -> list[dict]:
 
         # Ensure no duplicates and all values from 0 to 59 are present
         if not np.array_equal(subcom_values, np.arange(60)):
-            raise ValueError(
+            logger.warning(
                 f"Group {group} does not contain all values from 0 to "
                 f"59 without duplicates."
            )
+            continue
 
         fast_rate_1 = grouped_data["hit_fast_rate_1"][
             (grouped_data["group"] == group).values