PyPI - reboost - Versions diffs - 0.5.5__tar.gz → 0.6.0__tar.gz - Mend

reboost 0.5.5tar.gz → 0.6.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

{reboost-0.5.5 → reboost-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: reboost
-Version: 0.5.5
+Version: 0.6.0
 Summary: New LEGEND Monte-Carlo simulation post-processing
 Author-email: Manuel Huber <info@manuelhu.de>, Toby Dixon <toby.dixon.23@ucl.ac.uk>, Luigi Pertoldi <gipert@pm.me>
 Maintainer: The LEGEND Collaboration
@@ -696,6 +696,7 @@ Classifier: Topic :: Scientific/Engineering
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: hdf5plugin
 Requires-Dist: colorlog
 Requires-Dist: numpy
 Requires-Dist: scipy

{reboost-0.5.5 → reboost-0.6.0}/pyproject.toml RENAMED Viewed

@@ -32,6 +32,7 @@ classifiers = [
 ]
 requires-python = ">=3.9"
 dependencies = [
+    "hdf5plugin",
     "colorlog",
     "numpy",
     "scipy",
@@ -147,6 +148,7 @@ ignore = [
   "D213",     # Multi-line docstring summary should start at the first line
   "D401",     # Summary does not need to be in imperative mood
   "D413",     # No blank line after last section in docstring
+  "PLC0415",  # we sometimes use relative imports for performance reasons
   "PLC2401",  # We like non-ASCII characters for math
 ]
 isort.required-imports = ["from __future__ import annotations"]

{reboost-0.5.5 → reboost-0.6.0}/src/reboost/__init__.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import hdf5plugin
 from lgdo import lh5
 from ._version import version as __version__
@@ -10,4 +11,4 @@ __all__ = [
     "build_hit",
 ]
-lh5.settings.DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "lzf"}
+lh5.settings.DEFAULT_HDF5_SETTINGS = {"compression": hdf5plugin.Zstd()}

{reboost-0.5.5 → reboost-0.6.0}/src/reboost/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.5.5'
-__version_tuple__ = version_tuple = (0, 5, 5)
+__version__ = version = '0.6.0'
+__version_tuple__ = version_tuple = (0, 6, 0)

reboost-0.6.0/src/reboost/build_evt.py ADDED Viewed

@@ -0,0 +1,134 @@
+from __future__ import annotations
+import logging
+import awkward as ak
+import numpy as np
+from dbetto import AttrsDict
+from lgdo import Array, Table, VectorOfVectors, lh5
+from . import core, math, shape, utils
+from .shape import group
+log = logging.getLogger(__name__)
+def build_evt(
+    tcm: VectorOfVectors,
+    hitfile: str,
+    outfile: str | None,
+    channel_groups: AttrsDict,
+    pars: AttrsDict,
+    run_part: AttrsDict,
+) -> Table | None:
+    """Build events out of a TCM.
+    Parameters
+    ----------
+    tcm
+        the time coincidence map.
+    hitfile
+        file with the hits.
+    outfile
+        the path to the output file; if `None`, the events are
+        returned in memory.
+    channel_groups
+        a dictionary of groups of channels. For example:
+        .. code-block:: python
+            {"det1": "on", "det2": "off", "det3": "ac"}
+    pars
+        A dictionary of parameters. The first key should
+        be the run ID, followed by different sets of parameters
+        arranged in groups. Run numbers should be given in the
+        format `"p00-r001"`, etc.
+        For example:
+        .. code-block:: python
+            {"p03-r000": {"reso": {"det1": [1, 2], "det2": [0, 1]}}}
+    run_part
+        The run partitioning file giving the number of events
+        for each run. This should be organized as a dictionary
+        with the following format:
+        .. code-block:: python
+            {"p03-r000": 1000, "p03-r001": 2000}
+    Returns
+    -------
+    the event file in memory as a table if no output file is specified.
+    """
+    tcm_tables = utils.get_table_names(tcm)
+    tcm_ak = tcm.view_as("ak")
+    # loop over the runs
+    cum_sum = 0
+    tab = None
+    for idx, (run_full, n_event) in enumerate(run_part.items()):
+        period, run = run_full.split("-")
+        pars_tmp = pars[run_full]
+        # create an output table
+        out_tab = Table(size=n_event)
+        tcm_tmp = tcm_ak[cum_sum : cum_sum + n_event]
+        # flag the TCM entries of channels whose usability group is "off"
+        is_off = shape.group.get_isin_group(
+            tcm_tmp.table_key, channel_groups, tcm_tables, group="off"
+        )
+        # filter out off channels
+        channels = tcm_tmp.table_key[~is_off]
+        rows = tcm_tmp.row_in_table[~is_off]
+        out_tab.add_field("channel", VectorOfVectors(channels))
+        out_tab.add_field("row_in_table", VectorOfVectors(rows))
+        out_tab.add_field("period", Array(np.ones(len(channels)) * int(period[1:])))
+        out_tab.add_field("run", Array(np.ones(len(channels)) * int(run[1:])))
+        # flag which remaining channels are in the "on" group (others, e.g. "ac", are not good)
+        is_good = group.get_isin_group(channels, channel_groups, tcm_tables, group="on")
+        # get energy
+        energy_true = core.read_data_at_channel_as_ak(
+            channels, rows, hitfile, "energy", "hit", tcm_tables
+        )
+        energy = math.stats.apply_energy_resolution(
+            energy_true,
+            channels,
+            tcm_tables,
+            pars_tmp.reso,
+            lambda energy, sig0, sig1: np.sqrt(energy * sig1**2 + sig0**2),
+        )
+        out_tab.add_field("is_good", VectorOfVectors(is_good[energy > 25]))
+        out_tab.add_field("energy", VectorOfVectors(energy[energy > 25]))
+        out_tab.add_field("multiplicity", Array(ak.sum(energy > 25, axis=-1).to_numpy()))
+        # write table
+        wo_mode = "of" if idx == 0 else "append"
+        # add attrs
+        out_tab.attrs["tables"] = tcm.attrs["tables"]
+        if outfile is not None:
+            lh5.write(out_tab, "evt", outfile, wo_mode=wo_mode)
+        else:
+            tab = (
+                ak.concatenate((tab, out_tab.view_as("ak")))
+                if tab is not None
+                else out_tab.view_as("ak")
+            )
+    return Table(tab)

{reboost-0.5.5 → reboost-0.6.0}/src/reboost/core.py RENAMED Viewed

@@ -5,7 +5,9 @@ import time
 from typing import Any
 import awkward as ak
+import numpy as np
 from dbetto import AttrsDict
+from lgdo import lh5
 from lgdo.types import LGDO, Table
 from . import utils
@@ -14,6 +16,78 @@ from .profile import ProfileDict
 log = logging.getLogger(__name__)
+def read_data_at_channel_as_ak(
+    channels: ak.Array, rows: ak.Array, file: str, field: str, group: str, tab_map: dict[int, str]
+) -> ak.Array:
+    r"""Read the data from a particular field to an awkward array. This replaces the TCM like object defined by the channels and rows with the corresponding data field.
+    Parameters
+    ----------
+    channels
+        Array of the channel indices (uids).
+    rows
+        Array of the rows in the files to gather data from.
+    file
+        File to read the data from.
+    field
+        the field to read.
+    group
+        the group to read data from (e.g. `hit` or `stp`).
+    tab_map
+        mapping from table names to indices (uids), of the form:
+        .. code:: python
+            {NAME: UID}
+        For example:
+        .. code:: python
+            {"det001": 1, "det002": 2}
+    Returns
+    -------
+    an array with the data, of the same shape as the channels and rows.
+    """
+    # initialise the output
+    data_flat = None
+    tcm_rows_full = None
+    # save the unflattening
+    reorder = ak.num(rows)
+    for tab_name, key in tab_map.items():
+        # get the rows to read
+        idx = ak.flatten(rows[channels == key]).to_numpy()
+        arg_idx = np.argsort(idx)
+        # get the rows in the flattened data we want to append to
+        tcm_rows = np.where(ak.flatten(channels == key))[0]
+        # read the data with sorted idx
+        data_ch = lh5.read(f"{group}/{tab_name}/{field}", file, idx=idx[arg_idx]).view_as("ak")
+        # sort back to order for tcm
+        data_ch = data_ch[np.argsort(arg_idx)]
+        # append to output
+        data_flat = ak.concatenate((data_flat, data_ch)) if data_flat is not None else data_ch
+        tcm_rows_full = (
+            np.concatenate((tcm_rows_full, tcm_rows)) if tcm_rows_full is not None else tcm_rows
+        )
+    if len(data_flat) != len(tcm_rows_full):
+        msg = "every index in the tcm should have been read"
+        raise ValueError(msg)
+    # sort the final data
+    data_flat = data_flat[np.argsort(tcm_rows_full)]
+    return ak.unflatten(data_flat, reorder)
 def evaluate_output_column(
     hit_table: Table,
     expression: str,

reboost-0.6.0/src/reboost/math/stats.py ADDED Viewed

@@ -0,0 +1,119 @@
+from __future__ import annotations
+import logging
+from typing import Callable
+import awkward as ak
+import numpy as np
+from lgdo import Array
+from numpy.typing import ArrayLike
+log = logging.getLogger(__name__)
+def get_resolution(
+    energies: ak.Array, channels: ak.Array, tcm_tables: dict, reso_pars: dict, reso_func: Callable
+) -> ak.Array:
+    """Get the resolution for each energy.
+    Parameters
+    ----------
+    energies
+        the energies to smear
+    channels
+        the channel index for each energy
+    tcm_tables
+        the mapping from channel names to indices.
+    reso_pars
+        the pars for each channel.
+    reso_func
+        the function to compute the resolution.
+    """
+    n_pars = len(reso_pars[next(iter(reso_pars))])
+    pars_shaped = []
+    for _ in range(n_pars):
+        pars_shaped.append(np.zeros(len(ak.flatten(channels))))
+    num = ak.num(channels, axis=-1)
+    for key, value in tcm_tables.items():
+        for i in range(n_pars):
+            pars_shaped[i][ak.flatten(channels) == value] = reso_pars[key][i]
+    ch_reso = reso_func(ak.flatten(energies), *pars_shaped)
+    return ak.unflatten(ch_reso, num)
+def apply_energy_resolution(
+    energies: ak.Array, channels: ak.Array, tcm_tables: dict, reso_pars: dict, reso_func: Callable
+):
+    """Apply the energy resolution sampling to an array with many channels.
+    Parameters
+    ----------
+    energies
+        the energies to smear
+    channels
+        the channel index for each energy
+    tcm_tables
+        the mapping from channel names to indices.
+    reso_pars
+        the pars for each channel.
+    reso_func
+        the function to compute the resolution.
+    """
+    num = ak.num(channels, axis=-1)
+    ch_reso = get_resolution(energies, channels, tcm_tables, reso_pars, reso_func)
+    energies_flat_smear = gaussian_sample(ak.flatten(energies), ak.flatten(ch_reso))
+    return ak.unflatten(energies_flat_smear, num)
+def gaussian_sample(mu: ArrayLike, sigma: ArrayLike | float, *, seed: int | None = None) -> Array:
+    r"""Generate samples from a gaussian.
+    Based on:
+    .. math::
+        y_i \sim \mathcal{N}(\mu_i,\sigma_i)
+    where $y_i$ is the output, $\mu_i$ the input mean (mu) and $\sigma_i$ the
+    standard deviation for each point.
+    Parameters
+    ----------
+    mu
+        the mean positions to sample from, should be a flat (ArrayLike) object.
+    sigma
+        the standard deviation for each input value, can also be a single float.
+    seed
+        the random seed.
+    Returns
+    -------
+    sampled values.
+    """
+    # convert inputs
+    if isinstance(mu, Array):
+        mu = mu.view_as("np")
+    elif isinstance(mu, ak.Array):
+        mu = mu.to_numpy()
+    elif not isinstance(mu, np.ndarray):
+        mu = np.array(mu)
+    # similar for sigma
+    if isinstance(sigma, Array):
+        sigma = sigma.view_as("np")
+    elif isinstance(sigma, ak.Array):
+        sigma = sigma.to_numpy()
+    elif not isinstance(sigma, (float, int, np.ndarray)):
+        sigma = np.array(sigma)
+    rng = np.random.default_rng(seed=seed)  # Create a random number generator
+    return Array(rng.normal(loc=mu, scale=sigma))

{reboost-0.5.5 → reboost-0.6.0}/src/reboost/shape/group.py RENAMED Viewed

@@ -4,11 +4,49 @@ import logging
 import awkward as ak
 import numpy as np
+from dbetto import AttrsDict
 from lgdo import Table, VectorOfVectors
+from numpy.typing import ArrayLike
 log = logging.getLogger(__name__)
+def isin(channels: ak.Array, chan_list: list):
+    """Check if each element of the awkward array channels is in the channel list."""
+    num_channels = ak.num(channels, axis=-1)
+    channels_flat = ak.flatten(channels)
+    isin = np.isin(channels_flat, chan_list)
+    # unflatten
+    return ak.unflatten(isin, num_channels)
+def get_isin_group(
+    channels: ArrayLike, groups: AttrsDict, tcm_tables: dict, group: str = "off"
+) -> ak.Array:
+    """For each channel check if it is in the group.
+    Parameters
+    ----------
+    channels
+        Array of the channel indices.
+    groups
+        A mapping of the group for every channel name.
+    tcm_tables
+        the mapping from table names to indices (uids).
+    group
+        the group to select.
+    Returns
+    -------
+    an awkward array of booleans with the same shape as channels.
+    """
+    usability = {uid: groups[name] for name, uid in tcm_tables.items()}
+    group_idx = [key for key, item in usability.items() if item == group]
+    return isin(channels, group_idx)
 def _sort_data(obj: ak.Array, *, time_name: str = "time", evtid_name: str = "evtid") -> ak.Array:
     """Sort the data by evtid then time.

{reboost-0.5.5 → reboost-0.6.0}/src/reboost/utils.py RENAMED Viewed

@@ -18,6 +18,16 @@ from .profile import ProfileDict
 log = logging.getLogger(__name__)
+def get_table_names(tcm: VectorOfVectors) -> dict:
+    """Extract table names from tcm.attrs['tables'] and return a dictionary mapping each name to its index."""
+    raw = tcm.attrs["tables"]
+    cleaned = raw.strip("[]").replace(" ", "").replace("'", "")
+    tables = cleaned.split(",")
+    tables = [tab.split("/")[-1] for tab in tables]
+    return {name: idx for idx, name in enumerate(tables)}
 def get_wo_mode(
     group: int, out_det: int, in_det: int, chunk: int, new_hit_file: bool, overwrite: bool = False
 ) -> str:

{reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: reboost
-Version: 0.5.5
+Version: 0.6.0
 Summary: New LEGEND Monte-Carlo simulation post-processing
 Author-email: Manuel Huber <info@manuelhu.de>, Toby Dixon <toby.dixon.23@ucl.ac.uk>, Luigi Pertoldi <gipert@pm.me>
 Maintainer: The LEGEND Collaboration
@@ -696,6 +696,7 @@ Classifier: Topic :: Scientific/Engineering
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: hdf5plugin
 Requires-Dist: colorlog
 Requires-Dist: numpy
 Requires-Dist: scipy

{reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/SOURCES.txt RENAMED Viewed

@@ -48,6 +48,7 @@ tests/test_profile.py
 tests/test_shape.py
 tests/test_units.py
 tests/test_utils.py
+tests/evt/test_evt.py
 tests/glm/test_build_glm.py
 tests/hit/test_build_hit.py
 tests/hit/configs/args.yaml

{reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/requires.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+hdf5plugin
 colorlog
 numpy
 scipy

reboost-0.6.0/tests/evt/test_evt.py ADDED Viewed

@@ -0,0 +1,60 @@
+from __future__ import annotations
+import awkward as ak
+import pytest
+from dbetto import AttrsDict
+from lgdo import Array, Struct, Table, VectorOfVectors, lh5
+from reboost.build_evt import build_evt
+@pytest.fixture(scope="module")
+def test_gen_lh5(tmptestdir):
+    # write a basic lh5 file
+    hit_path = str(tmptestdir / "basic_hit.lh5")
+    data_ch1 = {}
+    data_ch1["energy"] = Array([100, 200, 300])
+    tab1 = Table(data_ch1)
+    data_ch2 = {}
+    data_ch2["energy"] = Array([2615, 2042, 100, 500])
+    tab2 = Table(data_ch2)
+    lh5.write(Struct({"det1": tab1}), "hit", hit_path, wo_mode="of")
+    lh5.write(Struct({"det2": tab2}), "hit", hit_path, wo_mode="append_column")
+    # now make a TCM
+    channels = ak.Array([[0], [1], [1], [0, 1], [1, 0]])
+    rows = ak.Array([[0], [0], [1], [1, 2], [3, 2]])
+    tcm = Table(
+        {"table_key": VectorOfVectors(channels), "row_in_table": VectorOfVectors(rows)},
+        attrs={"tables": "['stp/det1','stp/det2']"},
+    )
+    return hit_path, tcm
+def test_basic(test_gen_lh5):
+    ch_groups = AttrsDict({"det1": "on", "det2": "off"})
+    pars = AttrsDict(
+        {
+            "p01-r001": {"reso": {"det1": [1, 0.1], "det2": [2, 0.2]}},
+            "p01-r002": {"reso": {"det1": [1, 0.2], "det2": [1, 0.2]}},
+        }
+    )
+    run_part = AttrsDict({"p01-r001": 2, "p01-r002": 3})
+    evts = build_evt(
+        test_gen_lh5[1],
+        hitfile=test_gen_lh5[0],
+        outfile=None,
+        channel_groups=ch_groups,
+        pars=pars,
+        run_part=run_part,
+    )
+    assert isinstance(evts, Table)
+    print(evts)

{reboost-0.5.5 → reboost-0.6.0}/tests/hit/test_build_hit.py RENAMED Viewed

@@ -122,8 +122,10 @@ def test_basic(test_gen_lh5, tmptestdir):
     assert lh5.ls(outfile) == ["hit", "vtx"]
     with h5py.File(outfile) as h5f:
-        assert h5f["/hit/det1/energy"].shuffle is True
-        assert h5f["/hit/det1/energy"].compression == "lzf"
+        assert (
+            h5f["/hit/det1/energy"].id.get_create_plist().get_filter(0)[3]
+            == b"Zstandard compression: http://www.zstd.net"
+        )
     hits = lh5.read("hit/det1", outfile).view_as("ak")

{reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_dt_heuristic.py RENAMED Viewed

@@ -100,7 +100,7 @@ def dt_map_dummy(legendtestdata):
     data = lh5.read("V99000A", legendtestdata["lh5/hpge-drift-time-maps.lh5"])
     data = AttrsDict({k: data[k].view_as("np", with_units=True) for k in ("r", "z", "drift_time")})
-    nan_idx = np.where(data.drift_time.m == np.nan)
+    nan_idx = np.isnan(data.drift_time.m)
     dt_dummy_z = np.arange(0.1, 2, step=0.023)
     drift_time = np.tile(dt_dummy_z, 38).reshape((38, 83))

{reboost-0.5.5 → reboost-0.6.0}/tests/test_core.py RENAMED Viewed

@@ -11,7 +11,7 @@ import pygeomtools
 import pytest
 from dbetto import AttrsDict
 from legendtestdata import LegendTestData
-from lgdo import Array, Table
+from lgdo import Array, Struct, Table, VectorOfVectors, lh5
 import reboost
@@ -54,6 +54,58 @@ def make_gdml(test_data_configs):
     return f"{test_data_configs}/geometry.gdml"
+@pytest.fixture(scope="module")
+def hitfiles(tmptestdir):
+    # make some hit tier files
+    channel1 = Table(
+        {
+            "energy": Array([100, 200, 400, 300]),
+            "times": VectorOfVectors([[0.1], [0.2, 0.3], [0.4, 98], [2]]),
+        }
+    )
+    channel2 = Table(
+        {
+            "energy": Array([10, 70, 0, 56, 400, 400]),
+            "times": VectorOfVectors([[12], [], [-0.4, 0.4], [89], [1], [2]]),
+        }
+    )
+    lh5.write(Struct({"det001": channel1}), "hit", f"{tmptestdir}/hit_file_test.lh5", wo_mode="of")
+    lh5.write(
+        Struct({"det002": channel2}),
+        "hit",
+        f"{tmptestdir}/hit_file_test.lh5",
+        wo_mode="append_column",
+    )
+    return channel1.view_as("ak"), channel2.view_as("ak"), f"{tmptestdir}/hit_file_test.lh5"
+def test_read_data_at_channel(hitfiles):
+    # make a TCM
+    tcm_channels = ak.Array([[0], [0], [0, 1], [1], [1], [0, 1], [1], [1]])
+    tcm_rows = ak.Array([[0], [1], [2, 0], [1], [2], [3, 3], [4], [5]])
+    energy = reboost.core.read_data_at_channel_as_ak(
+        tcm_channels, tcm_rows, hitfiles[2], "energy", "hit", {"det001": 0, "det002": 1}
+    )
+    # check the shape is the same
+    assert len(energy) == len(tcm_channels)
+    assert ak.all(ak.num(energy, axis=-1) == ak.num(tcm_channels, axis=-1))
+    # check the data itself
+    assert energy[0] == hitfiles[0].energy[0]
+    assert energy[1] == hitfiles[0].energy[1]
+    assert ak.all(energy[2] == [hitfiles[0].energy[2], hitfiles[1].energy[0]])
+    # also check for VoV
+    times = reboost.core.read_data_at_channel_as_ak(
+        tcm_channels, tcm_rows, hitfiles[2], "times", "hit", {"det001": 0, "det002": 1}
+    )
+    assert len(times) == len(tcm_channels)
 def test_get_objects(test_data_configs, make_gdml):
     # check basic eval
     expression = "pyg4ometry.geant4.Registry()"

{reboost-0.5.5 → reboost-0.6.0}/tests/test_math.py RENAMED Viewed

@@ -98,3 +98,28 @@ def test_sample():
     # sigma float
     samples = stats.gaussian_sample([1, 2, 3], 0.1)
     assert isinstance(samples, Array)
+def test_energy_res():
+    energy = ak.Array([[100, 100], [200], [300, 100, 100]])
+    channels = ak.Array([[0, 1], [1], [2, 0, 1]])
+    tcm_tables = {"det000": 0, "det001": 1, "det002": 2}
+    reso_pars = {"det000": [1, 0], "det001": [1, 0.01], "det002": [2, 0.05]}
+    def reso_func(energy, p0, p1):
+        return np.sqrt(energy * p1 + p0)
+    reso = stats.get_resolution(energy, channels, tcm_tables, reso_pars, reso_func)
+    assert len(reso) == len(energy)
+    assert ak.all(ak.num(reso, axis=-1) == ak.num(energy, axis=-1))
+    # test a few values
+    assert reso[0][0] == np.sqrt(100 * 0 + 1)
+    assert reso[0][1] == np.sqrt(100 * 0.01 + 1)
+    smeared = stats.apply_energy_resolution(energy, channels, tcm_tables, reso_pars, reso_func)
+    assert len(smeared) == len(energy)
+    assert ak.all(ak.num(smeared, axis=-1) == ak.num(energy, axis=-1))

{reboost-0.5.5 → reboost-0.6.0}/tests/test_shape.py RENAMED Viewed

@@ -93,6 +93,26 @@ def test_time_group():
     )
+def test_isin_group():
+    channels = ak.Array([[1, 2, 3], [4, 5]])
+    chan_list = [1, 2]
+    assert ak.all(group.isin(channels, chan_list) == ak.Array([[1, 1, 0], [0, 0]]))
+    chan_list = [4]
+    assert ak.all(group.isin(channels, chan_list) == ak.Array([[0, 0, 0], [1, 0]]))
+    tcm_tables = {"det001": 1, "det002": 2, "det003": 3}
+    channels = ak.Array([[1, 2], [1], [3]])
+    groups = {"det001": "on", "det002": "on", "det003": "off"}
+    off = group.get_isin_group(channels, groups, tcm_tables, group="off")
+    assert ak.all(off == ak.Array([[0, 0], [0], [1]]))
+    on = group.get_isin_group(channels, groups, tcm_tables, group="on")
+    assert ak.all(on == ak.Array([[1, 1], [1], [0]]))
 def test_cluster_basic():
     trackid = ak.Array([[1, 1, 1, 2, 2, 3, 3, 7], [2, 2, 2, 3, 3, 3], [1]])

{reboost-0.5.5 → reboost-0.6.0}/tests/test_utils.py RENAMED Viewed

@@ -6,7 +6,7 @@ from pathlib import Path
 import pytest
 import yaml
-from lgdo.types import Array, Table
+from lgdo.types import Array, Table, VectorOfVectors
 import reboost
 from reboost.shape import group
@@ -15,6 +15,7 @@ from reboost.utils import (
     copy_units,
     get_file_dict,
     get_function_string,
+    get_table_names,
     get_wo_mode,
     merge_dicts,
 )
@@ -216,5 +217,11 @@ def test_units():
     assert reshaped.c.attrs["units"] == "keV"
-def test_get_channels():
-    pass
+def test_table_names():
+    names = "['hit/det001','hit/det002']"
+    tcm = VectorOfVectors([[]], attrs={"tables": names})
+    table_names = get_table_names(tcm)
+    assert table_names["det001"] == 0
+    assert table_names["det002"] == 1

reboost-0.5.5/src/reboost/build_evt.py DELETED Viewed

@@ -1,166 +0,0 @@
-"""A program for combining the hits from various detectors, to build events.
-Is able to parse a config file with the following format config file:
-.. code-block:: yaml
-    channels:
-        geds_on:
-        - det001
-        - det002
-        geds_ac:
-        - det003
-    outputs:
-    - energy
-    - multiplicity
-    operations:
-     energy_id:
-        channels: geds_on
-        aggregation_mode: gather
-        query: "hit.energy > 25"
-        expression: tcm.channel_id
-     energy:
-        aggregation_mode: keep_at_ch:evt.energy_id
-        expression: "hit.energy > 25"
-        channels: geds_on
-     multiplicity:
-        channels: geds_on
-        aggregation_mode: sum
-        expression: "hit.energy > 25"
-        initial: 0
-Must contain:
-- "channels": dictionary of channel groupings
-- "outputs": fields for the output file
-- "operations": operations to perform see :func:`pygama.evt.build_evt.evaluate_expression` for more details.
-"""
-from __future__ import annotations
-import logging
-import awkward as ak
-import numpy as np
-from lgdo import Table
-from lgdo.lh5 import LH5Iterator, write
-from pygama.evt.build_evt import evaluate_expression
-from pygama.evt.utils import TCMData
-from . import utils
-log = logging.getLogger(__name__)
-def build_evt(
-    hit_file: str, tcm_file: str, evt_file: str | None, config: dict, buffer: int = int(5e6)
-) -> ak.Array | None:
-    """Generates the event tier from the hit and tcm.
-    Parameters
-    ----------
-    hit_file
-        path to the hit tier file
-    tcm_file
-        path to the tcm tier file
-    evt_file
-        path to the evt tier (output) file, if `None` the :class:`Table` is returned in memory
-    config
-        dictionary of the configuration.
-    buffer
-        number of events to process simultaneously
-    Returns
-    -------
-    ak.Array of the evt tier data (if the data is not saved to disk)
-    """
-    # create the objects needed for evaluate expression
-    file_info = {
-        "hit": (hit_file, "hit", "det{:03}"),
-        "evt": (evt_file, "evt"),
-    }
-    # iterate through the TCM
-    out_ak = ak.Array([])
-    mode = "overwrite_file"
-    # get channel groupings
-    channels = {}
-    for group, info in config["channels"].items():
-        if isinstance(info, str):
-            channels[group] = [info]
-        elif isinstance(info, list):
-            channels[group] = info
-    for tcm_lh5 in LH5Iterator(tcm_file, "tcm", buffer_len=buffer):
-        tcm_lh5_sel = tcm_lh5
-        tcm_ak = tcm_lh5_sel.view_as("ak")
-        tcm = TCMData(
-            id=np.array(ak.flatten(tcm_ak.array_id)),
-            idx=np.array(ak.flatten(tcm_ak.array_idx)),
-            cumulative_length=np.array(np.cumsum(ak.num(tcm_ak.array_id, axis=-1))),
-        )
-        n_rows = len(tcm.cumulative_length)
-        out_tab = Table(size=n_rows)
-        for name, info in config["operations"].items():
-            msg = f"computing field {name}"
-            log.debug(msg)
-            defaultv = info.get("initial", np.nan)
-            if isinstance(defaultv, str) and (defaultv in ["np.nan", "np.inf", "-np.inf"]):
-                defaultv = eval(defaultv)
-            channels_use = utils.get_channels_from_groups(info.get("channels", []), channels)
-            channels_exclude = utils.get_channels_from_groups(
-                info.get("exclude_channels", []), channels
-            )
-            if "aggregation_mode" not in info:
-                field = out_tab.eval(
-                    info["expression"].replace("evt.", ""), info.get("parameters", {})
-                )
-            else:
-                field = evaluate_expression(
-                    file_info,
-                    tcm,
-                    channels_use,
-                    table=out_tab,
-                    mode=info["aggregation_mode"],
-                    expr=info["expression"],
-                    query=info.get("query", None),
-                    sorter=info.get("sort", None),
-                    channels_skip=channels_exclude,
-                    default_value=defaultv,
-                    n_rows=n_rows,
-                )
-            msg = f"field {field}"
-            log.debug(msg)
-            out_tab.add_field(name, field)
-        # remove fields if necessary
-        existing_cols = list(out_tab.keys())
-        for col in existing_cols:
-            if col not in config["outputs"]:
-                out_tab.remove_column(col, delete=True)
-        # write
-        if evt_file is not None:
-            write(out_tab, "evt", evt_file, wo_mode=mode)
-            mode = "append"
-        else:
-            out_ak = ak.concatenate((out_ak, out_tab.view_as("ak")))
-    if evt_file is None:
-        return out_ak
-    return None

reboost-0.5.5/src/reboost/math/stats.py DELETED Viewed

@@ -1,57 +0,0 @@
-from __future__ import annotations
-import logging
-import awkward as ak
-import numpy as np
-from lgdo import Array
-from numpy.typing import ArrayLike
-log = logging.getLogger(__name__)
-def gaussian_sample(mu: ArrayLike, sigma: ArrayLike | float, *, seed: int = 999) -> Array:
-    r"""Generate samples from a gaussian.
-    Based on:
-    .. math::
-        y_i \sim \mathcal{N}(\mu_i,\sigma_i)
-    where $y_i$ is the output, $x_i$ the input (mu) and $\sigma$ is the standard
-    deviation for each point.
-    Parameters
-    ----------
-    mu
-        the mean positions to sample from, should be a flat (ArrayLike) object.
-    sigma
-        the standard deviation for each input value, can also be a single float.
-    seed
-        the random seed.
-    Returns
-    -------
-    sampled values.
-    """
-    # convert inputs
-    if isinstance(mu, Array):
-        mu = mu.view_as("np")
-    elif isinstance(mu, ak.Array):
-        mu = mu.to_numpy()
-    elif not isinstance(mu, np.ndarray):
-        mu = np.array(mu)
-    # similar for sigma
-    if isinstance(sigma, Array):
-        sigma = sigma.view_as("np")
-    elif isinstance(sigma, ak.Array):
-        sigma = sigma.to_numpy()
-    elif not isinstance(sigma, (float, int, np.ndarray)):
-        sigma = np.array(sigma)
-    rng = np.random.default_rng(seed=seed)  # Create a random number generator
-    return Array(rng.normal(loc=mu, scale=sigma))