reboost 0.3.0-py3-none-any.whl → 0.3.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- reboost/__init__.py +2 -2
- reboost/_version.py +2 -2
- reboost/build_glm.py +8 -2
- reboost/build_hit.py +64 -55
- reboost/build_tcm.py +1 -1
- reboost/cli.py +10 -8
- reboost/core.py +86 -16
- reboost/hpge/psd.py +257 -0
- reboost/hpge/surface.py +145 -1
- reboost/iterator.py +119 -58
- reboost/optmap/cli.py +7 -7
- reboost/shape/group.py +1 -1
- reboost/utils.py +51 -1
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/METADATA +1 -1
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/RECORD +19 -19
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/WHEEL +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/entry_points.txt +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/top_level.txt +0 -0
reboost/hpge/psd.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import logging
+from math import erf, exp

 import awkward as ak
 import numba
@@ -236,3 +237,259 @@ def _drift_time_heuristic_impl(
         dt_heu[i] = max_id_metric

     return dt_heu
+
+
+@numba.njit(cache=True)
+def _vectorized_erf(x: ArrayLike) -> NDArray:
+    """Error function that can take in a numpy array."""
+    out = np.empty_like(x)
+    for i in range(x.size):
+        out[i] = erf(x[i])
+    return out
+
+
+@numba.njit(cache=True)
+def _current_pulse_model(
+    times: ArrayLike, Amax: float, mu: float, sigma: float, tail_fraction: float, tau: float
+) -> NDArray:
+    r"""Analytic model for the current pulse in a germanium detector.
+
+    Consists of a Gaussian and an exponential tail:
+
+    .. math::
+
+        A(t) = A_{max}\times (1-p)\times \text{Gauss}(t,\mu,\sigma) +
+        A_{max}\times p\times (1-\text{Erf}((t-\mu)/\sigma))\times \frac{e^{(t/\tau)}}{2e^{\mu/\tau}}
+
+    Parameters
+    ----------
+    times
+        Array of times to compute the current for.
+    Amax
+        Maximum current.
+    mu
+        Time of the maximum current.
+    sigma
+        Width of the current pulse.
+    tail_fraction
+        Fraction of the tail in the pulse.
+    tau
+        Time constant of the low-time tail.
+
+    Returns
+    -------
+    The predicted current waveform for this energy deposit.
+    """
+    norm = 2 * exp(mu / tau)
+
+    dx = times - mu
+    term1 = Amax * (1 - tail_fraction) * np.exp(-(dx * dx) / (2 * sigma * sigma))
+    term2 = Amax * tail_fraction * (1 - _vectorized_erf(dx / sigma)) * np.exp(times / tau) / norm
+
+    return term1 + term2
+
+
+def convolve_surface_response(surf_current: np.ndarray, bulk_pulse: np.ndarray) -> NDArray:
+    """Convolve the surface response pulse with the bulk current pulse.
+
+    This combines the current induced on the edge of the FCCD region with the bulk response
+    on the p+ contact.
+
+    Parameters
+    ----------
+    surf_current
+        array of the current induced via diffusion against time.
+    bulk_pulse
+        the pulse template to convolve the surface current with.
+
+    Returns
+    -------
+    the current waveform after convolution.
+    """
+    return np.convolve(surf_current, bulk_pulse, mode="full")[: len(surf_current)]
+
+
+@numba.njit(cache=True)
+def get_current_waveform(
+    edep: ak.Array,
+    drift_time: ak.Array,
+    template: ArrayLike,
+    start: float,
+    dt: float,
+    range_t: tuple,
+) -> tuple[NDArray, NDArray]:
+    r"""Estimate the current waveform.
+
+    Based on modelling the current as a sum over the current pulse model defined by
+    the template.
+
+    .. math::
+
+        A(t) = \sum_i E_i \times N f(t, dt_i, \vec{\theta})
+
+    Where:
+
+    - $f(t)$ is the template,
+    - $\vec{\theta}$ are the parameters (sigma, p, tau),
+    - $E_i$ and $dt_i$ are the deposited energy and drift time,
+    - $N$ is a normalisation term.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    drift_time
+        Array of drift times for each step.
+    template
+        array of the template for the current waveforms, with 1 ns binning.
+    start
+        first time value of the template.
+    dt
+        timestep (in ns) for the template.
+    range_t
+        a range of times to search around.
+
+    Returns
+    -------
+    A tuple of the time and current for the current waveform for this event.
+    """
+    n = len(template)
+
+    times = np.arange(n) * dt + start
+    y = np.zeros_like(times)
+
+    for i in range(len(edep)):
+        E = edep[i]
+        mu = drift_time[i]
+        shift = int(mu / dt)
+
+        # Add scaled template starting at index `shift`
+        for j in range(n):
+            if (
+                (shift + j) >= n
+                or (times[shift + j] < range_t[0])
+                or (times[shift + j] > range_t[1])
+            ):
+                continue
+            y[shift + j] += E * template[j]
+
+    return times, y
+
+
+@numba.njit(cache=True)
+def _estimate_current_impl(
+    edep: ak.Array,
+    dt: ak.Array,
+    sigma: float,
+    tail_fraction: float,
+    tau: float,
+    mean_AoE: float = 0,
+) -> tuple[NDArray, NDArray]:
+    """Estimate the maximum current that would be measured in the HPGe detector.
+
+    This is based on extracting a waveform with :func:`get_current_waveform` and finding its maximum.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    dt
+        Array of drift times for each step.
+    sigma
+        Sigma parameter of the current pulse model.
+    tail_fraction
+        Tail-fraction parameter of the current pulse.
+    tau
+        Tail parameter of the current pulse.
+    mean_AoE
+        The mean AoE value for this detector (to normalise current pulses).
+    """
+    A = np.zeros(len(dt))
+    maximum_t = np.zeros(len(dt))
+
+    # get normalisation factor
+    x_coarse = np.linspace(-1000, 3000, 201)
+    x_fine = np.linspace(-1000, 3000, 4001)
+
+    # make a template with 1 ns binning so
+    # template[(i - start) / dt] = _current_pulse_model(x, 1, i, ...)
+
+    template_coarse = _current_pulse_model(x_coarse, 1, 0, sigma, tail_fraction, tau)
+    template_coarse /= np.max(template_coarse)
+    template_coarse *= mean_AoE
+
+    template_fine = _current_pulse_model(x_fine, 1, 0, sigma, tail_fraction, tau)
+    template_fine /= np.max(template_fine)
+    template_fine *= mean_AoE
+
+    for i in range(len(dt)):
+        t = np.asarray(dt[i])
+        e = np.asarray(edep[i])
+
+        # first pass
+        times_coarse, W = get_current_waveform(
+            e, t, template=template_coarse, start=-1000, dt=20, range_t=(-1000, 3000)
+        )
+
+        max_t = times_coarse[np.argmax(W)]
+
+        # fine scan
+        times, W = get_current_waveform(
+            e, t, template=template_fine, start=-1000, dt=1, range_t=(max_t - 50, max_t + 50)
+        )
+
+        A[i] = np.max(W)
+        maximum_t[i] = times[np.argmax(W)]
+
+    return A, maximum_t
+
+
+def maximum_current(
+    edep: ArrayLike,
+    drift_time: ArrayLike,
+    *,
+    sigma: float,
+    tail_fraction: float,
+    tau: float,
+    mean_AoE: float = 0,
+    get_timepoint: bool = False,
+) -> Array:
+    """Estimate the maximum current in the HPGe detector based on :func:`_estimate_current_impl`.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    drift_time
+        Array of drift times for each step.
+    sigma
+        Sigma parameter of the current pulse model.
+    tail_fraction
+        Tail-fraction parameter of the current pulse.
+    tau
+        Tail parameter of the current pulse.
+    mean_AoE
+        The mean AoE value for this detector (to normalise current pulses).
+    get_timepoint
+        Flag to return the time of the maximum current (relative to t0) instead of the current.
+
+    Returns
+    -------
+    An Array of the maximum current for each hit.
+    """
+    # extract LGDO data and units
+    drift_time, _ = units.unwrap_lgdo(drift_time)
+
+    edep, _ = units.unwrap_lgdo(edep)
+
+    curr, time = _estimate_current_impl(
+        ak.Array(edep),
+        ak.Array(drift_time),
+        sigma=sigma,
+        tail_fraction=tail_fraction,
+        tau=tau,
+        mean_AoE=mean_AoE,
+    )
+
+    if get_timepoint:
+        return Array(time)
+    return Array(curr)
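For orientation, here is a minimal sketch of how the new A/E estimator added above could be driven. The input arrays and all pulse-shape parameter values are illustrative assumptions, not values shipped with reboost, and it assumes `units.unwrap_lgdo` passes plain awkward arrays through unchanged:

import awkward as ak

from reboost.hpge.psd import maximum_current

# toy step data: per-hit energies (keV) and drift times (ns)
edep = ak.Array([[100.0, 50.0], [200.0]])
drift_time = ak.Array([[300.0, 450.0], [500.0]])

# hypothetical pulse-shape parameters, for illustration only
curr = maximum_current(
    edep,
    drift_time,
    sigma=40.0,
    tail_fraction=0.3,
    tau=200.0,
    mean_AoE=1.0,
)
# pass get_timepoint=True to get the time of the current maximum instead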
reboost/hpge/surface.py
CHANGED
@@ -4,10 +4,12 @@ import logging

 import awkward as ak
 import legendhpges
+import numba
 import numpy as np
 from lgdo import VectorOfVectors
 from lgdo.types import LGDO
 from numpy.typing import ArrayLike
+from scipy import stats

 log = logging.getLogger(__name__)

@@ -104,4 +106,146 @@ def distance_to_surface(
         local_positions[indices], surface_indices=surface_indices
     )

-    return VectorOfVectors(ak.unflatten(distances, size))
+    return VectorOfVectors(ak.unflatten(distances, size), dtype=np.float32)
+
+
+@numba.njit(cache=True)
+def _advance_diffusion(
+    charge: np.ndarray,
+    factor: float,
+    recomb: float = 0,
+    recomb_depth: float = 600,
+    delta_x: float = 10,
+):
+    """Make a step of diffusion using the explicit Euler scheme.
+
+    Parameters
+    ----------
+    charge
+        charge in each space bin up to the FCCD.
+    factor
+        the factor of diffusion for the Euler scheme.
+    recomb
+        the recombination probability.
+    recomb_depth
+        the depth of the recombination region.
+    delta_x
+        the width of each spatial bin.
+
+    Returns
+    -------
+    a tuple of the charge distribution at the next time step and the collected charge.
+    """
+    charge_xp1 = np.append(charge[1:], [0])
+    charge_xm1 = np.append([0], charge[:-1])
+
+    # collected charge
+    collected = factor * charge[-1]
+
+    # charge at the next step
+    charge_new = charge_xp1 * factor + charge_xm1 * factor + charge * (1 - 2 * factor)
+
+    # correction for recombination
+    charge_new[0 : int(recomb_depth / delta_x)] = (1 - recomb) * charge_new[
+        0 : int(recomb_depth / delta_x)
+    ]
+
+    return charge_new, collected
+
+
+@numba.njit(cache=True)
+def _compute_diffusion_impl(
+    init_charge: np.ndarray,
+    nsteps: int,
+    factor: float,
+    recomb: float = 0,
+    recomb_depth: float = 600,
+    delta_x: float = 10,
+):
+    """Compute the charge collected as a function of time.
+
+    Parameters
+    ----------
+    init_charge
+        Initial charge distribution.
+    nsteps
+        Number of time steps to take.
+    kwargs
+        Keyword arguments to pass to :func:`_advance_diffusion`.
+    """
+    charge = init_charge
+    collected_charge = np.zeros(nsteps)

+    for i in range(nsteps):
+        charge, collected = _advance_diffusion(
+            charge, factor=factor, recomb=recomb, recomb_depth=recomb_depth, delta_x=delta_x
+        )
+        collected_charge[i] = collected
+
+    return collected_charge
+
+
+def get_surface_response(
+    fccd: float,
+    recomb_depth: float,
+    init: float = 0,
+    recomb: float = 0.002,
+    init_size: float = 0.0,
+    factor: float = 0.29,
+    nsteps: int = 10000,
+    delta_x: float = 10,
+):
+    """Extract the surface response current pulse based on diffusion.
+
+    Parameters
+    ----------
+    fccd
+        the full charge collection depth (in um).
+    recomb_depth
+        the depth of the recombination region (in um).
+    init
+        the initial position of the charge (in um).
+    recomb
+        the recombination rate.
+    init_size
+        the initial size of the charge cloud (in um).
+    factor
+        the factor for the explicit Euler scheme (the probability of charge diffusion).
+    nsteps
+        the number of time steps.
+    delta_x
+        the width of each position bin.
+    """
+    # number of position steps
+    nx = int(fccd / delta_x)
+
+    # initial charge
+    charge = np.zeros(nx)
+
+    # position bins (the second array extends past the FCCD)
+    x = (fccd / nx) * np.arange(nx)
+    x_full = (fccd / nx) * np.arange(2 * nx)
+
+    # generate initial conditions
+    if init_size != 0:
+        charge = stats.norm.pdf(x, loc=init, scale=init_size)
+        charge_full = stats.norm.pdf(x_full, loc=init, scale=init_size)
+        charge_col = [(np.sum(charge_full) - np.sum(charge)) / np.sum(charge_full)]
+        charge = charge / np.sum(charge_full)
+    elif int(init * nx / fccd) < len(charge):
+        charge[int(init * nx / fccd)] = 1
+        charge_col = np.array([])
+    else:
+        charge_col = np.array([1])
+
+    # run the simulation
+    charge_collected = _compute_diffusion_impl(
+        charge,
+        nsteps=nsteps,
+        factor=factor,
+        recomb=recomb,
+        recomb_depth=recomb_depth,
+        delta_x=delta_x,
+    )
+
+    return np.cumsum(np.concatenate((charge_col, charge_collected)))
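To make the new pieces concrete, a minimal sketch of feeding the diffusion-based surface response into the convolution helper added in reboost/hpge/psd.py; every number below is an illustrative assumption, not a calibrated value:

import numpy as np

from reboost.hpge.psd import convolve_surface_response
from reboost.hpge.surface import get_surface_response

# cumulative charge collected from a 1000 um FCCD, point deposit at 800 um
surf_q = get_surface_response(fccd=1000, recomb_depth=600, init=800)

# differentiate to get the surface current, then fold in a toy Gaussian bulk pulse
surf_current = np.diff(surf_q)
t = np.arange(surf_current.size)
bulk_pulse = np.exp(-0.5 * ((t - 100.0) / 20.0) ** 2)

waveform = convolve_surface_response(surf_current, bulk_pulse)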
reboost/iterator.py
CHANGED
@@ -8,7 +8,7 @@ import awkward as ak
 from lgdo.lh5 import LH5Store
 from lgdo.types import LGDO, Table

-from
+from . import build_glm

 log = logging.getLogger(__name__)

@@ -25,17 +25,28 @@ class GLMIterator:
         n_rows: int | None,
         *,
         stp_field: str = "stp",
-        read_vertices: bool = False,
         buffer: int = 10000,
         time_dict: dict | None = None,
+        reshaped_files: bool = False,
     ):
-        """Constructor for the
+        """Constructor for the GLMIterator.
+
+        The GLM iterator provides a way to iterate over the
+        simulated geant4 evtids, extracting the number of hits or steps for
+        each range in evtids. This ensures a single simulated event
+        is not split between two iterations and allows specifying a
+        start and an end evtid to extract.
+
+        In case the data is already reshaped and we do not need to
+        read a specific range of evtids, this iterator just loops
+        over the input stp field. Otherwise, if the GLM file is not
+        provided, it is created in memory.

         Parameters
         ----------
         glm_file
             the file containing the event lookup map, if `None` the glm will
-            be created in memory.
+            be created in memory if needed.
         stp_file
             the file containing the steps to read.
         lh5_group
@@ -46,12 +57,12 @@ class GLMIterator:
             the number of rows to read, if `None` read them all.
         stp_field
             name of the group.
-        read_vertices
-            whether to read also the vertices table.
         buffer
             the number of rows to read at once.
         time_dict
             time profiling data structure.
+        reshaped_files
+            flag for whether the files are reshaped.
         """
         # initialise
         self.glm_file = glm_file
@@ -62,18 +73,47 @@ class GLMIterator:
         self.n_rows = n_rows
         self.buffer = buffer
         self.current_i_entry = 0
-        self.read_vertices = read_vertices
         self.stp_field = stp_field
+        self.reshaped_files = reshaped_files

         # would be good to replace with an iterator
         self.sto = LH5Store()
         self.n_rows_read = 0
         self.time_dict = time_dict
         self.glm = None
+        self.use_glm = True
+
+        glm_n_rows = 0

-        # build the glm in memory
-        if self.glm_file is None
-            self.
+        # build the glm in memory if needed
+        if self.glm_file is None and (
+            (self.n_rows is not None) or (self.start_row != 0) or not reshaped_files
+        ):
+            if self.time_dict is not None:
+                time_start = time.time()
+
+            self.glm = build_glm.build_glm(
+                stp_file, None, out_table_name="glm", id_name="evtid", lh5_groups=[lh5_group]
+            )
+
+            if self.time_dict is not None:
+                self.time_dict.update_field("read/glm", time_start)
+
+            glm_n_rows = len(self.glm)
+
+        elif self.glm_file is None:
+            self.use_glm = False
+        else:
+            glm_n_rows = self.sto.read_n_rows(f"glm/{self.lh5_group}", self.glm_file)
+
+        # get the number of stp rows
+        stp_n_rows = self.sto.read_n_rows(f"{self.stp_field}/{self.lh5_group}", self.stp_file)
+
+        # heuristics for a good buffer length
+        if self.use_glm:
+            self.buffer = int(buffer * glm_n_rows / stp_n_rows)
+            msg = f"Number of stp rows {stp_n_rows}, number of glm rows {glm_n_rows} changing buffer from {buffer} to {self.buffer}"
+            log.info(msg)

     def __iter__(self) -> typing.Iterator:
         self.current_i_entry = 0
@@ -81,78 +121,99 @@
         self.start_row_tmp = self.start_row
         return self

-    def
+    def get_n_rows(self):
+        """Get the number of rows to read."""
         # get the number of rows to read
+        if self.time_dict is not None:
+            time_start = time.time()
+
         if self.n_rows is not None:
             rows_left = self.n_rows - self.n_rows_read
             n_rows = self.buffer if (self.buffer > rows_left) else rows_left
         else:
             n_rows = self.buffer

-
-
+        glm_rows = None
+        start = 0
+        n = 0

-
-
-
-
-
-
-
-
-
+        if self.use_glm:
+            if self.glm_file is not None:
+                glm_rows, n_rows_read = self.sto.read(
+                    f"glm/{self.lh5_group}",
+                    self.glm_file,
+                    start_row=self.start_row_tmp,
+                    n_rows=n_rows,
+                )
+            else:
+                # get the maximum row to read
+                max_row = self.start_row_tmp + n_rows
+                max_row = min(len(self.glm[self.lh5_group]), max_row)

-
-
+                if max_row != self.start_row_tmp:
+                    glm_rows = Table(self.glm[self.lh5_group][self.start_row_tmp : max_row])

-
+                n_rows_read = max_row - self.start_row_tmp

-
-
+            if self.time_dict is not None:
+                self.time_dict.update_field("read/glm", time_start)

-
-
+            self.n_rows_read += n_rows_read
+            self.start_row_tmp += n_rows_read

-
-
+            # view our glm as an awkward array
+            if glm_rows is not None:
+                glm_ak = glm_rows.view_as("ak")

-
-
+                # remove empty rows
+                glm_ak = glm_ak[glm_ak.n_rows > 0]

-
-
+                if len(glm_ak) > 0:
+                    # extract range of stp rows to read
+                    start = glm_ak.start_row[0]
+                    n = ak.sum(glm_ak.n_rows)

-
-
-
-
+        else:
+            start = self.start_row_tmp
+            n = n_rows
+            n_rows_read = n
+            self.start_row_tmp += n
+
+        return start, n, n_rows_read
+
+    def __next__(self) -> tuple[LGDO, int, int]:
+        """Read one chunk.
+
+        Returns
+        -------
+        a tuple of:
+        - the steps
+        - the chunk index
+        - the number of steps read
+        """
+        # read the glm rows
+        start, n, n_rows_read = self.get_n_rows()

-
-
+        if self.time_dict is not None:
+            time_start = time.time()

+        try:
             stp_rows, n_steps = self.sto.read(
-                f"
+                f"{self.stp_field}/{self.lh5_group}",
                 self.stp_file,
                 start_row=int(start),
                 n_rows=int(n),
             )
+        except OverflowError:
+            raise StopIteration from None

-
-
-            self.time_dict.update_field("read/stp", time_start)
+        if n_rows_read == 0 or n_steps == 0:
+            raise StopIteration

-
+        # save time
+        if self.time_dict is not None:
+            self.time_dict.update_field("read/stp", time_start)

-
-            vert_rows, _ = self.sto.read(
-                "/vtx",
-                self.stp_file,
-                start_row=self.start_row,
-                n_rows=n_rows,
-            )
-        else:
-            vert_rows = None
-        # vertex table should have same structure as glm
+        self.current_i_entry += 1

-
-        return (None, None, self.current_i_entry, 0)
+        return (stp_rows, self.current_i_entry - 1, n_steps)