reboost 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reboost/__init__.py +14 -0
- reboost/_version.py +34 -0
- reboost/build_evt.py +134 -0
- reboost/build_glm.py +305 -0
- reboost/build_hit.py +466 -0
- reboost/cli.py +194 -0
- reboost/core.py +526 -0
- reboost/daq/__init__.py +5 -0
- reboost/daq/core.py +262 -0
- reboost/daq/utils.py +28 -0
- reboost/hpge/__init__.py +0 -0
- reboost/hpge/psd.py +847 -0
- reboost/hpge/surface.py +284 -0
- reboost/hpge/utils.py +79 -0
- reboost/iterator.py +226 -0
- reboost/log_utils.py +29 -0
- reboost/math/__init__.py +0 -0
- reboost/math/functions.py +175 -0
- reboost/math/stats.py +119 -0
- reboost/optmap/__init__.py +5 -0
- reboost/optmap/cli.py +246 -0
- reboost/optmap/convolve.py +325 -0
- reboost/optmap/create.py +423 -0
- reboost/optmap/evt.py +141 -0
- reboost/optmap/mapview.py +208 -0
- reboost/optmap/numba_pdg.py +26 -0
- reboost/optmap/optmap.py +328 -0
- reboost/profile.py +82 -0
- reboost/shape/__init__.py +0 -0
- reboost/shape/cluster.py +260 -0
- reboost/shape/group.py +189 -0
- reboost/shape/reduction.py +0 -0
- reboost/spms/__init__.py +5 -0
- reboost/spms/pe.py +178 -0
- reboost/units.py +107 -0
- reboost/utils.py +503 -0
- reboost-0.8.3.dist-info/METADATA +82 -0
- reboost-0.8.3.dist-info/RECORD +42 -0
- reboost-0.8.3.dist-info/WHEEL +5 -0
- reboost-0.8.3.dist-info/entry_points.txt +3 -0
- reboost-0.8.3.dist-info/licenses/LICENSE +674 -0
- reboost-0.8.3.dist-info/top_level.txt +1 -0
reboost/hpge/surface.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import awkward as ak
|
|
6
|
+
import numba
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pygeomhpges
|
|
9
|
+
from lgdo import VectorOfVectors
|
|
10
|
+
from lgdo.types import LGDO
|
|
11
|
+
from numpy.typing import ArrayLike
|
|
12
|
+
from scipy import stats
|
|
13
|
+
|
|
14
|
+
log = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def distance_to_surface(
    positions_x: VectorOfVectors,
    positions_y: VectorOfVectors,
    positions_z: VectorOfVectors,
    hpge: pygeomhpges.base.HPGe,
    det_pos: ArrayLike,
    *,
    surface_type: str | None = None,
    unit: str = "mm",
    distances_precompute: VectorOfVectors | None = None,
    precompute_cutoff: float | None = None,
) -> VectorOfVectors:
    """Computes the distance from each step to the detector surface.

    The calculation can be performed for any surface type `nplus`, `pplus`,
    `passive` or `None`. In order to speed up the calculation we provide
    an option to only compute the distance for points within a certain distance
    of any surface (as computed by remage and stored in the "distances_precompute") argument.

    Parameters
    ----------
    positions_x
        Global x positions for each step.
    positions_y
        Global y positions for each step.
    positions_z
        Global z positions for each step.
    hpge
        HPGe object.
    det_pos
        position of the detector origin, must be a 3 component array corresponding to `(x,y,z)`.
    surface_type
        string of which surface to use, can be `nplus`, `pplus` `passive` or None (in which case the distance to any surface is calculated).
    unit
        unit for the hit tier positions table, one of ``mm``, ``cm`` or ``m``.
    distances_precompute
        VectorOfVectors of distance to any surface computed by remage.
    precompute_cutoff
        cutoff on distances_precompute to not compute the distance for (in mm)

    Returns
    -------
    VectorOfVectors with the same shape as `positions_x/y/z` of the distance to the surface.

    Raises
    ------
    ValueError
        if `unit` is unknown, or if the three position arrays do not have the same shape.

    Note
    ----
    `positions_x/positions_y/positions_z` must all have the same shape.
    """
    # conversion factor from `unit` to mm (1 cm = 10 mm, 1 m = 1000 mm);
    # the original indexing trick mapped "cm" to 100, which is wrong
    unit_factors = {"mm": 1, "cm": 10, "m": 1000}
    if unit not in unit_factors:
        msg = f"unit must be one of {list(unit_factors)}, got '{unit}'"
        raise ValueError(msg)
    factor = unit_factors[unit]

    # compute local positions (relative to the detector origin), one flat array per axis
    pos = []
    sizes = []

    for idx, pos_tmp in enumerate([positions_x, positions_y, positions_z]):
        local_pos_tmp = ak.Array(pos_tmp) * factor - det_pos[idx]
        local_pos_flat_tmp = ak.flatten(local_pos_tmp).to_numpy()
        pos.append(local_pos_flat_tmp)
        sizes.append(ak.num(local_pos_tmp, axis=1))

    if not ak.all(sizes[0] == sizes[1]) or not ak.all(sizes[0] == sizes[2]):
        msg = "all position vector of vector must have the same shape"
        raise ValueError(msg)

    size = sizes[0]
    # restructure the positions into an (n, 3) array of (x, y, z) points
    local_positions = np.vstack(pos).T

    # indices of the requested surface(s); None means compute to any surface
    surface_indices = (
        np.where(np.array(hpge.surfaces) == surface_type) if surface_type is not None else None
    )

    # distance calc itself
    if distances_precompute is None:
        distances = hpge.distance_to_surface(local_positions, surface_indices=surface_indices)
    else:
        # decide when the calculation needs to be run
        if isinstance(distances_precompute, LGDO):
            distances_precompute = distances_precompute.view_as("ak")

        distances_precompute_flat = ak.flatten(distances_precompute)
        # points not selected below keep NaN as their distance
        distances = np.full_like(distances_precompute_flat.to_numpy(), np.nan, dtype=float)

        # only compute the distance for points close enough to any surface
        indices = distances_precompute_flat < precompute_cutoff

        # compute the distances
        distances[indices] = hpge.distance_to_surface(
            local_positions[indices], surface_indices=surface_indices
        )

    # restore the original jagged structure
    return VectorOfVectors(ak.unflatten(distances, size), dtype=np.float32)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@numba.njit(cache=True)
def _advance_diffusion(
    charge: np.ndarray,
    factor: float,
    recomb: float = 0,
    recomb_depth: float = 600,
    delta_x: float = 10,
):
    """Advance the 1D diffusion equation by a single explicit-Euler time step.

    Parameters
    ----------
    charge
        charge in each space bin up to the FCCD
    factor
        the factor of diffusion for the Euler scheme
    recomb
        the recombination probability.
    recomb_depth
        the depth of the recombination region.
    delta_x
        the width of each spatial bin.

    Returns
    -------
    a tuple of the charge distribution at the next time step and the collected charge.
    """
    # neighbour charge arrays, zero-padded at the boundaries
    shifted_left = np.append(charge[1:], [0])
    shifted_right = np.append([0], charge[:-1])

    # charge flowing out of the last bin is collected at the contact
    collected = factor * charge[-1]

    # explicit Euler update: gain from both neighbours, loss from this bin
    evolved = shifted_left * factor + shifted_right * factor + charge * (1 - 2 * factor)

    # apply recombination losses to the bins inside the recombination region
    n_recomb_bins = int(recomb_depth / delta_x)
    evolved[0:n_recomb_bins] = (1 - recomb) * evolved[0:n_recomb_bins]

    return evolved, collected
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@numba.njit(cache=True)
def _compute_diffusion_impl(
    init_charge: np.ndarray,
    nsteps: int,
    factor: float,
    recomb: float = 0,
    recomb_depth: float = 600,
    delta_x: float = 10,
):
    """Compute the charge collected as a function of time.

    Repeatedly applies :func:`_advance_diffusion`, recording the charge
    arriving at the contact after every time step.

    Parameters
    ----------
    init_charge
        Initial charge distribution.
    nsteps
        Number of time steps to take.
    factor
        the factor of diffusion for the Euler scheme.
    recomb
        the recombination probability.
    recomb_depth
        the depth of the recombination region.
    delta_x
        the width of each spatial bin.
    """
    collected_charge = np.zeros(nsteps)
    state = init_charge

    for step in range(nsteps):
        state, arrived = _advance_diffusion(
            state, factor=factor, recomb=recomb, recomb_depth=recomb_depth, delta_x=delta_x
        )
        collected_charge[step] = arrived

    return collected_charge
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def get_surface_library(fccd: float, dist_step_in_um: float, **kwargs):
    """Build the surface response library by calling `reboost.hpge.surface.get_surface_response`.

    Parameters
    ----------
    fccd
        The value of the FCCD (in um)
    dist_step_in_um
        The distance steps to use in building the library
    **kwargs
        Other keyword arguments to `reboost.hpge.surface.get_surface_response`.

    Returns
    -------
    2D array of the cumulative-charge arriving at the p-n junction as a function
    of time, for each distance.
    """
    # one column per initial depth between 0 and the FCCD
    steps = int(fccd / dist_step_in_um)

    # the number of rows must match the number of time samples produced by
    # get_surface_response; honour an "nsteps" override passed through kwargs
    # instead of hard-coding 10000 (which breaks as soon as nsteps is changed)
    nsteps = kwargs.get("nsteps", 10000)

    # NOTE(review): get_surface_response returns nsteps + 1 samples when
    # init_size != 0 — this library assumes the default point-like initial
    # charge; confirm before passing init_size via kwargs.
    out = np.zeros((nsteps, steps))

    for step in range(steps):
        out[:, step] = get_surface_response(fccd, init=step * dist_step_in_um, **kwargs)

    return out
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def get_surface_response(
    fccd: float,
    init: float = 0,
    *,
    recomb_depth: float = 500,
    recomb: float = 0.002,
    init_size: float = 0.0,
    factor: float = 0.29,
    nsteps: int = 10000,
    delta_x: float = 10,
):
    """Extract the surface response current pulse based on diffusion.

    This extracts the amount of charge arrived (cumulative) at the p-n
    junction as a function of time, based on diffusion. The final
    induced waveform on the p-n contact is obtained from convolution
    with the bulk pulse.

    Parameters
    ----------
    fccd
        the full charge collection depth (in um)
    recomb_depth
        the depth of the recombination region (in um)
    init
        the initial position of the charge (in um)
    recomb
        the recombination rate
    init_size
        the initial size of the charge cloud (in um)
    factor
        the factor for the explicit Euler scheme (the probability of charge diffusuion)
    nsteps
        the number of time steps.
    delta_x
        the width of each position bin.

    Returns
    -------
    cumulative collected charge per time step. NOTE: the length is ``nsteps``
    for a point-like initial charge inside the FCCD, and ``nsteps + 1``
    otherwise (one extra leading entry for the charge already past the FCCD).
    """
    # number of position steps covering the FCCD
    nx = int(fccd / delta_x)

    # initial charge distribution (one bin per position step)
    charge = np.zeros(nx)

    # bin-centre coordinates; x_full extends to twice the FCCD so that the
    # tail of a Gaussian cloud reaching beyond the FCCD can be accounted for
    x = (fccd / nx) * np.arange(nx)
    x_full = (fccd / nx) * np.arange(2 * nx)

    # generate initial conditions
    if init_size != 0:
        # Gaussian charge cloud centred at `init`; the fraction of the cloud
        # beyond the FCCD counts as immediately collected (charge_col), and
        # the in-range part is normalised to the full cloud
        charge = stats.norm.pdf(x, loc=init, scale=init_size)
        charge_full = stats.norm.pdf(x_full, loc=init, scale=init_size)
        charge_col = [(np.sum(charge_full) - np.sum(charge)) / np.sum(charge_full)]
        charge = charge / np.sum(charge_full)
    elif int(init * nx / fccd) < len(charge):
        # point-like charge deposited in a single bin inside the FCCD
        charge[int(init * nx / fccd)] = 1
        charge_col = np.array([])
    else:
        # point-like charge beyond the FCCD: fully collected immediately
        charge_col = np.array([1])

    # run the simulation
    charge_collected = _compute_diffusion_impl(
        charge,
        nsteps=nsteps,
        factor=factor,
        recomb=recomb,
        recomb_depth=recomb_depth,
        delta_x=delta_x,
    )

    # cumulative charge vs time, prefixed by any instantly-collected fraction
    return np.cumsum(np.concatenate((charge_col, charge_collected)))
|
reboost/hpge/utils.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from typing import NamedTuple
|
|
5
|
+
|
|
6
|
+
import lgdo
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pint
|
|
9
|
+
from dbetto import AttrsDict
|
|
10
|
+
from lgdo import lh5
|
|
11
|
+
from scipy.interpolate import RegularGridInterpolator
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class HPGeScalarRZField(NamedTuple):
    """A scalar field defined in the cylindrical-like (r, z) HPGe plane."""

    # callable mapping (r, z) points to field values, e.g. the
    # RegularGridInterpolator built by get_hpge_scalar_rz_field
    φ: Callable
    "Scalar field, function of the coordinates (r, z)."
    r_units: pint.Unit
    "Physical units of the coordinate `r`."
    z_units: pint.Unit
    "Physical units of the coordinate `z`."
    φ_units: pint.Unit
    "Physical units of the field."
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_hpge_scalar_rz_field(
    filename: str, obj: str, field: str, out_of_bounds_val: int | float = np.nan, **kwargs
) -> HPGeScalarRZField:
    """Create an interpolator for a gridded scalar HPGe field defined on `(r, z)`.

    Reads from disk the following data structure: ::

        FILENAME/
        └── OBJ · struct{r,z,FIELD}
            ├── r · array<1>{real} ── {'units': 'UNITS'}
            ├── z · array<1>{real} ── {'units': 'UNITS'}
            └── FIELD · array<2>{real} ── {'units': 'UNITS'}

    where ``FILENAME``, ``OBJ`` and ``FIELD`` are provided as
    arguments to this function. `obj` is a :class:`~lgdo.types.struct.Struct`,
    `r` and `z` are one dimensional arrays specifying the radial and z
    coordinates of the rectangular grid — not the coordinates of each single
    grid point. In this coordinate system, the center of the p+ contact surface
    is at `(0, 0)`, with the p+ contact facing downwards. `field` is instead a
    two-dimensional array specifying the field value at each grid point. The
    first and second dimensions are `r` and `z`, respectively. NaN values are
    interpreted as points outside the detector profile in the `(r, z)` plane.

    Before returning a :class:`HPGeScalarRZField`, the gridded field is fed to
    :class:`scipy.interpolate.RegularGridInterpolator`.

    Parameters
    ----------
    filename
        name of the LH5 file containing the gridded scalar field.
    obj
        name of the HDF5 dataset where the data is saved.
    field
        name of the HDF5 dataset holding the field values.
    out_of_bounds_val
        value to use to replace NaNs in the field values.
    kwargs
        forwarded to :class:`scipy.interpolate.RegularGridInterpolator`.

    Raises
    ------
    ValueError
        if `obj` in the file is not an LGDO Struct.
    """
    data = lh5.read(obj, filename)

    if not isinstance(data, lgdo.Struct):
        # include the offending file in the message so the user can locate it
        msg = f"{obj} in {filename} is not an LGDO Struct"
        raise ValueError(msg)

    # view each component as a (unit-aware) numpy array, replacing NaNs
    # (points outside the detector profile) with out_of_bounds_val
    data = AttrsDict(
        {
            k: np.nan_to_num(data[k].view_as("np", with_units=True), nan=out_of_bounds_val)
            for k in ("r", "z", field)
        }
    )

    # interpolate on the magnitudes; units are carried separately below
    interpolator = RegularGridInterpolator((data.r.m, data.z.m), data[field].m, **kwargs)

    return HPGeScalarRZField(interpolator, data.r.u, data.z.u, data[field].u)
|
reboost/iterator.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
import typing
|
|
6
|
+
|
|
7
|
+
import awkward as ak
|
|
8
|
+
from lgdo.lh5 import LH5Store
|
|
9
|
+
from lgdo.types import LGDO, Table
|
|
10
|
+
|
|
11
|
+
from . import build_glm
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GLMIterator:
    """A class to iterate over the rows of an event lookup map."""

    def __init__(
        self,
        glm_file: str | None,
        stp_file: str,
        lh5_group: str,
        start_row: int,
        n_rows: int | None,
        *,
        stp_field: str = "stp",
        buffer: int = 10000,
        time_dict: dict | None = None,
        reshaped_files: bool = False,
    ):
        """Constructor for the GLMIterator.

        The GLM iterator provides a way to iterate over the
        simulated geant4 evtids, extracting the number of hits or steps for
        each range in evtids. This ensures a single simulated event
        is not split between two iterations and allows to specify a
        start and an end evtid to extract.

        In case the data is already reshaped and we do not need to
        read a specific range of evtids this iterator just loops
        over the input stp field. Otherwise if the GLM file is not provided
        it is created in memory.

        Parameters
        ----------
        glm_file
            the file containing the event lookup map, if `None` the glm will
            be created in memory if needed.
        stp_file
            the file containing the steps to read.
        lh5_group
            the name of the lh5 group to read.
        start_row
            the first row to read.
        n_rows
            the number of rows to read, if `None` read them all.
        stp_field
            name of the group.
        buffer
            the number of rows to read at once.
        time_dict
            time profiling data structure.
        reshaped_files
            flag for whether the files are reshaped.
        """
        # initialise
        self.glm_file = glm_file
        self.stp_file = stp_file
        self.lh5_group = lh5_group
        self.start_row = start_row
        # running cursor, reset on every __iter__
        self.start_row_tmp = start_row
        self.n_rows = n_rows
        self.buffer = buffer
        self.current_i_entry = 0
        self.stp_field = stp_field
        self.reshaped_files = reshaped_files

        # would be good to replace with an iterator
        self.sto = LH5Store()
        self.n_rows_read = 0
        self.time_dict = time_dict
        self.glm = None
        self.use_glm = True

        glm_n_rows = 0

        # build the glm in memory if needed: a row/evtid selection was
        # requested, or the files are not reshaped
        if self.glm_file is None and (
            (self.n_rows is not None) or (self.start_row != 0) or not reshaped_files
        ):
            if self.time_dict is not None:
                time_start = time.time()

            self.glm = build_glm.build_glm(
                stp_file, None, out_table_name="glm", id_name="evtid", lh5_groups=[lh5_group]
            )

            if self.time_dict is not None:
                self.time_dict.update_field("read/glm", time_start)

            glm_n_rows = len(self.glm)

        elif self.glm_file is None:
            # no glm needed: iterate the stp field directly
            self.use_glm = False
        else:
            glm_n_rows = self.sto.read_n_rows(f"glm/{self.lh5_group}", self.glm_file)

        # get the number of stp rows (best effort; 0 if the group is absent)
        try:
            stp_n_rows = self.sto.read_n_rows(f"{self.stp_field}/{self.lh5_group}", self.stp_file)
        except Exception:
            stp_n_rows = 0

        # heuristics for a good buffer length: scale by the average number
        # of stp rows per glm row
        if self.use_glm:
            self.buffer = int(buffer * glm_n_rows / (1 + stp_n_rows))
            msg = f"Number of stp rows {stp_n_rows}, number of glm rows {glm_n_rows} changing buffer from {buffer} to {self.buffer}"
            log.debug(msg)

    def __iter__(self) -> typing.Iterator:
        # reset the iteration state so the iterator is reusable
        self.current_i_entry = 0
        self.n_rows_read = 0
        self.start_row_tmp = self.start_row
        return self

    def get_n_rows(self):
        """Get the number of rows to read."""
        # get the number of rows to read
        if self.time_dict is not None:
            time_start = time.time()

        if self.n_rows is not None:
            # never read more than the rows remaining in the requested range
            # (the previous `buffer if buffer > rows_left else rows_left`
            # computed the maximum, over-reading past the n_rows limit)
            rows_left = self.n_rows - self.n_rows_read
            n_rows = min(self.buffer, rows_left)
        else:
            n_rows = self.buffer

        glm_rows = None
        start = 0
        n = 0

        if self.use_glm:
            if self.glm_file is not None:
                glm_rows = self.sto.read(
                    f"glm/{self.lh5_group}",
                    self.glm_file,
                    start_row=self.start_row_tmp,
                    n_rows=n_rows,
                )
                n_rows_read = len(glm_rows.view_as("ak"))

            else:
                # get the maximum row to read from the in-memory glm
                max_row = self.start_row_tmp + n_rows
                max_row = min(len(self.glm[self.lh5_group]), max_row)

                if max_row != self.start_row_tmp:
                    glm_rows = Table(self.glm[self.lh5_group][self.start_row_tmp : max_row])

                n_rows_read = max_row - self.start_row_tmp

            if self.time_dict is not None:
                self.time_dict.update_field("read/glm", time_start)

            self.n_rows_read += n_rows_read
            self.start_row_tmp += n_rows_read

            # view our glm as an awkward array
            if glm_rows is not None:
                glm_ak = glm_rows.view_as("ak")

                # remove empty rows
                glm_ak = glm_ak[glm_ak.n_rows > 0]

                if len(glm_ak) > 0:
                    # extract range of stp rows to read
                    start = glm_ak.start_row[0]
                    n = ak.sum(glm_ak.n_rows)

        else:
            # no glm: read the stp rows sequentially
            start = self.start_row_tmp
            n = n_rows
            n_rows_read = n
            self.start_row_tmp += n

        return start, n, n_rows_read

    def __next__(self) -> tuple[LGDO, int, int]:
        """Read one chunk.

        Returns
        -------
        a tuple of:
            - the steps
            - the chunk index
            - the number of steps read
        """
        # read the glm rows
        start, n, n_rows_read = self.get_n_rows()

        if self.time_dict is not None:
            time_start = time.time()

        try:
            stp_rows = self.sto.read(
                f"{self.stp_field}/{self.lh5_group}",
                self.stp_file,
                start_row=int(start),
                n_rows=int(n),
            )
            n_steps = len(stp_rows.view_as("ak"))

        # NOTE(review): presumably LH5Store raises OverflowError when reading
        # past the end of the file — treated as iterator exhaustion
        except OverflowError:
            raise StopIteration from None

        if n_rows_read == 0 or n_steps == 0:
            raise StopIteration

        # save time
        if self.time_dict is not None:
            self.time_dict.update_field("read/stp", time_start)

        self.current_i_entry += 1

        return (stp_rows, self.current_i_entry - 1, n_steps)
|
reboost/log_utils.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import colorlog
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def setup_log(level: int | None = None, multiproc: bool = False) -> None:
    """Setup a colored logger for this package.

    Parameters
    ----------
    level
        initial log level, or ``None`` to use the default.
    multiproc
        set to ``True`` to include process ID in log output (i.e. for multiprocessing setups)
    """
    # assemble the log format, optionally including the process id
    segments = ["%(log_color)s%(name)s [%(levelname)s]"]
    if multiproc:
        segments.append("(pid=%(process)s)")
    segments.append("%(message)s")

    stream_handler = colorlog.StreamHandler()
    stream_handler.setFormatter(colorlog.ColoredFormatter(" ".join(segments)))

    pkg_logger = logging.getLogger("reboost")
    pkg_logger.addHandler(stream_handler)
    if level is not None:
        pkg_logger.setLevel(level)
|
reboost/math/__init__.py
ADDED
|
File without changes
|