reboost 0.3.0-py3-none-any.whl → 0.3.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- reboost/__init__.py +2 -2
- reboost/_version.py +2 -2
- reboost/build_glm.py +8 -2
- reboost/build_hit.py +64 -55
- reboost/build_tcm.py +1 -1
- reboost/cli.py +10 -8
- reboost/core.py +86 -16
- reboost/hpge/psd.py +257 -0
- reboost/hpge/surface.py +145 -1
- reboost/iterator.py +119 -58
- reboost/optmap/cli.py +7 -7
- reboost/shape/group.py +1 -1
- reboost/utils.py +51 -1
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/METADATA +1 -1
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/RECORD +19 -19
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/WHEEL +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/entry_points.txt +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {reboost-0.3.0.dist-info → reboost-0.3.1.dist-info}/top_level.txt +0 -0
reboost/hpge/psd.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import logging
+from math import erf, exp

 import awkward as ak
 import numba
@@ -236,3 +237,259 @@ def _drift_time_heuristic_impl(
         dt_heu[i] = max_id_metric

     return dt_heu
+
+
+@numba.njit(cache=True)
+def _vectorized_erf(x: ArrayLike) -> NDArray:
+    """Error function that can take in a numpy array."""
+    out = np.empty_like(x)
+    for i in range(x.size):
+        out[i] = erf(x[i])
+    return out
+
+
+@numba.njit(cache=True)
+def _current_pulse_model(
+    times: ArrayLike, Amax: float, mu: float, sigma: float, tail_fraction: float, tau: float
+) -> NDArray:
+    r"""Analytic model for the current pulse in a germanium detector.
+
+    Consists of a Gaussian and an exponential tail:
+
+    .. math::
+
+        A(t) = A_{max}\times (1-p)\times \text{Gauss}(t,\mu,\sigma) +
+        A_{max}\times p\times (1-\text{Erf}((t-\mu)/\sigma))\times \frac{e^{(t/\tau)}}{2e^{\mu/\tau}}
+
+    Parameters
+    ----------
+    times
+        Array of times to compute the current for.
+    Amax
+        Maximum current.
+    mu
+        Time of the maximum current.
+    sigma
+        Width of the current pulse.
+    tail_fraction
+        Fraction of the tail in the pulse.
+    tau
+        Time constant of the low-time tail.
+
+    Returns
+    -------
+    The predicted current waveform for this energy deposit.
+    """
+    norm = 2 * exp(mu / tau)
+
+    dx = times - mu
+    term1 = Amax * (1 - tail_fraction) * np.exp(-(dx * dx) / (2 * sigma * sigma))
+    term2 = Amax * tail_fraction * (1 - _vectorized_erf(dx / sigma)) * np.exp(times / tau) / norm
+
+    return term1 + term2
+
+
+def convolve_surface_response(surf_current: np.ndarray, bulk_pulse: np.ndarray) -> NDArray:
+    """Convolve the surface response pulse with the bulk current pulse.
+
+    This combines the current induced on the edge of the FCCD region with the bulk response
+    on the p+ contact.
+
+    Parameters
+    ----------
+    surf_current
+        array of the current induced via diffusion against time.
+    bulk_pulse
+        the pulse template to convolve the surface current with.
+
+    Returns
+    -------
+    the current waveform after convolution.
+    """
+    return np.convolve(surf_current, bulk_pulse, mode="full")[: len(surf_current)]
+
+
+@numba.njit(cache=True)
+def get_current_waveform(
+    edep: ak.Array,
+    drift_time: ak.Array,
+    template: ArrayLike,
+    start: float,
+    dt: float,
+    range_t: tuple,
+) -> tuple[NDArray, NDArray]:
+    r"""Estimate the current waveform.
+
+    Based on modelling the current as a sum over the current pulse model defined by
+    the template.
+
+    .. math::
+
+        A(t) = \sum_i E_i \times N f(t, dt_i, \vec{\theta})
+
+    Where:
+
+    - $f(t)$ is the template,
+    - $\vec{\theta}$ are the parameters (sigma, p, tau),
+    - $E_i$ and $dt_i$ are the deposited energy and drift time,
+    - $N$ is a normalisation term.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    drift_time
+        Array of drift times for each step.
+    template
+        array of the template for the current waveforms, with 1 ns binning.
+    start
+        first time value of the template.
+    dt
+        timestep (in ns) for the template.
+    range_t
+        a range of times to search around.
+
+    Returns
+    -------
+    A tuple of the time and current for the current waveform for this event.
+    """
+    n = len(template)
+
+    times = np.arange(n) * dt + start
+    y = np.zeros_like(times)
+
+    for i in range(len(edep)):
+        E = edep[i]
+        mu = drift_time[i]
+        shift = int(mu / dt)
+
+        # Add scaled template starting at index `shift`
+        for j in range(n):
+            if (
+                (shift + j) >= n
+                or (times[shift + j] < range_t[0])
+                or (times[shift + j] > range_t[1])
+            ):
+                continue
+            y[shift + j] += E * template[j]
+
+    return times, y
+
+
+@numba.njit(cache=True)
+def _estimate_current_impl(
+    edep: ak.Array,
+    dt: ak.Array,
+    sigma: float,
+    tail_fraction: float,
+    tau: float,
+    mean_AoE: float = 0,
+) -> tuple[NDArray, NDArray]:
+    """Estimate the maximum current that would be measured in the HPGe detector.
+
+    This is based on extracting a waveform with :func:`get_current_waveform` and finding its maximum.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    dt
+        Array of drift times for each step.
+    sigma
+        Sigma parameter of the current pulse model.
+    tail_fraction
+        Tail-fraction parameter of the current pulse.
+    tau
+        Tail parameter of the current pulse.
+    mean_AoE
+        The mean AoE value for this detector (to normalise current pulses).
+    """
+    A = np.zeros(len(dt))
+    maximum_t = np.zeros(len(dt))
+
+    # get normalisation factor
+    x_coarse = np.linspace(-1000, 3000, 201)
+    x_fine = np.linspace(-1000, 3000, 4001)
+
+    # make a template with 1 ns binning so
+    # template[(i - start) / dt] = _current_pulse_model(x, 1, i, ...)
+
+    template_coarse = _current_pulse_model(x_coarse, 1, 0, sigma, tail_fraction, tau)
+    template_coarse /= np.max(template_coarse)
+    template_coarse *= mean_AoE
+
+    template_fine = _current_pulse_model(x_fine, 1, 0, sigma, tail_fraction, tau)
+    template_fine /= np.max(template_fine)
+    template_fine *= mean_AoE
+
+    for i in range(len(dt)):
+        t = np.asarray(dt[i])
+        e = np.asarray(edep[i])
+
+        # first pass
+        times_coarse, W = get_current_waveform(
+            e, t, template=template_coarse, start=-1000, dt=20, range_t=(-1000, 3000)
+        )
+
+        max_t = times_coarse[np.argmax(W)]
+
+        # fine scan
+        times, W = get_current_waveform(
+            e, t, template=template_fine, start=-1000, dt=1, range_t=(max_t - 50, max_t + 50)
+        )
+
+        A[i] = np.max(W)
+        maximum_t[i] = times[np.argmax(W)]
+
+    return A, maximum_t
+
+
+def maximum_current(
+    edep: ArrayLike,
+    drift_time: ArrayLike,
+    *,
+    sigma: float,
+    tail_fraction: float,
+    tau: float,
+    mean_AoE: float = 0,
+    get_timepoint: bool = False,
+) -> Array:
+    """Estimate the maximum current in the HPGe detector based on :func:`_estimate_current_impl`.
+
+    Parameters
+    ----------
+    edep
+        Array of energies for each step.
+    drift_time
+        Array of drift times for each step.
+    sigma
+        Sigma parameter of the current pulse model.
+    tail_fraction
+        Tail-fraction parameter of the current pulse.
+    tau
+        Tail parameter of the current pulse.
+    mean_AoE
+        The mean AoE value for this detector (to normalise current pulses).
+    get_timepoint
+        Flag to return the time of the maximum current (relative to t0) instead of the current.
+
+    Returns
+    -------
+    An Array of the maximum current for each hit.
+    """
+    # extract LGDO data and units
+    drift_time, _ = units.unwrap_lgdo(drift_time)
+
+    edep, _ = units.unwrap_lgdo(edep)
+
+    curr, time = _estimate_current_impl(
+        ak.Array(edep),
+        ak.Array(drift_time),
+        sigma=sigma,
+        tail_fraction=tail_fraction,
+        tau=tau,
+        mean_AoE=mean_AoE,
+    )
+
+    if get_timepoint:
+        return Array(time)
+    return Array(curr)
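For orientation, here is a minimal sketch of how the new A/E estimator added above could be driven. The input arrays and all pulse-shape parameter values are illustrative assumptions, not values shipped with reboost, and it assumes `units.unwrap_lgdo` passes plain awkward arrays through unchanged:

import awkward as ak

from reboost.hpge.psd import maximum_current

# toy step data: per-hit energies (keV) and drift times (ns)
edep = ak.Array([[100.0, 50.0], [200.0]])
drift_time = ak.Array([[300.0, 450.0], [500.0]])

# hypothetical pulse-shape parameters, for illustration only
curr = maximum_current(
    edep,
    drift_time,
    sigma=40.0,
    tail_fraction=0.3,
    tau=200.0,
    mean_AoE=1.0,
)
# pass get_timepoint=True to get the time of the current maximum instead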
reboost/hpge/surface.py
CHANGED
@@ -4,10 +4,12 @@ import logging

 import awkward as ak
 import legendhpges
+import numba
 import numpy as np
 from lgdo import VectorOfVectors
 from lgdo.types import LGDO
 from numpy.typing import ArrayLike
+from scipy import stats

 log = logging.getLogger(__name__)

@@ -104,4 +106,146 @@ def distance_to_surface(
         local_positions[indices], surface_indices=surface_indices
     )

-    return VectorOfVectors(ak.unflatten(distances, size))
+    return VectorOfVectors(ak.unflatten(distances, size), dtype=np.float32)
+
+
+@numba.njit(cache=True)
+def _advance_diffusion(
+    charge: np.ndarray,
+    factor: float,
+    recomb: float = 0,
+    recomb_depth: float = 600,
+    delta_x: float = 10,
+):
+    """Make a step of diffusion using the explicit Euler scheme.
+
+    Parameters
+    ----------
+    charge
+        charge in each space bin up to the FCCD.
+    factor
+        the factor of diffusion for the Euler scheme.
+    recomb
+        the recombination probability.
+    recomb_depth
+        the depth of the recombination region.
+    delta_x
+        the width of each spatial bin.
+
+    Returns
+    -------
+    a tuple of the charge distribution at the next time step and the collected charge.
+    """
+    charge_xp1 = np.append(charge[1:], [0])
+    charge_xm1 = np.append([0], charge[:-1])
+
+    # collected charge
+    collected = factor * charge[-1]
+
+    # charge at the next step
+    charge_new = charge_xp1 * factor + charge_xm1 * factor + charge * (1 - 2 * factor)
+
+    # correction for recombination
+    charge_new[0 : int(recomb_depth / delta_x)] = (1 - recomb) * charge_new[
+        0 : int(recomb_depth / delta_x)
+    ]
+
+    return charge_new, collected
+
+
+@numba.njit(cache=True)
+def _compute_diffusion_impl(
+    init_charge: np.ndarray,
+    nsteps: int,
+    factor: float,
+    recomb: float = 0,
+    recomb_depth: float = 600,
+    delta_x: float = 10,
+):
+    """Compute the charge collected as a function of time.
+
+    Parameters
+    ----------
+    init_charge
+        Initial charge distribution.
+    nsteps
+        Number of time steps to take.
+    kwargs
+        Keyword arguments to pass to :func:`_advance_diffusion`.
+    """
+    charge = init_charge
+    collected_charge = np.zeros(nsteps)

+    for i in range(nsteps):
+        charge, collected = _advance_diffusion(
+            charge, factor=factor, recomb=recomb, recomb_depth=recomb_depth, delta_x=delta_x
+        )
+        collected_charge[i] = collected
+
+    return collected_charge
+
+
+def get_surface_response(
+    fccd: float,
+    recomb_depth: float,
+    init: float = 0,
+    recomb: float = 0.002,
+    init_size: float = 0.0,
+    factor: float = 0.29,
+    nsteps: int = 10000,
+    delta_x: float = 10,
+):
+    """Extract the surface response current pulse based on diffusion.
+
+    Parameters
+    ----------
+    fccd
+        the full charge collection depth (in um).
+    recomb_depth
+        the depth of the recombination region (in um).
+    init
+        the initial position of the charge (in um).
+    recomb
+        the recombination rate.
+    init_size
+        the initial size of the charge cloud (in um).
+    factor
+        the factor for the explicit Euler scheme (the probability of charge diffusion).
+    nsteps
+        the number of time steps.
+    delta_x
+        the width of each position bin.
+    """
+    # number of position steps
+    nx = int(fccd / delta_x)
+
+    # initial charge
+    charge = np.zeros(nx)
+
+    # position bins (the second array extends past the FCCD)
+    x = (fccd / nx) * np.arange(nx)
+    x_full = (fccd / nx) * np.arange(2 * nx)
+
+    # generate initial conditions
+    if init_size != 0:
+        charge = stats.norm.pdf(x, loc=init, scale=init_size)
+        charge_full = stats.norm.pdf(x_full, loc=init, scale=init_size)
+        charge_col = [(np.sum(charge_full) - np.sum(charge)) / np.sum(charge_full)]
+        charge = charge / np.sum(charge_full)
+    elif int(init * nx / fccd) < len(charge):
+        charge[int(init * nx / fccd)] = 1
+        charge_col = np.array([])
+    else:
+        charge_col = np.array([1])
+
+    # run the simulation
+    charge_collected = _compute_diffusion_impl(
+        charge,
+        nsteps=nsteps,
+        factor=factor,
+        recomb=recomb,
+        recomb_depth=recomb_depth,
+        delta_x=delta_x,
+    )
+
+    return np.cumsum(np.concatenate((charge_col, charge_collected)))
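To make the new pieces concrete, a minimal sketch of feeding the diffusion-based surface response into the convolution helper added in reboost/hpge/psd.py; every number below is an illustrative assumption, not a calibrated value:

import numpy as np

from reboost.hpge.psd import convolve_surface_response
from reboost.hpge.surface import get_surface_response

# cumulative charge collected from a 1000 um FCCD, point deposit at 800 um
surf_q = get_surface_response(fccd=1000, recomb_depth=600, init=800)

# differentiate to get the surface current, then fold in a toy Gaussian bulk pulse
surf_current = np.diff(surf_q)
t = np.arange(surf_current.size)
bulk_pulse = np.exp(-0.5 * ((t - 100.0) / 20.0) ** 2)

waveform = convolve_surface_response(surf_current, bulk_pulse)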
reboost/iterator.py
CHANGED
@@ -8,7 +8,7 @@ import awkward as ak
 from lgdo.lh5 import LH5Store
 from lgdo.types import LGDO, Table

-from
+from . import build_glm

 log = logging.getLogger(__name__)

@@ -25,17 +25,28 @@ class GLMIterator:
         n_rows: int | None,
         *,
         stp_field: str = "stp",
-        read_vertices: bool = False,
         buffer: int = 10000,
         time_dict: dict | None = None,
+        reshaped_files: bool = False,
     ):
-        """Constructor for the
+        """Constructor for the GLMIterator.
+
+        The GLM iterator provides a way to iterate over the
+        simulated geant4 evtids, extracting the number of hits or steps for
+        each range in evtids. This ensures a single simulated event
+        is not split between two iterations and allows specifying a
+        start and an end evtid to extract.
+
+        In case the data is already reshaped and we do not need to
+        read a specific range of evtids, this iterator just loops
+        over the input stp field. Otherwise, if the GLM file is not
+        provided, it is created in memory.

         Parameters
         ----------
         glm_file
             the file containing the event lookup map, if `None` the glm will
-            be created in memory.
+            be created in memory if needed.
         stp_file
             the file containing the steps to read.
         lh5_group
@@ -46,12 +57,12 @@ class GLMIterator:
             the number of rows to read, if `None` read them all.
         stp_field
             name of the group.
-        read_vertices
-            whether to read also the vertices table.
         buffer
             the number of rows to read at once.
         time_dict
             time profiling data structure.
+        reshaped_files
+            flag for whether the files are reshaped.
         """
         # initialise
         self.glm_file = glm_file
@@ -62,18 +73,47 @@ class GLMIterator:
         self.n_rows = n_rows
         self.buffer = buffer
         self.current_i_entry = 0
-        self.read_vertices = read_vertices
         self.stp_field = stp_field
+        self.reshaped_files = reshaped_files

         # would be good to replace with an iterator
         self.sto = LH5Store()
         self.n_rows_read = 0
         self.time_dict = time_dict
         self.glm = None
+        self.use_glm = True
+
+        glm_n_rows = 0

-        # build the glm in memory
-        if self.glm_file is None
-            self.
+        # build the glm in memory if needed
+        if self.glm_file is None and (
+            (self.n_rows is not None) or (self.start_row != 0) or not reshaped_files
+        ):
+            if self.time_dict is not None:
+                time_start = time.time()
+
+            self.glm = build_glm.build_glm(
+                stp_file, None, out_table_name="glm", id_name="evtid", lh5_groups=[lh5_group]
+            )
+
+            if self.time_dict is not None:
+                self.time_dict.update_field("read/glm", time_start)
+
+            glm_n_rows = len(self.glm)
+
+        elif self.glm_file is None:
+            self.use_glm = False
+        else:
+            glm_n_rows = self.sto.read_n_rows(f"glm/{self.lh5_group}", self.glm_file)
+
+        # get the number of stp rows
+        stp_n_rows = self.sto.read_n_rows(f"{self.stp_field}/{self.lh5_group}", self.stp_file)
+
+        # heuristics for a good buffer length
+        if self.use_glm:
+            self.buffer = int(buffer * glm_n_rows / stp_n_rows)
+            msg = f"Number of stp rows {stp_n_rows}, number of glm rows {glm_n_rows} changing buffer from {buffer} to {self.buffer}"
+            log.info(msg)

     def __iter__(self) -> typing.Iterator:
         self.current_i_entry = 0
@@ -81,78 +121,99 @@
         self.start_row_tmp = self.start_row
         return self

-    def
+    def get_n_rows(self):
+        """Get the number of rows to read."""
         # get the number of rows to read
+        if self.time_dict is not None:
+            time_start = time.time()
+
         if self.n_rows is not None:
             rows_left = self.n_rows - self.n_rows_read
             n_rows = self.buffer if (self.buffer > rows_left) else rows_left
         else:
             n_rows = self.buffer

-
-
+        glm_rows = None
+        start = 0
+        n = 0

-
-
-
-
-
-
-
-
-
+        if self.use_glm:
+            if self.glm_file is not None:
+                glm_rows, n_rows_read = self.sto.read(
+                    f"glm/{self.lh5_group}",
+                    self.glm_file,
+                    start_row=self.start_row_tmp,
+                    n_rows=n_rows,
+                )
+            else:
+                # get the maximum row to read
+                max_row = self.start_row_tmp + n_rows
+                max_row = min(len(self.glm[self.lh5_group]), max_row)

-
-
+                if max_row != self.start_row_tmp:
+                    glm_rows = Table(self.glm[self.lh5_group][self.start_row_tmp : max_row])

-
+                n_rows_read = max_row - self.start_row_tmp

-
-
+            if self.time_dict is not None:
+                self.time_dict.update_field("read/glm", time_start)

-
-
+            self.n_rows_read += n_rows_read
+            self.start_row_tmp += n_rows_read

-
-
+            # view our glm as an awkward array
+            if glm_rows is not None:
+                glm_ak = glm_rows.view_as("ak")

-
-
+                # remove empty rows
+                glm_ak = glm_ak[glm_ak.n_rows > 0]

-
-
+                if len(glm_ak) > 0:
+                    # extract range of stp rows to read
+                    start = glm_ak.start_row[0]
+                    n = ak.sum(glm_ak.n_rows)

-
-
-
-
+        else:
+            start = self.start_row_tmp
+            n = n_rows
+            n_rows_read = n
+            self.start_row_tmp += n
+
+        return start, n, n_rows_read
+
+    def __next__(self) -> tuple[LGDO, int, int]:
+        """Read one chunk.
+
+        Returns
+        -------
+        a tuple of:
+        - the steps
+        - the chunk index
+        - the number of steps read
+        """
+        # read the glm rows
+        start, n, n_rows_read = self.get_n_rows()

-
-
+        if self.time_dict is not None:
+            time_start = time.time()

+        try:
             stp_rows, n_steps = self.sto.read(
-                f"
+                f"{self.stp_field}/{self.lh5_group}",
                 self.stp_file,
                 start_row=int(start),
                 n_rows=int(n),
             )
+        except OverflowError:
+            raise StopIteration from None

-
-
-            self.time_dict.update_field("read/stp", time_start)
+        if n_rows_read == 0 or n_steps == 0:
+            raise StopIteration

-
+        # save time
+        if self.time_dict is not None:
+            self.time_dict.update_field("read/stp", time_start)

-
-            vert_rows, _ = self.sto.read(
-                "/vtx",
-                self.stp_file,
-                start_row=self.start_row,
-                n_rows=n_rows,
-            )
-        else:
-            vert_rows = None
-        # vertex table should have same structure as glm
+        self.current_i_entry += 1

-
-        return (None, None, self.current_i_entry, 0)
+        return (stp_rows, self.current_i_entry - 1, n_steps)