reboost 0.2.1-py3-none-any.whl → 0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reboost/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.2.1'
-__version_tuple__ = version_tuple = (0, 2, 1)
+__version__ = version = '0.2.3'
+__version_tuple__ = version_tuple = (0, 2, 3)
reboost/build_evt.py CHANGED
@@ -71,7 +71,6 @@ def build_evt(
         path to the evt tier (output) file, if `None` the :class:`Table` is returned in memory
     config
         dictionary of the configuration.
-
     buffer
         number of events to process simultaneously
 
reboost/build_glm.py CHANGED
@@ -224,7 +224,7 @@ def build_glm(
     lh5_table_list = list(lh5.ls(stp_file, "stp/"))
 
     # get rows in the table
-    if files.glm is None:
+    if files.glm[file_idx] is None:
         for lh5_table in lh5_table_list:
             if lh5_table.replace("stp/", "") not in glm_sum:
                 glm_sum[lh5_table.replace("stp/", "")] = None
@@ -232,7 +232,7 @@ def build_glm(
         glm_sum = None
 
     # start row for each table
-    start_row = {lh5_tab: 0 for lh5_tab in lh5_table_list}
+    start_row = dict.fromkeys(lh5_table_list, 0)
 
     vfield = f"stp/vertices/{id_name}"
 
@@ -274,7 +274,7 @@ def build_glm(
 
         lh5_subgroup = lh5_table.replace("stp/", "")
 
-        if files.glm is not None:
+        if files.glm[file_idx] is not None:
             store.write(
                 out_tab,
                 f"{out_table_name}/{lh5_subgroup}",
reboost/build_hit.py CHANGED
@@ -179,7 +179,7 @@ def build_hit(
     config: Mapping | str,
     args: Mapping | AttrsDict,
     stp_files: str | list[str],
-    glm_files: str | list[str],
+    glm_files: str | list[str] | None,
     hit_files: str | list[str] | None,
     *,
     start_evtid: int = 0,
@@ -199,7 +199,7 @@ def build_hit(
     stp_files
         list of strings or string of the stp file path.
     glm_files
-        list of strings or string of the glm file path.
+        list of strings or string of the glm file path, if `None` will be build in memory.
     hit_files
         list of strings or string of the hit file path. The `hit` file can also be `None` in which
         case the hits are returned as an `ak.Array` in memory.
@@ -246,6 +246,8 @@ def build_hit(
     # loop over processing groups
     for group_idx, proc_group in enumerate(config["processing_groups"]):
         proc_name = proc_group.get("name", "default")
+        msg = f"... starting group {proc_name}"
+        log.info(msg)
 
         if proc_name not in time_dict:
             time_dict[proc_name] = ProfileDict()
@@ -261,9 +263,11 @@ def build_hit(
                 for mapping in proc_group.get("detector_mapping")
             ]
         )
-
        # loop over detectors
        for in_det_idx, (in_detector, out_detectors) in enumerate(detectors_mapping.items()):
+            msg = f"... processing {in_detector} (to {out_detectors})"
+            log.info(msg)
+
            # get detector objects
            det_objects = core.get_detector_objects(
                output_detectors=out_detectors,
@@ -286,23 +290,29 @@ def build_hit(
                time_dict=time_dict[proc_name],
            )
            for stps, _, chunk_idx, _ in glm_it:
-                # converting to awwkard
+                # converting to awkward
                if stps is None:
                    continue
 
-                # produce the hit table
                ak_obj = stps.view_as("ak")
 
+                # produce the hit table
                for out_det_idx, out_detector in enumerate(out_detectors):
                    # loop over the rows
                    if out_detector not in output_tables and files.hit is None:
                        output_tables[out_detector] = None
 
-                    hit_table = core.evaluate_hit_table_layout(
-                        copy.deepcopy(ak_obj),
-                        expression=proc_group["hit_table_layout"],
-                        time_dict=time_dict[proc_name],
-                    )
+                    # get the attributes
+                    attrs = utils.copy_units(stps)
+
+                    if "hit_table_layout" in proc_group:
+                        hit_table = core.evaluate_hit_table_layout(
+                            copy.deepcopy(ak_obj),
+                            expression=proc_group["hit_table_layout"],
+                            time_dict=time_dict[proc_name],
+                        )
+                    else:
+                        hit_table = copy.deepcopy(stps)
 
                    local_dict = {
                        "DETECTOR_OBJECTS": det_objects[out_detector],
@@ -310,7 +320,7 @@ def build_hit(
                        "DETECTOR": out_detector,
                    }
                    # add fields
-                    for field, expression in proc_group["operations"].items():
+                    for field, expression in proc_group.get("operations", {}).items():
                        # evaluate the expression
                        col = core.evaluate_output_column(
                            hit_table,
@@ -323,10 +333,20 @@ def build_hit(
                        hit_table.add_field(field, col)
 
                    # remove unwanted fields
-                    hit_table = core.remove_columns(hit_table, outputs=proc_group["outputs"])
+                    if "outputs" in proc_group:
+                        hit_table = core.remove_columns(
+                            hit_table, outputs=proc_group["outputs"]
+                        )
+
+                    # assign units in the output table
+                    hit_table = utils.assign_units(hit_table, attrs)
 
                    # get the IO mode
 
+                    new_hit_file = (file_idx == 0) or (
+                        files.hit[file_idx] != files.hit[file_idx - 1]
+                    )
+
                    wo_mode = (
                        "of"
                        if (
@@ -334,6 +354,7 @@ def build_hit(
                            and out_det_idx == 0
                            and in_det_idx == 0
                            and chunk_idx == 0
+                            and new_hit_file
                        )
                        else "append"
                    )
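Taken together, these hunks make `glm_files`, `operations`, and `outputs` optional, and start a fresh hit file only when the output path actually changes. A minimal sketch of a call exercising the new in-memory glm path; the paths and config name here are hypothetical, not taken from the package:

```python
from reboost.build_hit import build_hit

# hypothetical paths and config; with glm_files=None the glm is built in
# memory, and with hit_files=None the hits come back as an ak.Array
hits = build_hit(
    config="hit_config.yaml",     # hypothetical processing config
    args={},
    stp_files="stp/run0001.lh5",  # hypothetical stp file
    glm_files=None,               # new in 0.2.3: build the glm in memory
    hit_files=None,               # keep the output in memory too
)
```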
reboost/build_tcm.py CHANGED
@@ -6,7 +6,7 @@ import re
 import awkward as ak
 from lgdo import Table, lh5
 
-from reboost import shape
+from reboost.shape import group
 
 log = logging.getLogger(__name__)
 
@@ -102,7 +102,7 @@ def get_tcm_from_ak(
 
     obj_tot = ak.concatenate(sort_objs)
 
-    return shape.group.group_by_time(
+    return group.group_by_time(
        obj_tot,
        time_name=time_name,
        evtid_name=idx_name,
reboost/core.py CHANGED
@@ -59,6 +59,7 @@ def evaluate_output_column(
     expr = expression.replace(f"{table_name}.", "")
 
     # get func call and modules to import
+
     func_call, globals_dict = utils.get_function_string(expr)
 
     msg = f"evaluating table with command {expr} and local_dict {local_dict.keys()}"
@@ -153,7 +154,7 @@ def get_global_objects(
 
 
 def get_detectors_mapping(
-    output_detector_expression: str,
+    output_detector_expression: str | list,
     objects: AttrsDict | None = None,
     input_detector_name: str | None = None,
 ) -> dict:
@@ -210,15 +211,21 @@ def get_detectors_mapping(
         input_detector_name = "dets",objects=objs)
     {'dets': ['ch0', 'ch1', 'ch2']}
     """
-    func, globs = utils.get_function_string(output_detector_expression)
     out_names = []
+    if isinstance(output_detector_expression, str):
+        out_list = [output_detector_expression]
+    else:
+        out_list = list(output_detector_expression)
+
+    for expression_tmp in out_list:
+        func, globs = utils.get_function_string(expression_tmp)
 
-    # if no package was imported its just a name
-    try:
-        objs = evaluate_object(output_detector_expression, local_dict={"OBJECTS": objects})
-        out_names.extend(objs)
-    except Exception:
-        out_names.append(output_detector_expression)
+        # if no package was imported its just a name
+        try:
+            objs = evaluate_object(expression_tmp, local_dict={"OBJECTS": objects})
+            out_names.extend(objs)
+        except Exception:
+            out_names.append(expression_tmp)
 
     # simple one to one mapping
     if input_detector_name is None:
@@ -273,19 +280,19 @@ def get_detector_objects(
 
     det_objects_dict = {}
     for output_detector in output_detectors:
-        det_objects_dict[output_detector] = AttrsDict(
-            {
-                obj_name: evaluate_object(
-                    obj_expression,
-                    local_dict={
-                        "ARGS": args,
-                        "DETECTOR": output_detector,
-                        "OBJECTS": global_objects,
-                    },
-                )
-                for obj_name, obj_expression in expressions.items()
-            }
-        )
+        obj_dict = {}
+        for obj_name, obj_expression in expressions.items():
+            obj_dict[obj_name] = evaluate_object(
+                obj_expression,
+                local_dict={
+                    "ARGS": args,
+                    "DETECTOR": output_detector,
+                    "OBJECTS": global_objects,
+                    "DETECTOR_OBJECTS": AttrsDict(obj_dict),
+                },
+            )
+
+        det_objects_dict[output_detector] = AttrsDict(obj_dict)
     res = AttrsDict(det_objects_dict)
 
     if time_dict is not None:
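`get_detectors_mapping` now also accepts a list of expressions or plain detector names. A hedged sketch (the names are made up, and the one-to-one output shape is inferred from the docstring example above):

```python
from reboost import core

# plain names fail evaluation as expressions and are kept verbatim; with
# no input_detector_name this presumably gives a one-to-one mapping,
# i.e. {"det001": ["det001"], "det002": ["det002"]}
mapping = core.get_detectors_mapping(["det001", "det002"])
```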
reboost/iterator.py CHANGED
@@ -5,7 +5,9 @@ import time
 import typing
 
 from lgdo.lh5 import LH5Store
-from lgdo.types import LGDO
+from lgdo.types import LGDO, Table
+
+from reboost import build_glm
 
 log = logging.getLogger(__name__)
 
@@ -15,7 +17,7 @@ class GLMIterator:
 
     def __init__(
        self,
-        glm_file: str,
+        glm_file: str | None,
        stp_file: str,
        lh5_group: str,
        start_row: int,
@@ -31,7 +33,8 @@ class GLMIterator:
        Parameters
        ----------
        glm_file
-            the file containing the event lookup map.
+            the file containing the event lookup map, if `None` the glm will
+            be created in memory.
        stp_file
            the file containing the steps to read.
        lh5_group
@@ -65,6 +68,11 @@ class GLMIterator:
        self.sto = LH5Store()
        self.n_rows_read = 0
        self.time_dict = time_dict
+        self.glm = None
+
+        # build the glm in memory
+        if self.glm_file is None:
+            self.glm = build_glm.build_glm(stp_file, None, out_table_name="glm", id_name="evtid")
 
    def __iter__(self) -> typing.Iterator:
        self.current_i_entry = 0
@@ -83,10 +91,21 @@ class GLMIterator:
        if self.time_dict is not None:
            time_start = time.time()
 
-        # read the glm rows
-        glm_rows, n_rows_read = self.sto.read(
-            f"glm/{self.lh5_group}", self.glm_file, start_row=self.start_row_tmp, n_rows=n_rows
-        )
+        # read the glm rows]
+        if self.glm_file is not None:
+            glm_rows, n_rows_read = self.sto.read(
+                f"glm/{self.lh5_group}", self.glm_file, start_row=self.start_row_tmp, n_rows=n_rows
+            )
+        else:
+            # get the maximum row to read
+            max_row = self.start_row_tmp + n_rows
+            max_row = min(len(self.glm[self.lh5_group]), max_row)
+
+            if max_row != self.start_row_tmp:
+                glm_rows = Table(self.glm[self.lh5_group][self.start_row_tmp : max_row])
+
+            n_rows_read = max_row - self.start_row_tmp
+
        if self.time_dict is not None:
            self.time_dict.update_field("read/glm", time_start)
 
@@ -106,7 +125,6 @@ class GLMIterator:
        # extract range of stp rows to read
        start = glm_ak.start_row[0]
        n = sum(glm_ak.n_rows)
-
        if self.time_dict is not None:
            time_start = time.time()
 
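The in-memory branch clamps each read window to the end of the cached glm table; a standalone sketch of that arithmetic with made-up numbers:

```python
# hypothetical iterator state: 50 cached rows, reading in chunks of 25
start_row_tmp, n_rows, table_len = 40, 25, 50

max_row = min(table_len, start_row_tmp + n_rows)  # clamp to the table end
n_rows_read = max_row - start_row_tmp             # only 10 rows remain

assert (max_row, n_rows_read) == (50, 10)
```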
reboost/math/functions.py CHANGED
@@ -11,7 +11,7 @@ log = logging.getLogger(__name__)
 
 
 def piecewise_linear_activeness(
-    distances: VectorOfVectors | ak.Array, fccd: float, tl: float
+    distances: VectorOfVectors | ak.Array, fccd: float, dlf: float
 ) -> VectorOfVectors | Array:
     r"""Piecewise linear HPGe activeness model.
 
@@ -21,14 +21,15 @@ def piecewise_linear_activeness(
 
        f(d) =
        \begin{cases}
-            0 & \text{if } d < t, \\
-            \frac{x-l}{f - l} & \text{if } t \leq d < f, \\
+            0 & \text{if } d < f*l, \\
+            \frac{x-f*l}{f - f*l} & \text{if } t \leq d < f, \\
            1 & \text{otherwise.}
        \end{cases}
 
    Where:
+
    - `d`: Distance to surface,
-    - `l`: Depth of transition layer start
+    - `l`: Dead layer fraction, the fraction of the FCCD which is fully inactive
    - `f`: Full charge collection depth (FCCD).
 
@@ -43,8 +44,8 @@ def piecewise_linear_activeness(
 
    fccd
        the value of the FCCD
-    tl
-        the start of the transition layer.
+    dlf
+        the fraction of the FCCD which is fully inactive.
 
    Returns
    -------
@@ -58,10 +59,117 @@ def piecewise_linear_activeness(
    else:
        distances_ak = distances
 
+    dl = fccd * dlf
+    distances_flat = (
+        ak.flatten(distances_ak).to_numpy() if distances_ak.ndim > 1 else distances_ak.to_numpy()
+    )
+
    # compute the linear piecewise
-    results = ak.where(
-        (distances_ak > fccd) | np.isnan(distances_ak),
-        1,
-        ak.where(distances_ak <= tl, 0, (distances_ak - tl) / (fccd - tl)),
+    results = np.full_like(distances_flat, np.nan, dtype=np.float64)
+    lengths = ak.num(distances_ak) if distances_ak.ndim > 1 else len(distances_ak)
+
+    mask1 = (distances_flat > fccd) | np.isnan(distances_flat)
+    mask2 = (distances_flat <= dl) & (~mask1)
+    mask3 = ~(mask1 | mask2)
+
+    # assign the values
+    results[mask1] = 1
+    results[mask2] = 0
+    results[mask3] = (distances_flat[mask3] - dl) / (fccd - dl)
+
+    # reshape
+    results = ak.unflatten(ak.Array(results), lengths) if distances_ak.ndim > 1 else results
+
+    return VectorOfVectors(results) if results.ndim > 1 else Array(results)
+
+
+def vectorised_active_energy(
+    distances: VectorOfVectors | ak.Array,
+    edep: VectorOfVectors | ak.Array,
+    fccd: float | list,
+    dlf: float | list,
+) -> VectorOfVectors | Array:
+    r"""Energy after piecewise linear HPGe activeness model vectorised over FCCD or dead layer fraction.
+
+    Based on the same linear activeness function as :func:`piecewise_linear_activeness`. However,
+    this function vectorises the calculation to provide a range of output energies varying the fccd or
+    dead layer fraction. Either fccd or dlf can be a list. This adds an extra dimension to the
+    output, with the same length as the input fccd or dlf list.
+
+    .. warning:
+        It is not currently implemented to vary both dlf and fccd.
+
+    Parameters
+    ----------
+    distances
+        the distance from each step to the detector surface. Can be either a
+        `awkward` array, or a LGDO `VectorOfVectors` . The computation
+        is performed for each element and the first dimension is preserved, a
+        new dimension is added vectorising over the FCCD or DLF.
+    edep
+        the energy for each step.
+    fccd
+        the value of the FCCD, can be a list.
+    dlf
+        the fraction of the FCCD which is fully inactive, can be a list.
+
+    Returns
+    -------
+    a :class:`VectorOfVectors` or :class:`Array` of the activeness
+    """
+    # add checks on fccd, dlf
+    fccd = np.array(fccd)
+    dlf = np.array(dlf)
+
+    if (fccd.ndim + dlf.ndim) > 1:
+        msg = "Currently only one of FCCD and dlf can be varied"
+        raise NotImplementedError(msg)
+
+    # convert fccd and or dlf to the right shape
+    if fccd.ndim == 0:
+        if dlf.ndim == 0:
+            dlf = dlf[np.newaxis]
+        fccd = np.full_like(dlf, fccd)
+
+    dl = fccd * dlf
+
+    def _convert(field):
+        # convert to ak
+        if isinstance(field, VectorOfVectors):
+            field_ak = field.view_as("ak")
+        elif not isinstance(field, ak.Array):
+            field_ak = ak.Array(field)
+        else:
+            msg = f"{field} must be an awkward array or VectorOfVectors"
+            raise TypeError(msg)
+
+        return field_ak, ak.flatten(field_ak).to_numpy()[:, np.newaxis]
+
+    distances_ak, distances_flat = _convert(distances)
+    _, edep_flat = _convert(edep)
+    runs = ak.num(distances_ak, axis=-1)
+
+    # vectorise fccd or tl
+
+    fccd_list = np.tile(fccd, (len(distances_flat), 1))
+    dl_list = np.tile(dl, (len(distances_flat), 1))
+    distances_shaped = np.tile(distances_flat, (1, len(dl)))
+
+    # compute the linear piecewise
+    results = np.full_like(fccd_list, np.nan, dtype=np.float64)
+
+    # Masks
+    mask1 = (distances_shaped > fccd_list) | np.isnan(distances_shaped)
+    mask2 = ((distances_shaped <= dl_list) | (fccd_list == dl_list)) & ~mask1
+    mask3 = ~(mask1 | mask2)  # Safe, avoids recomputing anything expensive
+
+    # Assign values
+    results[mask1] = 1.0
+    results[mask2] = 0.0
+    results[mask3] = (distances_shaped[mask3] - dl_list[mask3]) / (
+        fccd_list[mask3] - dl_list[mask3]
    )
-    return VectorOfVectors(results) if results.ndim > 1 else Array(results.to_numpy())
+
+    energy = ak.sum(ak.unflatten(results * edep_flat, runs), axis=-2)
+
+    return VectorOfVectors(energy) if energy.ndim > 1 else Array(energy.to_numpy())
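Reading `f` as the FCCD and `l` as the new dead layer fraction `dlf` (so the fully dead region ends at `d = f*l`), the three masks above implement the piecewise model. A small self-contained numeric check, with made-up parameter values:

```python
import numpy as np

fccd, dlf = 1.0, 0.4  # hypothetical: 1 mm FCCD, 40% of it fully dead
dl = fccd * dlf       # the dead layer ends at 0.4 mm

def activeness(d: float) -> float:
    # mirrors mask1/mask2/mask3: 1 beyond the FCCD (or for nan), 0 inside
    # the dead layer, and a linear ramp in between
    if np.isnan(d) or d > fccd:
        return 1.0
    if d <= dl:
        return 0.0
    return (d - dl) / (fccd - dl)

print([activeness(d) for d in (0.2, 0.4, 0.7, 1.5)])  # [0.0, 0.0, 0.5, 1.0]
```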
reboost/shape/cluster.py CHANGED
@@ -40,17 +40,17 @@ def cluster_by_step_length(
     pos_x: ak.Array | VectorOfVectors,
     pos_y: ak.Array | VectorOfVectors,
     pos_z: ak.Array | VectorOfVectors,
-    dist: ak.Array | VectorOfVectors,
-    surf_cut: float = 2,
+    dist: ak.Array | VectorOfVectors | None = None,
+    surf_cut: float | None = None,
     threshold: float = 0.1,
-    threshold_surf: float = 0.0,
+    threshold_surf: float | None = None,
 ) -> VectorOfVectors:
     """Perform clustering based on the step length.
 
     Steps are clustered based on distance, if either:
     - a step is in a new track,
     - a step moves from surface to bulk region (or visa versa),
-    - the distance between the first step and the cluster and the current is above a threshold.
+    - the distance between the current step and the first step of the current cluster is above a threshold.
 
     Then a new cluster is started. The surface region is defined as the volume
     less than surf_cut distance to the surface. This allows for a fine tuning of the
@@ -67,9 +67,9 @@ def cluster_by_step_length(
    pos_z
        z position of the step.
    dist
-        distance to the detector surface.
+        distance to the detector surface. Can be `None` in which case all steps are treated as being in the "bulk".
    surf_cut
-        Size of the surface region (in mm)
+        Size of the surface region (in mm), if `None` no selection is applied (default).
    threshold
        Distance threshold in mm to combine steps in the bulk.
    threshold_surf
@@ -107,7 +107,7 @@ def cluster_by_step_length(
        ak.flatten(ak.local_index(trackid)).to_numpy(),
        ak.flatten(trackid).to_numpy(),
        pos,
-        ak.flatten(dist).to_numpy(),
+        dist_to_surf=ak.flatten(dist).to_numpy() if dist is not None else dist,
        surf_cut=surf_cut,
        threshold=threshold,
        threshold_surf=threshold_surf,
@@ -127,10 +127,10 @@ def cluster_by_distance_numba(
    local_index: np.ndarray,
    trackid: np.ndarray,
    pos: np.ndarray,
-    dist_to_surf: np.ndarray,
-    surf_cut: float = 2,
+    dist_to_surf: np.ndarray | None,
+    surf_cut: float | None = None,
    threshold: float = 0.1,
-    threshold_surf: float = 0.0,
+    threshold_surf: float | None = None,
 ) -> np.ndarray:
    """Cluster steps by the distance between points in the same track.
 
@@ -146,9 +146,9 @@ def cluster_by_distance_numba(
    pos
        `(n,3)` size array of the positions
    dist_to_surf
-        1D array of the distance to the detector surface.
+        1D array of the distance to the detector surface. Can be `None` in which case all steps are treated as being in the bulk.
    surf_cut
-        Size of the surface region (in mm)
+        Size of the surface region (in mm), if `None` no selection is applied.
    threshold
        Distance threshold in mm to combine steps in the bulk.
    threshold_surf
@@ -172,14 +172,20 @@ def cluster_by_distance_numba(
    is_surf_prev = False
 
    for idx in range(n):
-        thr = threshold if dist_to_surf[idx] > surf_cut else threshold_surf
-
-        new_cluster = (
-            (trackid[idx] != trackid_prev)
-            or (is_surf_prev and (dist_to_surf[idx] > surf_cut))
-            or ((not is_surf_prev) and (dist_to_surf[idx] < surf_cut))
-            or (_dist(pos[idx, :], pos_prev) > thr)
-        )
+        # consider a surface and a bulk region
+        if dist_to_surf is not None:
+            thr = threshold if dist_to_surf[idx] > surf_cut else threshold_surf
+
+            new_cluster = (
+                (trackid[idx] != trackid_prev)
+                or (is_surf_prev and (dist_to_surf[idx] > surf_cut))
+                or ((not is_surf_prev) and (dist_to_surf[idx] < surf_cut))
+                or (_dist(pos[idx, :], pos_prev) > thr)
+            )
+        # basic clustering without split into surface / bulk
+        else:
+            thr = threshold
+            new_cluster = (trackid[idx] != trackid_prev) or (_dist(pos[idx, :], pos_prev) > thr)
 
        # New hit, reset cluster index
        if idx == 0 or local_index[idx] == 0:
@@ -197,7 +203,8 @@ def cluster_by_distance_numba(
 
        # Update previous values
        trackid_prev = trackid[idx]
-        is_surf_prev = dist_to_surf[idx] < surf_cut
+        if dist_to_surf is not None:
+            is_surf_prev = dist_to_surf[idx] < surf_cut
 
    return out
 
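With `dist_to_surf=None`, only a track change or the step-length threshold starts a new cluster. A standalone pure-Python sketch of that reduced rule (not the numba implementation; the data are made up):

```python
import numpy as np

# one hypothetical track with three steps; threshold in mm
trackid = np.array([1, 1, 1])
pos = np.array([[0.0, 0.0, 0.0], [0.05, 0.0, 0.0], [0.5, 0.0, 0.0]])
threshold = 0.1

cluster = np.zeros(len(trackid), dtype=np.int32)
cluster_idx, pos_prev, track_prev = 0, pos[0], trackid[0]
for i in range(1, len(trackid)):
    # new cluster only on a track change or a step further than
    # `threshold` from the current cluster's reference point
    if trackid[i] != track_prev or np.linalg.norm(pos[i] - pos_prev) > threshold:
        cluster_idx += 1
        pos_prev = pos[i]
    cluster[i] = cluster_idx
    track_prev = trackid[i]

print(cluster)  # [0 0 1]
```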
reboost/shape/group.py CHANGED
@@ -25,7 +25,9 @@ def _sort_data(obj: ak.Array, *, time_name: str = "time", evtid_name: str = "evt
     -------
     sorted awkward array
     """
+    obj = obj[ak.argsort(obj[evtid_name])]
     obj_unflat = ak.unflatten(obj, ak.run_lengths(obj[evtid_name]))
+
     indices = ak.argsort(obj_unflat[time_name], axis=-1)
     sorted_obj = obj_unflat[indices]
 
@@ -120,9 +122,9 @@ def group_by_time(
 
    # get difference
    time_diffs = np.diff(obj[time_name])
-    index_diffs = np.diff(obj[evtid_name])
+    index_diffs = np.array(np.diff(obj[evtid_name]), dtype=np.int32)
 
-    # index of thhe last element in each run
+    # index of the last element in each run
    time_change = (time_diffs > window * 1000) & (index_diffs == 0)
    index_change = index_diffs > 0
 
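The `argsort` added to `_sort_data` matters because `ak.run_lengths` only merges adjacent equal values; a quick illustration:

```python
import awkward as ak

evtid = ak.Array([1, 2, 1])

# unsorted: event 1 is split into two runs
print(ak.run_lengths(evtid).to_list())                     # [1, 1, 1]
# sorted first (as _sort_data now does): one run per event
print(ak.run_lengths(evtid[ak.argsort(evtid)]).to_list())  # [2, 1]
```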
reboost/utils.py CHANGED
@@ -1,25 +1,62 @@
 from __future__ import annotations
 
 import importlib
+import itertools
 import logging
 import re
-from collections.abc import Iterable
+from collections.abc import Iterable, Mapping
 from contextlib import contextmanager
 from pathlib import Path
 
 from dbetto import AttrsDict
+from lgdo.types import Table
 
 log = logging.getLogger(__name__)
 
 
 def get_file_dict(
     stp_files: list[str] | str,
-    glm_files: list[str] | str,
+    glm_files: list[str] | str | None,
     hit_files: list[str] | str | None = None,
 ) -> AttrsDict:
-    """Get the file info as a AttrsDict."""
+    """Get the file info as a AttrsDict.
+
+    Creates an :class:`dbetto.AttrsDict` with keys `stp_files`,
+    `glm_files` and `hit_files`. Each key contains a list of
+    file-paths (or `None`).
+
+    Parameters
+    ----------
+    stp_files
+        string or list of strings of the stp files.
+    glm_files
+        string or list of strings of the glm files, or None in which
+        case the glm will be created in memory.
+    hit_files
+        string or list of strings of the hit files, if None the output
+        files will be created in memory.
+    """
+    # make a list of the right length
+    glm_files_list = [None] * len(stp_files) if glm_files is None else glm_files
+
+    # make a list of files in case
+    # 1) hit_files is a str and stp_files is a list
+    # 2) hit_files and stp_files are both lists of different length
+
+    hit_is_list = isinstance(hit_files, list)
+    stp_is_list = isinstance(stp_files, list)
+
+    make_files_list = (not hit_is_list and stp_is_list) or (
+        hit_is_list and stp_is_list and len(hit_files) == 1 and len(stp_files) > 1
+    )
+
+    hit_files_list = [hit_files] * len(stp_files) if (make_files_list) else hit_files
+
     files = {}
-    for file_type, file_list in zip(["stp", "glm", "hit"], [stp_files, glm_files, hit_files]):
+
+    for file_type, file_list in zip(
+        ["stp", "glm", "hit"], [stp_files, glm_files_list, hit_files_list]
+    ):
        if isinstance(file_list, str):
            files[file_type] = [file_list]
        else:
@@ -35,6 +72,48 @@ def get_file_list(path: str | None, threads: int | None = None) -> list[str]:
     return [f"{(Path(path).with_suffix(''))}_t{idx}.lh5" for idx in range(threads)]
 
 
+def copy_units(tab: Table) -> dict:
+    """Extract a dictionary of attributes (i.e. units).
+
+    Parameters
+    ----------
+    tab
+        Table to get the units from.
+
+    Returns
+    -------
+    a dictionary with the units for each field
+    in the table.
+    """
+    units = {}
+
+    for field in tab:
+        if "units" in tab[field].attrs:
+            units[field] = tab[field].attrs["units"]
+
+    return units
+
+
+def assign_units(tab: Table, units: Mapping) -> Table:
+    """Copy the attributes from the map of attributes to the table.
+
+    Parameters
+    ----------
+    tab
+        Table to add attributes to.
+    units
+        mapping (dictionary like) of units of each field
+
+    Returns
+    -------
+    an updated table with LGDO attributes.
+    """
+    for field in tab:
+        if field in units:
+            tab[field].attrs["units"] = units[field]
+    return tab
+
+
 def _search_string(string: str):
     """Capture the characters matching the pattern for a function call."""
     pattern = r"\b([a-zA-Z_][a-zA-Z0-9_\.]*)\s*\("
@@ -119,6 +198,33 @@ def get_function_string(expr: str, aliases: dict | None = None) -> tuple[str, di
     return expr, globs
 
 
+def get_channels_from_groups(names: list | str | None, groupings: dict | None = None) -> list:
+    """Get a list of channels from a list of groups.
+
+    Parameters
+    ----------
+    names
+        list of channel names
+    groupings
+        dictionary of the groupings of channels
+
+    Returns
+    -------
+    list of channels
+    """
+    if names is None:
+        channels_e = []
+    elif isinstance(names, str):
+        channels_e = groupings[names]
+    elif isinstance(names, list):
+        channels_e = list(itertools.chain.from_iterable([groupings[e] for e in names]))
+    else:
+        msg = f"names {names} must be list or str or `None`"
+        raise ValueError(msg)
+
+    return channels_e
+
+
 def merge_dicts(dict_list: list) -> dict:
     """Merge a list of dictionaries, concatenating the items where they exist.
 
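A hedged round-trip sketch of the new unit helpers, `copy_units` and `assign_units` (field name and unit are made up):

```python
import numpy as np
from lgdo.types import Array, Table

from reboost import utils

tab = Table(col_dict={"energy": Array(np.array([1.0, 2.0]), attrs={"units": "keV"})})

units = utils.copy_units(tab)  # {"energy": "keV"}

bare = Table(col_dict={"energy": Array(np.array([3.0, 4.0]))})
bare = utils.assign_units(bare, units)
print(bare["energy"].attrs["units"])  # keV
```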
reboost-{0.2.1 → 0.2.3}.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: reboost
-Version: 0.2.1
+Version: 0.2.3
 Summary: New LEGEND Monte-Carlo simulation post-processing
 Author-email: Manuel Huber <info@manuelhu.de>, Toby Dixon <toby.dixon.23@ucl.ac.uk>, Luigi Pertoldi <gipert@pm.me>
 Maintainer: The LEGEND Collaboration
reboost-{0.2.1 → 0.2.3}.dist-info/RECORD CHANGED
@@ -1,20 +1,20 @@
 reboost/__init__.py,sha256=RVNl3Qgx_hTUeBGXaWYmiTcmXUDhTfvlAGGC8bo_jP8,316
-reboost/_version.py,sha256=UoNvMtd4wCG76RwoSpNCUtaFyTwakGcZolfjXzNVSMY,511
-reboost/build_evt.py,sha256=5Q3T0LCl8xMtyRRhcs6layC1xh4vp2f26PgB1yab2zs,4798
-reboost/build_glm.py,sha256=kxQN6MYl-vfKnOHufPqf5ifEoaToqaR30iMXISxIhYQ,9253
-reboost/build_hit.py,sha256=sJR2qXup-qP1IoDLRxquuTyeI7DP_1S80QZ-w94qaZY,13293
-reboost/build_tcm.py,sha256=N1rZwht88ZaKWmURch1VrVUbQROXfP56D0aj_JLsRhU,2951
+reboost/_version.py,sha256=wD8hnA5gV5UmPkQnpT3xR6V2csgj9K5NEADogbLK79M,511
+reboost/build_evt.py,sha256=zj3wG_kaV3EoRMQ33AkCNa_2Fv8cLtRuhyRyRmSrOYQ,4797
+reboost/build_glm.py,sha256=LQkM6x6mMOE92-c78uoclOvP9zp3vdMuLQCSP2f2Zk4,9263
+reboost/build_hit.py,sha256=KKfTJgoR5JnAMQVru58B76zPcqZxiexIUlWPUhb1zmU,14260
+reboost/build_tcm.py,sha256=-PawBHoHj0zsm4XsZu5bco9d9a09STicZchduefSNfI,2951
 reboost/cli.py,sha256=HTZ05DRnDodcf_D6BJCCavx5HqhKDadJCgf-oh8HTJk,6365
-reboost/core.py,sha256=AamREubQsAqJ-y10NRx18r-PuqlQj3iTl2PzOTWXGQI,10540
-reboost/iterator.py,sha256=cqfh3c0uLP67S0YGaw05-McZQzdMb8BISULIm3PEbKA,3990
+reboost/core.py,sha256=7Nclc6RUCOSJ1CWVAX0rFNJGM1LEgqvc4tD04CxEAtg,10766
+reboost/iterator.py,sha256=72AyoRTgMpWghZt2UOqRj0RGiNzaiBAwgNIUZdduK2s,4698
 reboost/log_utils.py,sha256=VqS_9OC5NeNU3jcowVOBB0NJ6ssYvNWnirEY-JVduEA,766
 reboost/profile.py,sha256=EOTmjmS8Rm_nYgBWNh6Rntl2XDsxdyed7yEdWtsZEeg,2598
-reboost/utils.py,sha256=PMnHvSD5MpIzJyA3IQD_fLAK-O1RMY68DPGbQJp7Yww,4967
+reboost/utils.py,sha256=T9GIknSKWsKAalbQT9Ny3u9UTYEvy8gghFhmoCs41Io,7751
 reboost/hpge/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 reboost/hpge/psd.py,sha256=vFs8Y5XVW261pB6aOvWmIDzqOaBg-gEOLhL9PbjlEKI,2113
 reboost/hpge/surface.py,sha256=SZyTmOCTipf27jYaJhtdInzGF1RZ2wKpbtf6HlOQYwM,3662
 reboost/math/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-reboost/math/functions.py,sha256=ZgQpm87pGE0wH4Ekjm-8SbEmzfZ5MlAxS-fTw0RsNMc,1875
+reboost/math/functions.py,sha256=OymiYTcA0NXxxm-MBDw5kqyNwHoLCmuv4J48AwnSrbU,5633
 reboost/math/stats.py,sha256=iiOEi87x93kqPWeSmlRiA5Oe-R8XR-plm6Z532PhC9M,1401
 reboost/optmap/__init__.py,sha256=imvuyld-GLw8qdwqW-lXCg2feptcTyQo3wIzPvDHwmY,93
 reboost/optmap/cli.py,sha256=wBexh-zrr5ABherEyk9xigxdArvOAKiiRQwAYon9Sro,9408
@@ -25,12 +25,12 @@ reboost/optmap/mapview.py,sha256=73kpe0_SKDj9bIhEx1ybX1sBP8TyvufiLfps84A_ijA,679
 reboost/optmap/numba_pdg.py,sha256=y8cXR5PWE2Liprp4ou7vl9do76dl84vXU52ZJD9_I7A,731
 reboost/optmap/optmap.py,sha256=j4rfbQ84PYSpE-BvP4Rdt96ZjPdwy8P4e4eZz1mATys,12817
 reboost/shape/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-reboost/shape/cluster.py,sha256=Cj4V1maPR-q_w6rKwF_hLW3Zmsv6zHva_I5oA2mm3PY,7442
-reboost/shape/group.py,sha256=bSmFCl_yi1hGaKudjiicDEJsiBNyAHiKYdr8ZuH4pSM,4406
+reboost/shape/cluster.py,sha256=RIvBlhHzp88aaUZGofp5SD9bimnoiqIOddhQ84jiwoM,8135
+reboost/shape/group.py,sha256=Q3DhEPxbhw3p4bwvpswSd0A-p224l5vRZnfQIEkOVJE,4475
 reboost/shape/reduction.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-reboost-0.2.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-reboost-0.2.1.dist-info/METADATA,sha256=zSFjGcEzPVcoBtwwbVyRdbI95BooP7swkYvojFfqNjU,44219
-reboost-0.2.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-reboost-0.2.1.dist-info/entry_points.txt,sha256=DxhD6BidSWNot9BrejHJjQ7RRLmrMaBIl52T75oWTwM,93
-reboost-0.2.1.dist-info/top_level.txt,sha256=q-IBsDepaY_AbzbRmQoW8EZrITXRVawVnNrB-_zyXZs,8
-reboost-0.2.1.dist-info/RECORD,,
+reboost-0.2.3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+reboost-0.2.3.dist-info/METADATA,sha256=9cPQ0Bz2cGao4exSC1XwIqp61rfSHNIDLrIwD8SdbB8,44219
+reboost-0.2.3.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+reboost-0.2.3.dist-info/entry_points.txt,sha256=DxhD6BidSWNot9BrejHJjQ7RRLmrMaBIl52T75oWTwM,93
+reboost-0.2.3.dist-info/top_level.txt,sha256=q-IBsDepaY_AbzbRmQoW8EZrITXRVawVnNrB-_zyXZs,8
+reboost-0.2.3.dist-info/RECORD,,
reboost-{0.2.1 → 0.2.3}.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 