reboost 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reboost/core.py ADDED
@@ -0,0 +1,526 @@
+ from __future__ import annotations
+
+ import contextlib
+ import logging
+ import time
+ from typing import Any
+
+ import awkward as ak
+ import numpy as np
+ from dbetto import AttrsDict
+ from lgdo import lh5
+ from lgdo.types import LGDO, Table
+
+ from . import utils
+ from .profile import ProfileDict
+
+ log = logging.getLogger(__name__)
+
+
+ def read_data_at_channel_as_ak(
+     channels: ak.Array, rows: ak.Array, file: str, field: str, group: str, tab_map: dict[str, int]
+ ) -> ak.Array:
+     r"""Read the data from a particular field to an awkward array.
+
+     This replaces the TCM-like object defined by the channels and rows
+     with the corresponding data field.
+
+     Parameters
+     ----------
+     channels
+         Array of the channel indices (uids).
+     rows
+         Array of the rows in the files to gather data from.
+     file
+         File to read the data from.
+     field
+         The field to read.
+     group
+         The group to read data from (e.g. ``hit`` or ``stp``).
+     tab_map
+         Mapping between table names and indices (uids). Of the form:
+
+         .. code:: python
+
+             {NAME: UID}
+
+         For example:
+
+         .. code:: python
+
+             {"det001": 1, "det002": 2}
+
+     Returns
+     -------
+     an array with the data, with the same jagged layout as the channels and rows.
+     """
+     # initialise the output
+     data_flat = None
+     tcm_rows_full = None
+
+     # save the unflattening
+     reorder = ak.num(rows)
+
+     for tab_name, key in tab_map.items():
+         # get the rows to read
+         idx = ak.flatten(rows[channels == key]).to_numpy()
+         arg_idx = np.argsort(idx)
+
+         # get the rows in the flattened data we want to append to
+         tcm_rows = np.where(ak.flatten(channels == key))[0]
+
+         # read the data with sorted idx
+         data_ch = lh5.read(f"{group}/{tab_name}/{field}", file, idx=idx[arg_idx]).view_as("ak")
+
+         # sort back to order for tcm
+         data_ch = data_ch[np.argsort(arg_idx)]
+
+         # append to output
+         data_flat = ak.concatenate((data_flat, data_ch)) if data_flat is not None else data_ch
+         tcm_rows_full = (
+             np.concatenate((tcm_rows_full, tcm_rows)) if tcm_rows_full is not None else tcm_rows
+         )
+
+     if len(data_flat) != len(tcm_rows_full):
+         msg = "every index in the tcm should have been read"
+         raise ValueError(msg)
+
+     # sort the final data
+     data_flat = data_flat[np.argsort(tcm_rows_full)]
+
+     return ak.unflatten(data_flat, reorder)
+
+
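+ # Usage sketch (illustrative; file, field and detector names are hypothetical).
+ # Given an LH5 file with tables stp/det001 and stp/det002 and a TCM-like pair
+ # of jagged arrays, one could read the energies as:
+ #
+ #     edep = read_data_at_channel_as_ak(
+ #         channels=ak.Array([[1, 2], [2]]),  # uids per event
+ #         rows=ak.Array([[0, 0], [1]]),      # row in each table per event
+ #         file="stp.lh5",
+ #         field="edep",
+ #         group="stp",
+ #         tab_map={"det001": 1, "det002": 2},
+ #     )
+ #
+ # The result has the same jagged layout as `channels` and `rows`.
+
+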
+ def evaluate_output_column(
+     hit_table: Table,
+     expression: str,
+     local_dict: dict,
+     *,
+     table_name: str = "HITS",
+     time_dict: ProfileDict | None = None,
+     name: str = " ",
+ ) -> LGDO:
+     """Evaluate an expression returning an LGDO.
+
+     Uses :meth:`lgdo.Table.eval` to compute a new column for the
+     hit table. The expression can depend on any field in the table
+     (prefixed with ``table_name``) or on objects contained in the local
+     dict. In addition, the expression can use packages, which are then
+     imported.
+
+     Parameters
+     ----------
+     hit_table
+         the table containing the hit fields.
+     expression
+         the expression to evaluate.
+     local_dict
+         local dictionary to pass to :meth:`lgdo.Table.eval`.
+     table_name
+         keyword used to refer to the fields in the table.
+     time_dict
+         time profiling data structure.
+     name
+         name to use in `time_dict`.
+
+     Returns
+     -------
+     an LGDO with the new field.
+     """
+     if time_dict is not None:
+         time_start = time.time()
+
+     if local_dict is None:
+         local_dict = {}
+
+     expr = expression.replace(f"{table_name}.", "")
+
+     # get func call and modules to import
+     func_call, globals_dict = utils.get_function_string(expr)
+
+     msg = f"evaluating table with command {expr} and local_dict {local_dict.keys()}"
+     log.debug(msg)
+
+     # remove np and ak
+     globals_dict.pop("np", None)
+     globals_dict.pop("ak", None)
+
+     if globals_dict == {}:
+         globals_dict = None
+
+     # silence noisy pyg4ometry logging during the evaluation
+     ctx = contextlib.nullcontext()
+     if globals_dict is not None and "pyg4ometry" in globals_dict:
+         ctx = utils.filter_logging(logging.CRITICAL)
+
+     with ctx:
+         res = hit_table.eval(
+             func_call, local_dict, modules=globals_dict, library="ak", with_units=True
+         )
+
+     # record how long the evaluation took
+     if time_dict is not None:
+         time_dict.update_field(name=f"expressions/{name}", time_start=time_start)
+
+     return res
+
+
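+ # Usage sketch (illustrative; the `edep` field is hypothetical). A summed-energy
+ # column could be computed and attached with:
+ #
+ #     col = evaluate_output_column(hit_table, "ak.sum(HITS.edep, axis=-1)", {})
+ #     add_field_with_nesting(hit_table, "energy_sum", col)
+
+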
+ def evaluate_object(
+     expression: str,
+     local_dict: dict,
+ ) -> Any:
+     """Evaluate an expression returning any object.
+
+     The expression should be a function call. It can depend on any
+     objects contained in the local dict. In addition, the expression can
+     use packages, which are then imported.
+
+     Parameters
+     ----------
+     expression
+         the expression to evaluate.
+     local_dict
+         local dictionary to pass to `eval()`.
+
+     Returns
+     -------
+     the evaluated object.
+     """
+     msg = f"Evaluating object with expression {expression} and {local_dict}"
+     log.debug(msg)
+
+     func_call, globals_dict = utils.get_function_string(expression)
+
+     # silence noisy pyg4ometry logging during the evaluation
+     ctx = contextlib.nullcontext()
+     if "pyg4ometry" in globals_dict:
+         ctx = utils.filter_logging(logging.CRITICAL)
+
+     with ctx:
+         return eval(func_call, local_dict, globals_dict)
+
+
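+ # Usage sketch (illustrative; assumes `utils.get_function_string` imports the
+ # leading module of the call by name). Names in `local_dict` resolve directly:
+ #
+ #     evaluate_object("numpy.arange(N)", local_dict={"N": 3})  # array([0, 1, 2])
+
+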
+ def get_global_objects(
+     expressions: dict[str, str], *, local_dict: dict, time_dict: dict | None = None
+ ) -> AttrsDict:
+     """Extract global objects used in the processing.
+
+     Parameters
+     ----------
+     expressions
+         a dictionary containing the expression to evaluate for each object.
+     local_dict
+         other objects used in the evaluation of the expressions, passed to
+         `eval()` as the locals keyword.
+     time_dict
+         time profiling data structure.
+
+     Returns
+     -------
+     dictionary of objects with the same keys as the expressions.
+     """
+     if time_dict is not None:
+         time_start = time.time()
+
+     msg = f"Getting global objects with {expressions.keys()} and {local_dict}"
+     log.debug(msg)
+
+     res = {}
+     for obj_name, expression in expressions.items():
+         # expressions can refer to already-evaluated objects via OBJECTS
+         res[obj_name] = evaluate_object(
+             expression, local_dict=local_dict | {"OBJECTS": AttrsDict(res)}
+         )
+
+     if time_dict is not None:
+         time_dict.update_field(name="global_objects", time_start=time_start)
+
+     return AttrsDict(res)
+
+
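+ # Usage sketch (illustrative; the GDML path and argument names are
+ # hypothetical):
+ #
+ #     objects = get_global_objects(
+ #         {"geometry": "pyg4ometry.gdml.Reader(ARGS.gdml).getRegistry()"},
+ #         local_dict={"ARGS": args},
+ #     )
+ #     objects.geometry  # objects support attribute access (AttrsDict)
+
+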
+ def get_detector_mapping(
+     detector_mapping: list, global_objects: AttrsDict, args: AttrsDict
+ ) -> dict:
+     """Get the full detector mapping using :func:`get_one_detector_mapping`.
+
+     Parameters
+     ----------
+     detector_mapping
+         list of detector mappings, each a dictionary with an ``output`` key
+         and an optional ``input`` key.
+     global_objects
+         dictionary of global objects to use in evaluating the mapping.
+     args
+         any arguments the expressions can depend on; passed as `locals` to `eval()`.
+     """
+     return utils.merge_dicts(
+         [
+             get_one_detector_mapping(
+                 mapping["output"],
+                 input_detector_name=mapping.get("input", None),
+                 objects=global_objects,
+                 args=args,
+             )
+             for mapping in detector_mapping
+         ]
+     )
+
+
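+ # Usage sketch (illustrative; detector names are hypothetical). A configuration
+ # like:
+ #
+ #     detector_mapping = [
+ #         {"output": "[f'det{i:03}' for i in range(1, 3)]"},
+ #         {"input": "optical", "output": ["ch0", "ch1"]},
+ #     ]
+ #
+ # evaluates to:
+ #
+ #     {"det001": ["det001"], "det002": ["det002"], "optical": ["ch0", "ch1"]}
+
+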
+ def get_one_detector_mapping(
+     output_detector_expression: str | list,
+     objects: AttrsDict | None = None,
+     input_detector_name: str | None = None,
+     args: AttrsDict | None = None,
+ ) -> dict:
+     """Extract the output detectors and the input-to-output mapping by parsing the expressions.
+
+     The `output_detector_expression` can be a name or a string evaluating to a
+     list of names. This expression can depend on any objects in the `objects`
+     dictionary, referred to by the keyword "OBJECTS".
+
+     The function produces a dictionary mapping input detectors to output detectors with the
+     following format:
+
+     .. code-block:: python
+
+         {
+             "input1": ["output1", "output2"],
+             "input2": ["output3", ...],
+         }
+
+     If only `output_detector_expression` is supplied, the mapping is one-to-one
+     (i.e. every input detector maps to the output detector of the same name).
+     If instead an `input_detector_name` is also supplied, it becomes the only
+     key and all output detectors are mapped to it.
+
+     Parameters
+     ----------
+     output_detector_expression
+         An output detector name or a string evaluating to a list of output tables.
+     objects
+         dictionary of objects that can be referenced in the expression.
+     input_detector_name
+         Optional input detector name for all the outputs.
+     args
+         any arguments the expression can depend on; passed as `locals` to `eval()`.
+
+     Returns
+     -------
+     a dictionary with the input detectors as keys and a list of output detectors for each.
+
+     Examples
+     --------
+     For a direct one-to-one mapping:
+
+     >>> get_one_detector_mapping("[str(i) for i in range(3)]")
+     {'0': ['0'], '1': ['1'], '2': ['2']}
+
+     With an input detector name:
+
+     >>> get_one_detector_mapping("[str(i) for i in range(3)]", input_detector_name="dets")
+     {'dets': ['0', '1', '2']}
+
+     With objects:
+
+     >>> objs = AttrsDict({"format": "ch"})
+     >>> get_one_detector_mapping(
+     ...     "[f'{OBJECTS.format}{i}' for i in range(3)]",
+     ...     input_detector_name="dets",
+     ...     objects=objs,
+     ... )
+     {'dets': ['ch0', 'ch1', 'ch2']}
+     """
+     out_names = []
+     if isinstance(output_detector_expression, str):
+         out_list = [output_detector_expression]
+     else:
+         out_list = list(output_detector_expression)
+
+     for expression_tmp in out_list:
+         # try to evaluate the expression; if it does not evaluate
+         # (e.g. no package was imported) it is just a plain name
+         try:
+             objs = evaluate_object(expression_tmp, local_dict={"ARGS": args, "OBJECTS": objects})
+             out_names.extend(objs)
+         except Exception:
+             out_names.append(expression_tmp)
+
+     # simple one-to-one mapping
+     if input_detector_name is None:
+         return {name: [name] for name in out_names}
+     return {input_detector_name: out_names}
+
+
+ def get_detector_objects(
+     output_detectors: list,
+     expressions: dict,
+     args: AttrsDict,
+     global_objects: AttrsDict,
+     time_dict: ProfileDict | None = None,
+ ) -> AttrsDict:
+     """Get the detector objects for each detector.
+
+     This computes a set of objects per output detector, one for each of the
+     expressions (defined in the `expressions` input). The expressions can
+     depend on the keywords:
+
+     - ``ARGS``: values from the `args` parameter AttrsDict can be referenced,
+     - ``DETECTOR``: the detector name (key of the detector mapping),
+     - ``OBJECTS``: the global objects,
+     - ``DETECTOR_OBJECTS``: the detector objects evaluated so far (earlier
+       keys in `expressions`).
+
+     For example, expressions like:
+
+     .. code-block:: python
+
+         compute_object(arg=ARGS.first_arg, detector=DETECTOR, obj=OBJECTS.meta)
+
+     are supported.
+
+     Parameters
+     ----------
+     output_detectors
+         list of output detectors.
+     expressions
+         dictionary of expressions to evaluate.
+     args
+         any arguments the expressions can depend on; passed as `locals` to `eval()`.
+     global_objects
+         a dictionary of objects the expressions can depend on.
+     time_dict
+         time profiling data structure.
+
+     Returns
+     -------
+     an AttrsDict of the objects for each detector.
+     """
+     if time_dict is not None:
+         time_start = time.time()
+
+     det_objects_dict = {}
+     for output_detector in output_detectors:
+         obj_dict = {}
+         for obj_name, obj_expression in expressions.items():
+             obj_dict[obj_name] = evaluate_object(
+                 obj_expression,
+                 local_dict={
+                     "ARGS": args,
+                     "DETECTOR": output_detector,
+                     "OBJECTS": global_objects,
+                     "DETECTOR_OBJECTS": AttrsDict(obj_dict),
+                 },
+             )
+
+         det_objects_dict[output_detector] = AttrsDict(obj_dict)
+
+     res = AttrsDict(det_objects_dict)
+
+     if time_dict is not None:
+         time_dict.update_field(name="detector_objects", time_start=time_start)
+
+     return res
+
+
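+ # Usage sketch (illustrative; `compute_object` is the hypothetical helper from
+ # the docstring above). Each expression is evaluated once per output detector:
+ #
+ #     det_objects = get_detector_objects(
+ #         output_detectors=["det001", "det002"],
+ #         expressions={
+ #             "meta": "compute_object(arg=ARGS.first_arg, detector=DETECTOR, obj=OBJECTS.meta)",
+ #         },
+ #         args=args,
+ #         global_objects=objects,
+ #     )
+ #     det_objects.det001.meta
+
+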
+ def evaluate_hit_table_layout(
+     steps: ak.Array | Table, expression: str, time_dict: dict | None = None
+ ) -> Table:
+     """Evaluate the hit_table_layout expression, producing the hit table.
+
+     This expression should be a function call which performs a restructuring
+     of the steps, i.e. it sets the number of rows. The steps array should be
+     referred to by "STEPS" in the expression.
+
+     Parameters
+     ----------
+     steps
+         awkward array or Table of the steps.
+     expression
+         the expression to evaluate to produce the hit table.
+     time_dict
+         time profiling data structure.
+
+     Returns
+     -------
+     :class:`lgdo.Table` of the hits.
+     """
+     if time_dict is not None:
+         time_start = time.time()
+
+     group_func, globs = utils.get_function_string(expression)
+     locs = {"STEPS": steps}
+
+     msg = f"running step grouping with {group_func} and globals {globs.keys()} and locals {locs.keys()}"
+     log.debug(msg)
+
+     res = eval(group_func, globs, locs)
+
+     if time_dict is not None:
+         time_dict.update_field(name="hit_layout", time_start=time_start)
+
+     return res
+
+
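+ # Usage sketch (illustrative; the grouping helper name is an assumption and
+ # should be taken from the reboost documentation):
+ #
+ #     hits = evaluate_hit_table_layout(
+ #         steps, "reboost.shape.group.group_by_time(STEPS, window=10)"
+ #     )
+
+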
+ def add_field_with_nesting(tab: Table, col: str, field: LGDO) -> Table:
+     """Add a field to the table, creating nested sub-tables as needed.
+
+     Nesting levels in `col` are separated by "___".
+     """
+     subfields = col.strip("/").split("___")
+     tab_next = tab
+
+     for i, level in enumerate(subfields):
+         # if we are at the end, just add the field
+         if i == len(subfields) - 1:
+             tab_next.add_field(level, field)
+             break
+
+         if not level:
+             msg = f"invalid field name '{col}'"
+             raise RuntimeError(msg)
+
+         # otherwise, increase the nesting level, creating the
+         # sub-table if it does not exist yet
+         if level not in tab_next:
+             tab_next.add_field(level, Table(size=len(tab_next)))
+         tab_next = tab_next[level]
+
+     return tab
+
+
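+ # Usage sketch (illustrative): "___" in a column name denotes nesting, so
+ #
+ #     add_field_with_nesting(tab, "trigger___energy", col)
+ #
+ # creates (or reuses) a sub-table `trigger` and stores the field such that
+ # it is reachable as `tab.trigger.energy`.
+
+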
+ def _get_table_keys(tab: Table) -> list[str]:
+     """Get the keys in a table, flattening nested sub-tables with "___"."""
+     existing_cols = list(tab.keys())
+     output_cols = []
+     for col in existing_cols:
+         if isinstance(tab[col], Table):
+             output_cols.extend(
+                 [f"{col}___{col_second}" for col_second in _get_table_keys(tab[col])]
+             )
+         else:
+             output_cols.append(col)
+
+     return output_cols
+
+
+ def _remove_col(field: str, tab: Table):
+     """Remove a column, accounting for nesting."""
+     if "___" in field:
+         base_name, sub_field = field.split("___", 1)
+         _remove_col(sub_field, tab[base_name])
+     else:
+         tab.remove_column(field, delete=True)
+
+
+ def remove_columns(tab: Table, outputs: list) -> Table:
+     """Remove columns from the table that are not listed in the outputs.
+
+     Parameters
+     ----------
+     tab
+         the table to remove columns from.
+     outputs
+         a list of output fields to keep.
+
+     Returns
+     -------
+     the table with the other columns removed.
+     """
+     cols = _get_table_keys(tab)
+     for col in cols:
+         if col not in outputs:
+             _remove_col(col, tab)
+     return tab
+
+
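+ # Usage sketch (illustrative; field names are hypothetical). Output names use
+ # the same "___" nesting convention as `_get_table_keys`:
+ #
+ #     remove_columns(tab, outputs=["evtid", "trigger___energy"])
+ #
+ # keeps `evtid` and `trigger.energy`, dropping all other columns.
+
+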
+ def merge(hit_table: Table, output_table: ak.Array | None) -> ak.Array:
+     """Merge the hit table into the in-memory output array."""
+     return (
+         hit_table.view_as("ak")
+         if output_table is None
+         else ak.concatenate((output_table, hit_table.view_as("ak")))
+     )
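+
+
+ # Usage sketch (illustrative): typical accumulation of hits over file chunks:
+ #
+ #     output = None
+ #     for hit_table in processed_chunks:
+ #         output = merge(hit_table, output)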
@@ -0,0 +1,5 @@
+ from __future__ import annotations
+
+ from .core import run_daq_non_sparse
+
+ __all__ = ["run_daq_non_sparse"]