reboost-0.8.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reboost/__init__.py ADDED
@@ -0,0 +1,14 @@
+ from __future__ import annotations
+
+ import hdf5plugin
+ from lgdo import lh5
+
+ from ._version import version as __version__
+ from .build_hit import build_hit
+
+ __all__ = [
+     "__version__",
+     "build_hit",
+ ]
+
+ lh5.settings.DEFAULT_HDF5_SETTINGS = {"compression": hdf5plugin.Zstd()}
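Note the import-time side effect on the final line above: importing `reboost` switches the default lgdo HDF5 compression to Zstandard for everything written afterwards with `lh5.write`. A minimal sketch of what this means downstream (the file name and table contents are invented for illustration):

    import numpy as np
    import reboost  # noqa: F401 -- sets lh5.settings.DEFAULT_HDF5_SETTINGS on import
    from lgdo import Array, Table, lh5

    # any LGDO written after the import picks up Zstd compression by default
    tab = Table(size=1000)
    tab.add_field("energy", Array(np.random.uniform(0, 3000, 1000)))
    lh5.write(tab, "hit", "example.lh5", wo_mode="overwrite_file")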
reboost/_version.py ADDED
@@ -0,0 +1,34 @@
+ # file generated by setuptools-scm
+ # don't change, don't track in version control
+
+ __all__ = [
+     "__version__",
+     "__version_tuple__",
+     "version",
+     "version_tuple",
+     "__commit_id__",
+     "commit_id",
+ ]
+
+ TYPE_CHECKING = False
+ if TYPE_CHECKING:
+     from typing import Tuple
+     from typing import Union
+
+     VERSION_TUPLE = Tuple[Union[int, str], ...]
+     COMMIT_ID = Union[str, None]
+ else:
+     VERSION_TUPLE = object
+     COMMIT_ID = object
+
+ version: str
+ __version__: str
+ __version_tuple__: VERSION_TUPLE
+ version_tuple: VERSION_TUPLE
+ commit_id: COMMIT_ID
+ __commit_id__: COMMIT_ID
+
+ __version__ = version = '0.8.3'
+ __version_tuple__ = version_tuple = (0, 8, 3)
+
+ __commit_id__ = commit_id = None
reboost/build_evt.py ADDED
@@ -0,0 +1,135 @@
+ from __future__ import annotations
+
+ import logging
+
+ import awkward as ak
+ import numpy as np
+ from dbetto import AttrsDict
+ from lgdo import Array, Table, VectorOfVectors, lh5
+
+ from . import core, math, shape, utils
+ from .shape import group
+
+ log = logging.getLogger(__name__)
+
+
+ def build_evt(
+     tcm: VectorOfVectors,
+     hitfile: str,
+     outfile: str | None,
+     channel_groups: AttrsDict,
+     pars: AttrsDict,
+     run_part: AttrsDict,
+ ) -> Table | None:
+     """Build events out of a TCM.
+
+     Parameters
+     ----------
+     tcm
+         the time coincidence map.
+     hitfile
+         file with the hits.
+     outfile
+         the path to the output file; if `None`, the events
+         are returned in memory.
+     channel_groups
+         a dictionary of groups of channels. For example:
+
+         .. code-block:: python
+
+             {"det1": "on", "det2": "off", "det3": "ac"}
+
+     pars
+         A dictionary of parameters. The first key should
+         be the run ID, followed by different sets of parameters
+         arranged in groups. Run IDs should be given in the
+         format `"p00-r001"`, etc.
+
+         For example:
+
+         .. code-block:: python
+
+             {"p03-r000": {"reso": {"det1": [1, 2], "det2": [0, 1]}}}
+
+     run_part
+         The run partitioning, giving the number of events
+         for each run. This should be organized as a dictionary
+         with the following format:
+
+         .. code-block:: python
+
+             {"p03-r000": 1000, "p03-r001": 2000}
+
+     Returns
+     -------
+     the events as a table if no output file is specified, otherwise `None`.
+     """
+     tcm_tables = utils.get_table_names(tcm)
+     tcm_ak = tcm.view_as("ak")
+
+     # loop over the runs
+     cum_sum = 0
+     tab = None
+
+     for idx, (run_full, n_event) in enumerate(run_part.items()):
+         period, run = run_full.split("-")
+         pars_tmp = pars[run_full]
+
+         # create an output table
+         out_tab = Table(size=n_event)
+
+         tcm_tmp = tcm_ak[cum_sum : cum_sum + n_event]
+         cum_sum += n_event  # advance the offset so the next run reads the next block
+
+         # usability flags
+
+         is_off = shape.group.get_isin_group(
+             tcm_tmp.table_key, channel_groups, tcm_tables, group="off"
+         )
+
+         # filter out off channels
+         channels = tcm_tmp.table_key[~is_off]
+         rows = tcm_tmp.row_in_table[~is_off]
+         out_tab.add_field("channel", VectorOfVectors(channels))
+         out_tab.add_field("row_in_table", VectorOfVectors(rows))
+
+         out_tab.add_field("period", Array(np.ones(len(channels)) * int(period[1:])))
+         out_tab.add_field("run", Array(np.ones(len(channels)) * int(run[1:])))
+
+         # now check for channels in ac
+         is_good = group.get_isin_group(channels, channel_groups, tcm_tables, group="on")
+
+         # get energy
+         energy_true = core.read_data_at_channel_as_ak(
+             channels, rows, hitfile, "energy", "hit", tcm_tables
+         )
+
+         energy = math.stats.apply_energy_resolution(
+             energy_true,
+             channels,
+             tcm_tables,
+             pars_tmp.reso,
+             lambda energy, sig0, sig1: np.sqrt(energy * sig1**2 + sig0**2),
+         )
+
+         out_tab.add_field("is_good", VectorOfVectors(is_good[energy > 25]))
+
+         out_tab.add_field("energy", VectorOfVectors(energy[energy > 25]))
+         out_tab.add_field("multiplicity", Array(ak.sum(energy > 25, axis=-1).to_numpy()))
+
+         # write table
+         wo_mode = "of" if idx == 0 else "append"
+
+         # add attrs
+         out_tab.attrs["tables"] = tcm.attrs["tables"]
+
+         if outfile is not None:
+             lh5.write(out_tab, "evt", outfile, wo_mode=wo_mode)
+         else:
+             tab = (
+                 ak.concatenate((tab, out_tab.view_as("ak")))
+                 if tab is not None
+                 else out_tab.view_as("ak")
+             )
+
+     return Table(tab) if tab is not None else None
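For orientation, a hedged usage sketch of `build_evt`, assembled from the docstring examples above. The file paths are hypothetical, and the TCM is assumed to have been produced by an earlier reboost step (it must carry a `tables` attribute and `table_key`/`row_in_table` fields):

    from dbetto import AttrsDict
    from lgdo import lh5

    from reboost.build_evt import build_evt

    # hypothetical inputs: a TCM and a hit file written upstream
    tcm = lh5.read("tcm", "tcm.lh5")

    build_evt(
        tcm,
        hitfile="hit.lh5",
        outfile="evt.lh5",
        channel_groups=AttrsDict({"det1": "on", "det2": "off", "det3": "ac"}),
        pars=AttrsDict({"p03-r000": {"reso": {"det1": [1, 2], "det2": [0, 1]}}}),
        run_part=AttrsDict({"p03-r000": 1000}),
    )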
reboost/build_glm.py ADDED
@@ -0,0 +1,305 @@
+ from __future__ import annotations
+
+ import copy
+ import logging
+
+ import awkward as ak
+ import numpy as np
+ from lgdo import Array, Table, lh5
+ from lgdo.lh5 import LH5Iterator, LH5Store
+ from numpy.typing import ArrayLike
+
+ from . import utils
+
+ log = logging.getLogger(__name__)
+
+
+ def get_glm_rows(stp_evtids: ArrayLike, vert: ArrayLike, *, start_row: int = 0) -> ak.Array:
+     """Get the rows of the Geant4 event lookup map (glm).
+
+     Parameters
+     ----------
+     stp_evtids
+         array of evtids for the steps.
+     vert
+         array of simulated evtids for the vertices.
+     start_row
+         the index of the first element of stp_evtids.
+
+     Returns
+     -------
+     an awkward array of the `glm`.
+     """
+     # convert inputs
+     if stp_evtids is None:
+         output = ak.Array({"evtid": vert})
+         output["n_rows"] = np.array([0] * len(vert), dtype=float)
+         output["start_row"] = np.array([np.nan] * len(vert), dtype=float)
+
+         return output
+
+     if not isinstance(stp_evtids, np.ndarray):
+         stp_evtids = (
+             stp_evtids.to_numpy() if isinstance(stp_evtids, ak.Array) else np.array(stp_evtids)
+         )
+
+     if not isinstance(vert, np.ndarray):
+         vert = vert.to_numpy() if isinstance(vert, ak.Array) else np.array(vert)
+
+     # check that the steps and vertices are sorted or the algorithm will fail
+
+     if not np.all(vert[:-1] <= vert[1:]):
+         msg = "The vertices must be sorted"
+         raise ValueError(msg)
+
+     if len(stp_evtids) > 0 and not np.all(stp_evtids[:-1] <= stp_evtids[1:]):
+         msg = "The steps must be sorted"
+         raise ValueError(msg)
+
+     # global row indices of the steps
+     stp_indices = np.arange(len(stp_evtids)) + start_row
+
+     # cut the arrays
+     sel = (stp_evtids >= vert[0]) & (stp_evtids <= vert[-1])
+     stp_evtids = stp_evtids[sel]
+     stp_indices = stp_indices[sel]
+
+     # build output
+     output = ak.Array({"evtid": vert})
+
+     # restructure to jagged array
+     counts = ak.run_lengths(stp_evtids)
+     steps = ak.unflatten(stp_evtids, counts)
+     indices = ak.unflatten(stp_indices, counts)
+
+     ak_tmp = ak.Array({"evtid": ak.fill_none(ak.firsts(steps), np.nan), "indices": indices})
+
+     # find indices to insert the new entries
+     positions = np.searchsorted(output.evtid, ak_tmp.evtid)
+
+     # check that every evtid in the steps is present in the vertex table
+     if not np.all(np.isin(ak_tmp.evtid, output.evtid)):
+         bad_evtid = ak_tmp.evtid[~np.isin(ak_tmp.evtid, output.evtid)]
+         msg = f"not every evtid in the stp table is present in the vertex table: {bad_evtid} are missing"
+         raise ValueError(msg)
+
+     # get the start row
+     start_row = np.array([np.nan] * len(vert), dtype=float)
+     start_row[positions] = ak.fill_none(ak.firsts(ak_tmp.indices), np.nan)
+
+     n_row = np.array([0] * len(vert), dtype=float)
+     n_row[positions] = ak.num(ak_tmp.indices)
+
+     # add to the output
+     output["n_rows"] = n_row
+     output["start_row"] = start_row
+
+     return output
+
+
+ def get_stp_evtids(
+     lh5_table: str,
+     stp_file: str,
+     id_name: str,
+     start_row: int,
+     last_vertex_evtid: int,
+     stp_buffer: int,
+ ) -> tuple[int, int, ak.Array]:
+     """Extract the rows of a stp file corresponding to a particular range of `evtid`.
+
+     The reading starts at `start_row` to allow for iterating through
+     the file. The iteration stops when the `evtid` being read is larger than
+     `last_vertex_evtid`.
+
+     Parameters
+     ----------
+     lh5_table
+         the table name to read.
+     stp_file
+         the file name path.
+     id_name
+         the name of the `evtid` field.
+     start_row
+         the row to begin reading.
+     last_vertex_evtid
+         the last evtid to read up to.
+     stp_buffer
+         the number of rows to read at once.
+
+     Returns
+     -------
+     a tuple of the updated `start_row`, the first row of the chunk, and an awkward Array of the steps.
+     """
+     # make a LH5Store
+     store = LH5Store()
+
+     # some outputs
+     evtids_proc = None
+     last_evtid = 0
+     chunk_start = 0
+
+     # get the total number of rows
+     n_rows_tot = store.read_n_rows(f"{lh5_table}/{id_name}", stp_file)
+
+     # iterate over the file
+     # stop when the entire file is read
+
+     while start_row < n_rows_tot:
+         # read the file
+         lh5_obj = store.read(
+             f"{lh5_table}/{id_name}",
+             stp_file,
+             start_row=start_row,
+             n_rows=stp_buffer,
+         )
+         evtids = lh5_obj.view_as("ak")
+         n_read = len(evtids)
+
+         # pick the first evtid
+         if evtids.ndim > 1:
+             evtids = ak.fill_none(ak.firsts(evtids, axis=-1), -1)
+
+         # if evtids_proc is not set, this is the first valid chunk
+         if evtids_proc is None:
+             evtids_proc = evtids
+             chunk_start = start_row
+         elif evtids[0] <= last_vertex_evtid:
+             evtids_proc = ak.concatenate((evtids_proc, evtids))
+
+         # get the last evtid
+         last_evtid = evtids[-1]
+
+         # if the last evtid is greater than the last vertex we can stop reading
+         if last_evtid > last_vertex_evtid or (start_row + n_read >= n_rows_tot):
+             break
+
+         # increase the start row for the next read
+
+         if start_row + n_read <= n_rows_tot:
+             start_row += n_read
+
+     return start_row, chunk_start, evtids_proc
+
+
+ def build_glm(
+     stp_files: str | list[str],
+     glm_files: str | list[str] | None,
+     lh5_groups: list | None = None,
+     *,
+     out_table_name: str = "glm",
+     id_name: str = "g4_evtid",
+     evtid_buffer: int = int(1e7),
+     stp_buffer: int = int(1e7),
+ ) -> ak.Array | None:
+     """Build a `g4_evtid` lookup map (glm) from the stp data.
+
+     This object is used by `reboost` to efficiently iterate through the data.
+     It consists of a :class:`lgdo.Table` for each lh5_table in the input files,
+     with fields `evtid`, `n_rows` and `start_row`; the rows correspond to the
+     `id_name` while the data give the `stp` rows for each event.
+
+     Parameters
+     ----------
+     stp_files
+         path(s) to the stp (input) file(s).
+     glm_files
+         path(s) to the glm data; can also be `None`, in which case an `ak.Array` is returned in memory.
+     out_table_name
+         name for the output table.
+     id_name
+         name of the evtid field, default `g4_evtid`.
+     stp_buffer
+         the number of rows of the step file to read at a time.
+     evtid_buffer
+         the number of evtids to read at a time.
+
+     Returns
+     -------
+     either `None` or an `ak.Array`.
+     """
+     store = LH5Store()
+     files = utils.get_file_dict(stp_files=stp_files, glm_files=glm_files)
+
+     # loop over files
+     glm_sum = {}
+
+     for file_idx, stp_file in enumerate(files.stp):
+         msg = f"start generating glm for {stp_file}"
+         log.debug(msg)
+
+         # loop over the lh5_tables
+         lh5_table_list = [
+             det
+             for det in lh5.ls(stp_file, "stp/")
+             if lh5_groups is None or det.split("/")[1] in lh5_groups
+         ]
+
+         # get rows in the table
+         if files.glm[file_idx] is None:
+             for lh5_table in lh5_table_list:
+                 if lh5_table.replace("stp/", "") not in glm_sum:
+                     glm_sum[lh5_table.replace("stp/", "")] = None
+         else:
+             glm_sum = None
+
+         # start row for each table
+         start_row = dict.fromkeys(lh5_table_list, 0)
+
+         vfield = f"vtx/{id_name}"
+
+         # iterate over the vertex table
+         for vidx, vert_obj in enumerate(LH5Iterator(stp_file, vfield, buffer_len=evtid_buffer)):
+             # range of vertices
+             vert_ak = vert_obj.view_as("ak")
+
+             msg = f"... read chunk {vidx}"
+             log.debug(msg)
+
+             for idx, lh5_table in enumerate(lh5_table_list):
+                 # create the output table
+                 out_tab = Table(size=len(vert_ak))
+
+                 # read the stp rows starting from `start_row` until the
+                 # evtid is larger than that in the vertices
+
+                 start_row_tmp, chunk_row, evtids = get_stp_evtids(
+                     lh5_table,
+                     stp_file,
+                     id_name,
+                     start_row[lh5_table],
+                     last_vertex_evtid=vert_ak[-1],
+                     stp_buffer=stp_buffer,
+                 )
+
+                 # set the start row for the next chunk
+                 start_row[lh5_table] = start_row_tmp
+
+                 # now get the glm rows
+                 glm = get_glm_rows(evtids, vert_ak, start_row=chunk_row)
+
+                 for field in ["evtid", "n_rows", "start_row"]:
+                     out_tab.add_field(field, Array(glm[field].to_numpy()))
+
+                 # write the output file
+                 mode = "overwrite_file" if (vidx == 0 and idx == 0) else "append"
+
+                 lh5_subgroup = lh5_table.replace("stp/", "")
+
+                 if files.glm[file_idx] is not None:
+                     store.write(
+                         out_tab,
+                         f"{out_table_name}/{lh5_subgroup}",
+                         files.glm[file_idx],
+                         wo_mode=mode,
+                     )
+                 else:
+                     glm_sum[lh5_subgroup] = (
+                         copy.deepcopy(glm)
+                         if glm_sum[lh5_subgroup] is None
+                         else ak.concatenate((glm_sum[lh5_subgroup], glm))
+                     )
+
+     # return if it was requested to keep glm in memory
+     if glm_sum is not None:
+         return ak.Array(glm_sum)
+     return None
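To make the glm construction concrete: a small worked example of `get_glm_rows` on toy arrays, followed by a hedged `build_glm` call. The lh5 paths are hypothetical and assume a Geant4 stp file containing `stp/` tables and a `vtx/g4_evtid` vertex table, as expected by `build_glm`:

    from reboost.build_glm import build_glm, get_glm_rows

    # toy data: steps belonging to evtids [0, 0, 2], vertices [0, 1, 2, 3]
    glm = get_glm_rows([0, 0, 2], [0, 1, 2, 3])
    # -> evtid 0: n_rows=2, start_row=0; evtid 2: n_rows=1, start_row=2;
    #    evtids 1 and 3: n_rows=0, start_row=nan

    # write the glm for a stp file to disk ...
    build_glm("stp.lh5", "glm.lh5")

    # ... or keep it in memory as an awkward array
    glm_in_memory = build_glm("stp.lh5", None)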