reboost-0.8.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reboost/shape/cluster.py ADDED
@@ -0,0 +1,260 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import awkward as ak
6
+ import numba
7
+ import numpy as np
8
+ from lgdo import VectorOfVectors
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
+ def apply_cluster(
14
+ cluster_run_lengths: VectorOfVectors | ak.Array, field: ak.Array | VectorOfVectors
15
+ ) -> VectorOfVectors:
16
+ """Apply clustering to a field.
17
+
18
+ Parameters
19
+ ----------
20
+ cluster_run_lengths
21
+ run lengths of each cluster
22
+ field
23
+ the field to cluster
24
+ """
25
+ if isinstance(cluster_run_lengths, VectorOfVectors):
26
+ cluster_run_lengths = cluster_run_lengths.view_as("ak")
27
+
28
+ if isinstance(field, VectorOfVectors):
29
+ field = field.view_as("ak")
30
+
31
+ n_cluster = ak.num(cluster_run_lengths, axis=-1)
32
+ clusters = ak.unflatten(ak.flatten(field), ak.flatten(cluster_run_lengths))
33
+
34
+ # reshape into cluster oriented
35
+ return VectorOfVectors(ak.unflatten(clusters, n_cluster))
36
+
37
+
38
+ def cluster_by_step_length(
39
+ trackid: ak.Array | VectorOfVectors,
40
+ pos_x: ak.Array | VectorOfVectors,
41
+ pos_y: ak.Array | VectorOfVectors,
42
+ pos_z: ak.Array | VectorOfVectors,
43
+ dist: ak.Array | VectorOfVectors | None = None,
44
+ surf_cut: float | None = None,
45
+ threshold: float = 0.1,
46
+ threshold_surf: float | None = None,
47
+ ) -> VectorOfVectors:
48
+ """Perform clustering based on the step length.
49
+
50
+ Steps are clustered based on their separation; a new cluster is started if any of the following holds:
51
+ - a step is in a new track,
52
+ - a step moves from the surface to the bulk region (or vice versa),
53
+ - the distance between the current step and the first step of the current cluster is above a threshold.
54
+
55
+ The surface region is defined as the volume within `surf_cut`
56
+ of the detector surface. This allows the clustering parameters to be
57
+ tuned separately for the bulk and the surface.
58
+
59
+ Parameters
60
+ ----------
61
+ trackid
62
+ index of the track.
63
+ pos_x
64
+ x position of the step.
65
+ pos_y
66
+ y position of the step.
67
+ pos_z
68
+ z position of the step.
69
+ dist
70
+ distance to the detector surface. Can be `None`, in which case all steps are treated as being in the "bulk".
71
+ surf_cut
72
+ Size of the surface region (in mm); if `None` (the default), no surface selection is applied.
73
+ threshold
74
+ Distance threshold in mm to combine steps in the bulk.
75
+ threshold_surf
76
+ Distance threshold in mm to combine steps in the surface.
77
+
78
+ Returns
79
+ -------
80
+ Array of the run lengths of each cluster within a hit.
81
+ """
82
+ # type conversions
83
+ if isinstance(pos_x, VectorOfVectors):
84
+ pos_x = pos_x.view_as("ak")
85
+
86
+ if isinstance(pos_y, VectorOfVectors):
87
+ pos_y = pos_y.view_as("ak")
88
+
89
+ if isinstance(pos_z, VectorOfVectors):
90
+ pos_z = pos_z.view_as("ak")
91
+
92
+ if isinstance(trackid, VectorOfVectors):
93
+ trackid = trackid.view_as("ak")
94
+
95
+ if isinstance(dist, VectorOfVectors):
96
+ dist = dist.view_as("ak")
97
+
98
+ pos = np.vstack(
99
+ [
100
+ ak.flatten(pos_x).to_numpy().astype(np.float64),
101
+ ak.flatten(pos_y).to_numpy().astype(np.float64),
102
+ ak.flatten(pos_z).to_numpy().astype(np.float64),
103
+ ]
104
+ ).T
105
+
106
+ indices_flat = cluster_by_distance_numba(
107
+ ak.flatten(ak.local_index(trackid)).to_numpy(),
108
+ ak.flatten(trackid).to_numpy(),
109
+ pos,
110
+ dist_to_surf=ak.flatten(dist).to_numpy() if dist is not None else dist,
111
+ surf_cut=surf_cut,
112
+ threshold=threshold,
113
+ threshold_surf=threshold_surf,
114
+ )
115
+
116
+ # reshape into being event oriented
117
+ indices = ak.unflatten(indices_flat, ak.num(ak.local_index(trackid)))
118
+
119
+ # number of steps per cluster
120
+ counts = ak.run_lengths(indices)
121
+
122
+ return VectorOfVectors(counts)
123
+
124
+
125
+ @numba.njit
126
+ def cluster_by_distance_numba(
127
+ local_index: np.ndarray,
128
+ trackid: np.ndarray,
129
+ pos: np.ndarray,
130
+ dist_to_surf: np.ndarray | None,
131
+ surf_cut: float | None = None,
132
+ threshold: float = 0.1,
133
+ threshold_surf: float | None = None,
134
+ ) -> np.ndarray:
135
+ """Cluster steps by the distance between points in the same track.
136
+
137
+ This function gives the basic numerical calculations for
138
+ :func:`cluster_by_step_length`.
139
+
140
+ Parameters
141
+ ----------
142
+ local_index
143
+ 1D array of the local index within each hit (step group)
144
+ trackid
145
+ 1D array of index of the track
146
+ pos
147
+ `(n,3)` size array of the positions
148
+ dist_to_surf
149
+ 1D array of the distance to the detector surface. Can be `None`, in which case all steps are treated as being in the bulk.
150
+ surf_cut
151
+ Size of the surface region (in mm); if `None`, no surface selection is applied.
152
+ threshold
153
+ Distance threshold in mm to combine steps in the bulk.
154
+ threshold_surf
155
+ Distance threshold in mm to combine steps in the surface.
156
+
157
+ Returns
158
+ -------
159
+ np.ndarray
160
+ 1D array of cluster indices
161
+ """
162
+
163
+ def _dist(a, b):
164
+ return np.sqrt(np.sum((a - b) ** 2))
165
+
166
+ n = len(local_index)
167
+ out = np.zeros((n,), dtype=numba.int32)
168
+
169
+ trackid_prev = -1
170
+ pos_prev = np.zeros(3, dtype=numba.float64)
171
+ cluster_idx = -1
172
+ is_surf_prev = False
173
+
174
+ for idx in range(n):
175
+ # consider a surface and a bulk region
176
+ if dist_to_surf is not None:
177
+ thr = threshold if dist_to_surf[idx] > surf_cut else threshold_surf
178
+
179
+ new_cluster = (
180
+ (trackid[idx] != trackid_prev)
181
+ or (is_surf_prev and (dist_to_surf[idx] > surf_cut))
182
+ or ((not is_surf_prev) and (dist_to_surf[idx] < surf_cut))
183
+ or (_dist(pos[idx, :], pos_prev) > thr)
184
+ )
185
+ # basic clustering without split into surface / bulk
186
+ else:
187
+ thr = threshold
188
+ new_cluster = (trackid[idx] != trackid_prev) or (_dist(pos[idx, :], pos_prev) > thr)
189
+
190
+ # New hit, reset cluster index
191
+ if idx == 0 or local_index[idx] == 0:
192
+ cluster_idx = 0
193
+ pos_prev = pos[idx]
194
+
195
+ # either new track, moving from surface to bulk,
196
+ # moving from bulk to surface, or stepping more than
197
+ # the threshold. Start a new cluster.
198
+ elif new_cluster:
199
+ cluster_idx += 1
200
+ pos_prev = pos[idx, :]
201
+
202
+ out[idx] = cluster_idx
203
+
204
+ # Update previous values
205
+ trackid_prev = trackid[idx]
206
+ if dist_to_surf is not None:
207
+ is_surf_prev = dist_to_surf[idx] < surf_cut
208
+
209
+ return out
210
+
211
+
212
+ def step_lengths(
213
+ x_cluster: ak.Array | VectorOfVectors,
214
+ y_cluster: ak.Array | VectorOfVectors,
215
+ z_cluster: ak.Array | VectorOfVectors,
216
+ ) -> VectorOfVectors:
217
+ """Compute the distance between consecutive steps.
218
+
219
+ This is based on calculating the distance between consecutive steps in the same track,
220
+ thus the input arrays should already be clustered (have dimension 3). The output
221
+ will have a similar shape to the input with one less entry in the innermost dimension.
222
+
223
+ Example config (assuming that the clustered positions have already been computed):
224
+
225
+ .. code-block:: yaml
226
+
227
+ step_lengths: reboost.shape.cluster.step_lengths(HITS.cluster_x, HITS.cluster_y, HITS.cluster_z)
228
+
229
+ Parameters
230
+ ----------
231
+ x_cluster
232
+ The x location of each step in each cluster and event.
233
+ y_cluster
234
+ The y location of each step in each cluster and event.
235
+ z_cluster
236
+ The z location of each step in each cluster and event.
237
+
238
+ Returns
239
+ -------
240
+ a `VectorOfVectors` of the step lengths in each cluster.
241
+ """
242
+ data = [x_cluster, y_cluster, z_cluster]
243
+
244
+ for idx, var in enumerate(data):
245
+ if isinstance(var, VectorOfVectors):
246
+ data[idx] = var.view_as("ak")
247
+ # check shape
248
+ if data[idx].ndim != 3:
249
+ msg = f"The input array for step lengths must be 3 dimensional not {data[idx.dim]}"
250
+ raise ValueError(msg)
251
+
252
+ counts = ak.num(data[0], axis=-1)
253
+ data = np.vstack([ak.flatten(ak.flatten(var)).to_numpy() for var in data])
254
+ dist = np.append(np.sqrt(np.sum(np.diff(data, axis=1) ** 2, axis=0)), 0)
255
+
256
+ n_cluster = ak.num(counts, axis=-1)
257
+ clusters = ak.unflatten(ak.Array(dist), ak.flatten(counts))
258
+
259
+ out = ak.unflatten(clusters, n_cluster)
260
+ return VectorOfVectors(out[:, :, :-1])
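For reference, a minimal usage sketch of the clustering chain defined in this file (reboost.shape.cluster), on small in-memory awkward arrays; the positions, track ids and the 0.1 mm threshold are illustrative only:

import awkward as ak

from reboost.shape.cluster import apply_cluster, cluster_by_step_length, step_lengths

# two hits (step groups), each a list of steps; positions in mm
trackid = ak.Array([[1, 1, 1, 2], [1, 1]])
x = ak.Array([[0.0, 0.01, 5.0, 0.0], [0.0, 0.2]])
y = ak.Array([[0.0, 0.0, 0.0, 0.0], [0.0, 0.0]])
z = ak.Array([[0.0, 0.0, 0.0, 0.0], [0.0, 0.0]])

# run lengths of each cluster within a hit (bulk-only mode, since dist is None)
lengths = cluster_by_step_length(trackid, x, y, z, threshold=0.1)

# regroup the per-step positions onto a per-cluster axis
x_cl = apply_cluster(lengths, x)
y_cl = apply_cluster(lengths, y)
z_cl = apply_cluster(lengths, z)

# distances between consecutive steps inside each cluster
intra_cluster_lengths = step_lengths(x_cl, y_cl, z_cl)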
reboost/shape/group.py ADDED
@@ -0,0 +1,189 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import awkward as ak
6
+ import numpy as np
7
+ from dbetto import AttrsDict
8
+ from lgdo import Table, VectorOfVectors
9
+ from numpy.typing import ArrayLike
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
+
14
+ def isin(channels: ak.Array, chan_list: list) -> ak.Array:
15
+ """Check if each element of the awkward array channels is in the channel list."""
16
+ num_channels = ak.num(channels, axis=-1)
17
+ channels_flat = ak.flatten(channels)
18
+ mask = np.isin(channels_flat, chan_list)
19
+
20
+ # unflatten
21
+ return ak.unflatten(mask, num_channels)
22
+
23
+
24
+ def get_isin_group(
25
+ channels: ArrayLike, groups: AttrsDict, tcm_tables: dict, group: str = "off"
26
+ ) -> ak.Array:
27
+ """For each channel check if it is in the group.
28
+
29
+ Parameters
30
+ ----------
31
+ channels
32
+ Array of the channel indices.
33
+ groups
34
+ A mapping of the group for every channel name.
35
+ tcm_tables
36
+ the mapping of indices to table names
37
+ group
38
+ the group to select.
39
+
40
+ Returns
41
+ -------
42
+ a boolean awkward array with the same shape as `channels`.
43
+ """
44
+ usability = {uid: groups[name] for name, uid in tcm_tables.items()}
45
+ group_idx = [key for key, item in usability.items() if item == group]
46
+
47
+ return isin(channels, group_idx)
48
+
49
+
50
+ def _sort_data(obj: ak.Array, *, time_name: str = "time", evtid_name: str = "evtid") -> ak.Array:
51
+ """Sort the data by evtid then time.
52
+
53
+ Parameters
54
+ ----------
55
+ obj
56
+ array of records containing fields `time` and `evtid`.
57
+ time_name
58
+ name of the time field in `obj`.
59
+ evtid_name
60
+ name of the evtid field in `obj`.
61
+
62
+ Returns
63
+ -------
64
+ sorted awkward array
65
+ """
66
+ obj = obj[ak.argsort(obj[evtid_name])]
67
+ obj_unflat = ak.unflatten(obj, ak.run_lengths(obj[evtid_name]))
68
+
69
+ indices = ak.argsort(obj_unflat[time_name], axis=-1)
70
+ sorted_obj = obj_unflat[indices]
71
+
72
+ return ak.flatten(sorted_obj)
73
+
74
+
75
+ def group_by_evtid(data: Table | ak.Array, *, evtid_name: str = "evtid") -> Table:
76
+ """Simple grouping by evtid.
77
+
78
+ Takes the input `stp` :class:`lgdo.Table` from remage and defines groupings of steps (i.e. the
79
+ `cumulative_length` for a vector of vectors). This then defines the output table (also :class:`lgdo.Table`),
80
+ on which processors can add fields.
81
+
82
+ Parameters
83
+ ----------
84
+ data
85
+ LGDO Table which must contain the `evtid` field.
86
+ evtid_name
87
+ the name of the index field in the input table.
88
+
89
+ Returns
90
+ -------
91
+ LGDO table of :class:`VectorOfVectors` for each field.
92
+
93
+ Note
94
+ ----
95
+ The input table must be sorted (by `evtid`).
96
+ """
97
+ # convert to awkward
98
+ obj_ak = data.view_as("ak") if isinstance(data, Table) else data
99
+
100
+ # extract cumulative lengths
101
+ counts = ak.run_lengths(obj_ak[evtid_name])
102
+ cumulative_length = np.cumsum(counts)
103
+
104
+ # convert to numpy
105
+ if isinstance(cumulative_length, ak.Array):
106
+ cumulative_length = cumulative_length.to_numpy()
107
+
108
+ # build output table
109
+ out_tbl = Table(size=len(cumulative_length))
110
+
111
+ for f in obj_ak.fields:
112
+ out_tbl.add_field(
113
+ f,
114
+ VectorOfVectors(
115
+ cumulative_length=cumulative_length, flattened_data=obj_ak[f].to_numpy()
116
+ ),
117
+ )
118
+ return out_tbl
119
+
120
+
121
+ def group_by_time(
122
+ data: Table | ak.Array,
123
+ window: float = 10,
124
+ time_name: str = "time",
125
+ evtid_name: str = "evtid",
126
+ fields: list | None = None,
127
+ ) -> Table:
128
+ """Grouping of steps by `evtid` and `time`.
129
+
130
+ Takes the input `stp` :class:`lgdo.Table` from remage and defines groupings of steps (i.e. the
131
+ `cumulative_length` for a vector of vectors). This then defines the output table (also :class:`lgdo.Table`),
132
+ on which processors can add fields.
133
+
134
+ A new group is started when the `evtid` changes or when the time difference between consecutive steps
135
+ exceeds `window`, which is given in us (step times are assumed to be in ns).
136
+
137
+ Parameters
138
+ ----------
139
+ data
140
+ :class:`lgdo.Table` or `ak.Array` which must contain the time_name and evtid_name fields
141
+ window
142
+ time window in us used to search for coincident hits
143
+ time_name
144
+ name of the timing field
145
+ evtid_name
146
+ name of the evtid field
147
+ fields
148
+ names of fields to include in the output table, if None includes all
149
+
150
+ Returns
151
+ -------
152
+ LGDO table of :class:`VectorOfVectors` for each field.
153
+
154
+ Note
155
+ ----
156
+ The input is sorted internally (first by `evtid`, then by `time`).
157
+ """
158
+ obj = data.view_as("ak") if isinstance(data, Table) else data
159
+ obj = _sort_data(obj, time_name=time_name, evtid_name=evtid_name)
160
+
161
+ # get difference
162
+ time_diffs = np.diff(obj[time_name])
163
+ index_diffs = np.diff(obj[evtid_name])
164
+
165
+ # index of the last element in each run
166
+ time_change = (time_diffs > window * 1000) & (index_diffs == 0)
167
+ index_change = index_diffs > 0
168
+
169
+ # cumulative length is just the index of changes plus 1
170
+ cumulative_length = np.array(np.where(time_change | index_change))[0] + 1
171
+
172
+ # add the last grouping
173
+ cumulative_length = np.append(cumulative_length, len(obj[time_name]))
174
+
175
+ # convert to numpy
176
+ if isinstance(cumulative_length, ak.Array):
177
+ cumulative_length = cumulative_length.to_numpy()
178
+
179
+ # build output table
180
+ out_tbl = Table(size=len(cumulative_length))
181
+
182
+ fields = obj.fields if fields is None else fields
183
+ for f in fields:
184
+ out_tbl.add_field(
185
+ f,
186
+ VectorOfVectors(cumulative_length=cumulative_length, flattened_data=obj[f].to_numpy()),
187
+ )
188
+
189
+ return out_tbl
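For reference, a minimal sketch of time-based grouping on a small flat step table; the field values are illustrative, and step times are assumed to be in ns (consistent with the `window * 1000` comparison above):

import awkward as ak

from reboost.shape.group import group_by_time

# flat remage-like steps: two event ids, the last step far outside the time window
stps = ak.Array(
    {
        "evtid": [0, 0, 1, 1, 1],
        "time": [0.0, 50.0, 0.0, 10.0, 500_000.0],  # ns
        "edep": [10.0, 20.0, 5.0, 1.0, 300.0],  # keV
    }
)

hits = group_by_time(stps, window=10)  # 10 us coincidence window

# three groups of steps: [[0, 0], [1, 1], [1]]
print(hits["evtid"].view_as("ak").to_list())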
File without changes
reboost/spms/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from __future__ import annotations
2
+
3
+ from .pe import detected_photoelectrons, emitted_scintillation_photons, load_optmap
4
+
5
+ __all__ = ["detected_photoelectrons", "emitted_scintillation_photons", "load_optmap"]
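The module init above exports the scintillation-photon chain; below is a hedged sketch of how those calls compose. The map file name, SiPM uid and name, and the material string are hypothetical and depend on the concrete setup and optical map:

import awkward as ak

from reboost.spms import detected_photoelectrons, emitted_scintillation_photons, load_optmap

# hypothetical optical map file and detector identifiers
optmap = load_optmap("optmap.lh5", spm_det_uid=3)

# per-hit step quantities (one event, two scintillation steps); plain numbers here,
# in a real config these fields may carry units handled internally by units_conv_ak
edep = ak.Array([[100.0, 50.0]])   # keV
particle = ak.Array([[11, 11]])    # PDG IDs
time = ak.Array([[0.0, 5.0]])      # ns
x = ak.Array([[0.1, 0.1]])         # m
y = ak.Array([[0.0, 0.0]])
z = ak.Array([[0.5, 0.5]])

num_ph = emitted_scintillation_photons(edep, particle, material="lar")
pe = detected_photoelectrons(
    num_ph.view_as("ak"), particle, time, x, y, z, optmap, "lar", "S003"
)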
reboost/spms/pe.py ADDED
@@ -0,0 +1,178 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import awkward as ak
6
+ import numpy as np
7
+ from lgdo import VectorOfVectors
8
+
9
+ from ..optmap import convolve
10
+ from ..units import units_conv_ak
11
+
12
+ log = logging.getLogger(__name__)
13
+
14
+
15
+ def load_optmap_all(map_file: str) -> convolve.OptmapForConvolve:
16
+ """Load an optical map file for later use with :py:func:`detected_photoelectrons`."""
17
+ return convolve.open_optmap(map_file)
18
+
19
+
20
+ def load_optmap(map_file: str, spm_det_uid: int) -> convolve.OptmapForConvolve:
21
+ """Load an optical map file for later use with :py:func:`detected_photoelectrons`."""
22
+ return convolve.open_optmap_single(map_file, spm_det_uid)
23
+
24
+
25
+ def _nested_unflatten(data: ak.Array, lengths: ak.Array):
26
+ return ak.unflatten(ak.unflatten(ak.flatten(data), ak.flatten(lengths)), ak.num(lengths))
27
+
28
+
29
+ def corrected_photoelectrons(
30
+ simulated_pe: ak.Array,
31
+ simulated_uids: ak.Array,
32
+ data_pe: ak.Array,
33
+ data_uids: ak.Array,
34
+ *,
35
+ seed: int | None = None,
36
+ ) -> tuple[ak.Array, ak.Array]:
37
+ r"""Add a correction to the observed number of photoelectrons (p.e.) using forced trigger data.
38
+
39
+ For every simulated event a corresponding forced trigger event in data is chosen
40
+ and the resulting number of p.e. for each channel (i) is:
41
+
42
+ .. math::
43
+
44
+ n_i = n_{\text{sim},i} + n_{\text{data},i}
45
+
46
+ .. warning::
47
+ The number of supplied forced trigger events in data should ideally be
48
+ more than that in the simulations. If this is not the case, some data events
49
+ will be used multiple times (they are sampled with replacement). This introduces
50
+ a small amount of correlation between the simulated events, but is probably acceptable
51
+ in most circumstances.
52
+
53
+ Parameters
54
+ ----------
55
+ simulated_pe
56
+ The number of detected p.e. per SiPM channel.
57
+ simulated_uids
58
+ The unique identifier (uid) for each sipm hit.
59
+ data_pe
60
+ The collection of forced trigger pe.
61
+ data_uids
62
+ The uids for each forced trigger event.
63
+ seed
64
+ Seed for random number generator
65
+
66
+ Returns
67
+ -------
68
+ a tuple of the corrected pe and sipm uids.
69
+ """
70
+ rand = np.random.default_rng(seed=seed)
71
+ rand_ints = rand.integers(0, len(data_pe), size=len(simulated_pe))
72
+
73
+ selected_data_pe = data_pe[rand_ints]
74
+ selected_data_uids = data_uids[rand_ints]
75
+
76
+ # combine sims with data
77
+ pe_tot = ak.concatenate([simulated_pe, selected_data_pe], axis=1)
78
+ uid_tot = ak.concatenate([simulated_uids, selected_data_uids], axis=1)
79
+
80
+ # sort by uid
81
+ order = ak.argsort(uid_tot)
82
+ pe_tot = pe_tot[order]
83
+ uid_tot = uid_tot[order]
84
+
85
+ # number of entries sharing each uid
86
+ n = ak.run_lengths(uid_tot)
87
+
88
+ # add another dimension
89
+ pe_tot = _nested_unflatten(pe_tot, n)
90
+ uid_tot = _nested_unflatten(uid_tot, n)
91
+
92
+ # sum pe and take the first uid (should all be the same)
93
+ corrected_pe = ak.sum(pe_tot, axis=-1)
94
+ uid_tot = ak.fill_none(ak.firsts(uid_tot, axis=-1), np.nan)
95
+
96
+ return corrected_pe, uid_tot
97
+
98
+
99
+ def detected_photoelectrons(
100
+ num_scint_ph: ak.Array,
101
+ particle: ak.Array,
102
+ time: ak.Array,
103
+ xloc: ak.Array,
104
+ yloc: ak.Array,
105
+ zloc: ak.Array,
106
+ optmap: convolve.OptmapForConvolve,
107
+ material: str,
108
+ spm_detector: str,
109
+ map_scaling: float = 1,
110
+ map_scaling_sigma: float = 0,
111
+ ) -> VectorOfVectors:
112
+ """Derive the number of detected photoelectrons (p.e.) from scintillator hits using an optical map.
113
+
114
+ Parameters
115
+ ----------
116
+ num_scint_ph
117
+ array of emitted scintillation photons, as generated by
118
+ :func:`emitted_scintillation_photons`.
119
+ particle
120
+ array of particle PDG IDs of scintillation events.
121
+ time
122
+ array of timestamps of scintillation events.
123
+ xloc
124
+ array of x coordinate position of scintillation events.
125
+ yloc
126
+ array of y coordinate position of scintillation events.
127
+ zloc
128
+ array of z coordinate position of scintillation events.
129
+ optmap
130
+ the optical map loaded via :py:func:`load_optmap`.
131
+ material
132
+ scintillating material name.
133
+ spm_detector
134
+ SiPM detector name as used in the optical map.
135
+ map_scaling
136
+ scale the detection probability in the map for this detector by this factor.
137
+ map_scaling_sigma
138
+ if larger than zero, sample the used scaling factor for each (reshaped) event
139
+ from a normal distribution with this standard deviation.
140
+ """
141
+ hits = ak.Array(
142
+ {
143
+ "num_scint_ph": num_scint_ph,
144
+ "particle": particle,
145
+ "time": units_conv_ak(time, "ns"),
146
+ "xloc": units_conv_ak(xloc, "m"),
147
+ "yloc": units_conv_ak(yloc, "m"),
148
+ "zloc": units_conv_ak(zloc, "m"),
149
+ }
150
+ )
151
+
152
+ scint_mat_params = convolve._get_scint_params(material)
153
+ pe = convolve.iterate_stepwise_depositions_pois(
154
+ hits, optmap, scint_mat_params, spm_detector, map_scaling, map_scaling_sigma
155
+ )
156
+
157
+ return VectorOfVectors(pe, attrs={"units": "ns"})
158
+
159
+
160
+ def emitted_scintillation_photons(
161
+ edep: ak.Array, particle: ak.Array, material: str
162
+ ) -> VectorOfVectors:
163
+ """Derive the number of emitted scintillation photons from scintillator hits.
164
+
165
+ Parameters
166
+ ----------
167
+ edep
168
+ array of deposited energy in scintillation events.
169
+ particle
170
+ array of particle PDG IDs of scintillation events.
171
+ material
172
+ scintillating material name.
173
+ """
174
+ hits = ak.Array({"edep": units_conv_ak(edep, "keV"), "particle": particle})
175
+
176
+ scint_mat_params = convolve._get_scint_params(material)
177
+ ph = convolve.iterate_stepwise_depositions_scintillate(hits, scint_mat_params)
178
+ return VectorOfVectors(ph)
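For reference, a minimal sketch of the forced-trigger correction defined above, with made-up p.e. values and channel uids; it only uses awkward arrays, so it runs without an optical map:

import awkward as ak

from reboost.spms.pe import corrected_photoelectrons

# simulated p.e. per SiPM channel for two events
sim_pe = ak.Array([[5.0, 2.0], [1.0]])
sim_uid = ak.Array([[1, 2], [2]])

# forced-trigger events taken from data, same uid scheme
data_pe = ak.Array([[0.0, 1.0], [0.0, 0.0], [1.0, 0.0]])
data_uid = ak.Array([[1, 2], [1, 2], [1, 2]])

# per event: summed p.e. for every unique uid and the matching channel uids
pe, uid = corrected_photoelectrons(sim_pe, sim_uid, data_pe, data_uid, seed=42)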