reboost 0.5.5__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {reboost-0.5.5 → reboost-0.6.0}/PKG-INFO +2 -1
  2. {reboost-0.5.5 → reboost-0.6.0}/pyproject.toml +2 -0
  3. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/__init__.py +2 -1
  4. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/_version.py +2 -2
  5. reboost-0.6.0/src/reboost/build_evt.py +134 -0
  6. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/core.py +74 -0
  7. reboost-0.6.0/src/reboost/math/stats.py +119 -0
  8. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/shape/group.py +38 -0
  9. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/utils.py +10 -0
  10. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/PKG-INFO +2 -1
  11. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/SOURCES.txt +1 -0
  12. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/requires.txt +1 -0
  13. reboost-0.6.0/tests/evt/test_evt.py +60 -0
  14. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/test_build_hit.py +4 -2
  15. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_dt_heuristic.py +1 -1
  16. {reboost-0.5.5 → reboost-0.6.0}/tests/test_core.py +53 -1
  17. {reboost-0.5.5 → reboost-0.6.0}/tests/test_math.py +25 -0
  18. {reboost-0.5.5 → reboost-0.6.0}/tests/test_shape.py +20 -0
  19. {reboost-0.5.5 → reboost-0.6.0}/tests/test_utils.py +10 -3
  20. reboost-0.5.5/src/reboost/build_evt.py +0 -166
  21. reboost-0.5.5/src/reboost/math/stats.py +0 -57
  22. {reboost-0.5.5 → reboost-0.6.0}/LICENSE +0 -0
  23. {reboost-0.5.5 → reboost-0.6.0}/README.md +0 -0
  24. {reboost-0.5.5 → reboost-0.6.0}/setup.cfg +0 -0
  25. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/build_glm.py +0 -0
  26. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/build_hit.py +0 -0
  27. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/cli.py +0 -0
  28. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/hpge/__init__.py +0 -0
  29. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/hpge/psd.py +0 -0
  30. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/hpge/surface.py +0 -0
  31. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/hpge/utils.py +0 -0
  32. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/iterator.py +0 -0
  33. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/log_utils.py +0 -0
  34. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/math/__init__.py +0 -0
  35. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/math/functions.py +0 -0
  36. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/__init__.py +0 -0
  37. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/cli.py +0 -0
  38. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/convolve.py +0 -0
  39. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/create.py +0 -0
  40. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/evt.py +0 -0
  41. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/mapview.py +0 -0
  42. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/numba_pdg.py +0 -0
  43. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/optmap/optmap.py +0 -0
  44. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/profile.py +0 -0
  45. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/shape/__init__.py +0 -0
  46. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/shape/cluster.py +0 -0
  47. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/shape/reduction.py +0 -0
  48. {reboost-0.5.5 → reboost-0.6.0}/src/reboost/units.py +0 -0
  49. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/dependency_links.txt +0 -0
  50. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/entry_points.txt +0 -0
  51. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/not-zip-safe +0 -0
  52. {reboost-0.5.5 → reboost-0.6.0}/src/reboost.egg-info/top_level.txt +0 -0
  53. {reboost-0.5.5 → reboost-0.6.0}/tests/conftest.py +0 -0
  54. {reboost-0.5.5 → reboost-0.6.0}/tests/glm/test_build_glm.py +0 -0
  55. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/args.yaml +0 -0
  56. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/basic.yaml +0 -0
  57. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/geom.gdml +0 -0
  58. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/hit_config.yaml +0 -0
  59. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/pars.yaml +0 -0
  60. {reboost-0.5.5 → reboost-0.6.0}/tests/hit/configs/reshape.yaml +0 -0
  61. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/simulation/gammas.mac +0 -0
  62. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/simulation/geometry.gdml +0 -0
  63. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/simulation/make_dt_map.jl +0 -0
  64. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/simulation/make_geom.py +0 -0
  65. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_current.py +0 -0
  66. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_files/drift_time_maps.lh5 +0 -0
  67. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_files/internal_electron.lh5 +0 -0
  68. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_hpge_map.py +0 -0
  69. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_r90.py +0 -0
  70. {reboost-0.5.5 → reboost-0.6.0}/tests/hpge/test_surface.py +0 -0
  71. {reboost-0.5.5 → reboost-0.6.0}/tests/test_cli.py +0 -0
  72. {reboost-0.5.5 → reboost-0.6.0}/tests/test_optmap.py +0 -0
  73. {reboost-0.5.5 → reboost-0.6.0}/tests/test_profile.py +0 -0
  74. {reboost-0.5.5 → reboost-0.6.0}/tests/test_units.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reboost
3
- Version: 0.5.5
3
+ Version: 0.6.0
4
4
  Summary: New LEGEND Monte-Carlo simulation post-processing
5
5
  Author-email: Manuel Huber <info@manuelhu.de>, Toby Dixon <toby.dixon.23@ucl.ac.uk>, Luigi Pertoldi <gipert@pm.me>
6
6
  Maintainer: The LEGEND Collaboration
@@ -696,6 +696,7 @@ Classifier: Topic :: Scientific/Engineering
696
696
  Requires-Python: >=3.9
697
697
  Description-Content-Type: text/markdown
698
698
  License-File: LICENSE
699
+ Requires-Dist: hdf5plugin
699
700
  Requires-Dist: colorlog
700
701
  Requires-Dist: numpy
701
702
  Requires-Dist: scipy
@@ -32,6 +32,7 @@ classifiers = [
32
32
  ]
33
33
  requires-python = ">=3.9"
34
34
  dependencies = [
35
+ "hdf5plugin",
35
36
  "colorlog",
36
37
  "numpy",
37
38
  "scipy",
@@ -147,6 +148,7 @@ ignore = [
147
148
  "D213", # Multi-line docstring summary should start at the first line
148
149
  "D401", # Summary does not need to be in imperative mood
149
150
  "D413", # No blank line after last section in docstring
151
+ "PLC0415", # we sometimes use local (non-top-level) imports for performance reasons
150
152
  "PLC2401", # We like non-ASCII characters for math
151
153
  ]
152
154
  isort.required-imports = ["from __future__ import annotations"]
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import hdf5plugin
3
4
  from lgdo import lh5
4
5
 
5
6
  from ._version import version as __version__
@@ -10,4 +11,4 @@ __all__ = [
10
11
  "build_hit",
11
12
  ]
12
13
 
13
- lh5.settings.DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "lzf"}
14
+ lh5.settings.DEFAULT_HDF5_SETTINGS = {"compression": hdf5plugin.Zstd()}
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.5.5'
21
- __version_tuple__ = version_tuple = (0, 5, 5)
20
+ __version__ = version = '0.6.0'
21
+ __version_tuple__ = version_tuple = (0, 6, 0)
@@ -0,0 +1,134 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ import awkward as ak
6
+ import numpy as np
7
+ from dbetto import AttrsDict
8
+ from lgdo import Array, Table, VectorOfVectors, lh5
9
+
10
+ from . import core, math, shape, utils
11
+ from .shape import group
12
+
13
+ log = logging.getLogger(__name__)
14
+
15
+
16
def build_evt(
    tcm: Table,
    hitfile: str,
    outfile: str | None,
    channel_groups: AttrsDict,
    pars: AttrsDict,
    run_part: AttrsDict,
) -> Table | None:
    """Build events out of a TCM.

    The TCM is split into consecutive slices, one per run listed in
    `run_part`. For every slice the channels in the ``"off"`` group are
    dropped, the hit energies are read from `hitfile`, smeared with the
    per-run resolution parameters and thresholded.

    Parameters
    ----------
    tcm
        the time coincidence map. It must provide the ``table_key`` and
        ``row_in_table`` fields and a ``tables`` attribute.
    hitfile
        file with the hits.
    outfile
        the path to the output-file, if `None` the events are returned
        in memory.
    channel_groups
        a dictionary of groups of channels. For example:

        .. code-block:: python

            {"det1": "on", "det2": "off", "det3": "ac"}

    pars
        A dictionary of parameters. The first key should
        be the run ID, followed by different sets of parameters
        arranged in groups. Run numbers should be given in the
        format `"p00-r001"`, etc.

        For example:

        .. code-block:: python

            {"p03-r000": {"reso": {"det1": [1, 2], "det2": [0, 1]}}}

    run_part
        The run partitioning file giving the number of events
        for each run. This should be organized as a dictionary
        with the following format:

        .. code-block:: python

            {"p03-r000": 1000, "p03-r001": 2000}

    Returns
    -------
    the event table in memory if no output file is specified, otherwise
    `None` (the events are written to `outfile`).
    """
    tcm_tables = utils.get_table_names(tcm)
    tcm_ak = tcm.view_as("ak")

    # hits with a smeared energy at or below this value are dropped
    energy_threshold = 25

    # index of the first TCM row belonging to the current run
    cum_sum = 0
    tab = None

    for idx, (run_full, n_event) in enumerate(run_part.items()):
        period, run = run_full.split("-")
        pars_tmp = pars[run_full]

        # create an output table
        out_tab = Table(size=n_event)

        tcm_tmp = tcm_ak[cum_sum : cum_sum + n_event]
        # move on to the next run; without this every run would re-read
        # the first rows of the TCM
        cum_sum += n_event

        # filter out off channels
        is_off = group.get_isin_group(tcm_tmp.table_key, channel_groups, tcm_tables, group="off")
        channels = tcm_tmp.table_key[~is_off]
        rows = tcm_tmp.row_in_table[~is_off]
        out_tab.add_field("channel", VectorOfVectors(channels))
        out_tab.add_field("row_in_table", VectorOfVectors(rows))

        # the leading letter ("p" / "r") is stripped before the conversion
        out_tab.add_field("period", Array(np.ones(len(channels)) * int(period[1:])))
        out_tab.add_field("run", Array(np.ones(len(channels)) * int(run[1:])))

        # flag the channels belonging to the "on" group
        is_good = group.get_isin_group(channels, channel_groups, tcm_tables, group="on")

        # get energy
        energy_true = core.read_data_at_channel_as_ak(
            channels, rows, hitfile, "energy", "hit", tcm_tables
        )

        energy = math.stats.apply_energy_resolution(
            energy_true,
            channels,
            tcm_tables,
            pars_tmp.reso,
            lambda energy, sig0, sig1: np.sqrt(energy * sig1**2 + sig0**2),
        )

        above_threshold = energy > energy_threshold

        out_tab.add_field("is_good", VectorOfVectors(is_good[above_threshold]))
        out_tab.add_field("energy", VectorOfVectors(energy[above_threshold]))
        out_tab.add_field("multiplicity", Array(ak.sum(above_threshold, axis=-1).to_numpy()))

        # the first run overwrites the file, the following ones append
        wo_mode = "of" if idx == 0 else "append"

        # add attrs
        out_tab.attrs["tables"] = tcm.attrs["tables"]

        if outfile is not None:
            lh5.write(out_tab, "evt", outfile, wo_mode=wo_mode)
        else:
            tab = (
                ak.concatenate((tab, out_tab.view_as("ak")))
                if tab is not None
                else out_tab.view_as("ak")
            )

    if outfile is not None:
        # everything was written to disk, nothing to hand back
        return None

    evt = Table(tab)
    # the awkward view does not carry the attributes, restore the table list
    evt.attrs["tables"] = tcm.attrs["tables"]
    return evt
@@ -5,7 +5,9 @@ import time
5
5
  from typing import Any
6
6
 
7
7
  import awkward as ak
8
+ import numpy as np
8
9
  from dbetto import AttrsDict
10
+ from lgdo import lh5
9
11
  from lgdo.types import LGDO, Table
10
12
 
11
13
  from . import utils
@@ -14,6 +16,78 @@ from .profile import ProfileDict
14
16
  log = logging.getLogger(__name__)
15
17
 
16
18
 
19
def read_data_at_channel_as_ak(
    channels: ak.Array, rows: ak.Array, file: str, field: str, group: str, tab_map: dict[str, int]
) -> ak.Array:
    r"""Read the data from a particular field to an awkward array.

    This replaces the TCM like object defined by the channels and rows
    with the corresponding data field.

    Parameters
    ----------
    channels
        Array of the channel indices (uids).
    rows
        Array of the rows in the files to gather data from.
    file
        File to read the data from.
    field
        the field to read.
    group
        the group to read data from (e.g. `hit` or `stp`).
    tab_map
        mapping between table names and indices. Of the form:

        .. code:: python

            {NAME: UID}

        For example:

        .. code:: python

            {"det001": 1, "det002": 2}

    Returns
    -------
    an array with the data, of the same shape as the channels and rows.

    Raises
    ------
    ValueError
        if `tab_map` is empty, or if the number of values read does not
        match the number of entries selected from the TCM.
    """
    # number of entries per event, used to restore the jagged structure
    reorder = ak.num(rows)

    data_parts = []
    position_parts = []

    for tab_name, key in tab_map.items():
        in_channel = channels == key

        # rows to read for this table, read in sorted order
        idx = ak.flatten(rows[in_channel]).to_numpy()
        arg_idx = np.argsort(idx)

        # positions of these entries in the flattened TCM
        tcm_rows = np.where(ak.flatten(in_channel))[0]

        # read the data with sorted idx
        data_ch = lh5.read(f"{group}/{tab_name}/{field}", file, idx=idx[arg_idx]).view_as("ak")

        # undo the sorting so the data follows the TCM order again
        data_ch = data_ch[np.argsort(arg_idx)]

        data_parts.append(data_ch)
        position_parts.append(tcm_rows)

    if not data_parts:
        msg = "tab_map is empty, no table to read the data from"
        raise ValueError(msg)

    # concatenate once instead of growing the arrays table by table
    data_flat = ak.concatenate(data_parts)
    tcm_rows_full = np.concatenate(position_parts)

    if len(data_flat) != len(tcm_rows_full):
        msg = "every index in the tcm should have been read"
        raise ValueError(msg)

    # put every value back at its position in the flattened TCM
    data_flat = data_flat[np.argsort(tcm_rows_full)]

    return ak.unflatten(data_flat, reorder)
89
+
90
+
17
91
  def evaluate_output_column(
18
92
  hit_table: Table,
19
93
  expression: str,
@@ -0,0 +1,119 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Callable
5
+
6
+ import awkward as ak
7
+ import numpy as np
8
+ from lgdo import Array
9
+ from numpy.typing import ArrayLike
10
+
11
+ log = logging.getLogger(__name__)
12
+
13
+
14
def get_resolution(
    energies: ak.Array, channels: ak.Array, tcm_tables: dict, reso_pars: dict, reso_func: Callable
) -> ak.Array:
    """Get the resolution for each energy.

    Parameters
    ----------
    energies
        the energies to smear
    channels
        the channel index for each energy
    tcm_tables
        the mapping from channel names to indices.
    reso_pars
        the pars for each channel, keyed by channel name. The number of
        parameters is taken from the first entry.
    reso_func
        the function to compute the resolution, called as
        ``reso_func(flat_energies, *pars)`` with one flat parameter array
        per parameter.

    Returns
    -------
    the resolution, with the same jagged structure as `channels`.
    """
    n_pars = len(reso_pars[next(iter(reso_pars))])

    # flatten once: this does not change inside the loops below
    channels_flat = ak.flatten(channels)
    num = ak.num(channels, axis=-1)

    # one flat array per parameter; entries of channels that are not in
    # tcm_tables keep the initial value of zero
    pars_shaped = [np.zeros(len(channels_flat)) for _ in range(n_pars)]

    for key, value in tcm_tables.items():
        in_channel = channels_flat == value
        for i in range(n_pars):
            pars_shaped[i][in_channel] = reso_pars[key][i]

    ch_reso = reso_func(ak.flatten(energies), *pars_shaped)
    return ak.unflatten(ch_reso, num)
47
+
48
+
49
def apply_energy_resolution(
    energies: ak.Array, channels: ak.Array, tcm_tables: dict, reso_pars: dict, reso_func: Callable
):
    """Smear the energies of many channels with their energy resolution.

    The resolution of every entry is obtained from :func:`get_resolution`
    and used as the standard deviation passed to :func:`gaussian_sample`.

    Parameters
    ----------
    energies
        the energies to smear
    channels
        the channel index for each energy
    tcm_tables
        the mapping from channel names to indices.
    reso_pars
        the pars for each channel.
    reso_func
        the function to compute the resolution.

    Returns
    -------
    the sampled energies, unflattened with the per-event counts of `channels`.
    """
    counts = ak.num(channels, axis=-1)

    sigma = get_resolution(energies, channels, tcm_tables, reso_pars, reso_func)

    # the sampling works on flat inputs, the structure is restored afterwards
    smeared_flat = gaussian_sample(ak.flatten(energies), ak.flatten(sigma))

    return ak.unflatten(smeared_flat, counts)
73
+
74
+
75
def gaussian_sample(mu: ArrayLike, sigma: ArrayLike | float, *, seed: int | None = None) -> Array:
    r"""Draw one sample per input value from a gaussian.

    Based on:

    .. math::

        y_i \sim \mathcal{N}(\mu_i,\sigma_i)

    where $y_i$ is the output, $\mu_i$ the input mean and $\sigma_i$ the
    standard deviation for each point.

    Parameters
    ----------
    mu
        the mean positions to sample from, should be a flat (ArrayLike) object.
    sigma
        the standard deviation for each input value, can also be a single float.
    seed
        the random seed.

    Returns
    -------
    sampled values.
    """

    def _unwrap(values, keep_types):
        # LGDO and awkward arrays are viewed as numpy, objects of the
        # `keep_types` are passed through, anything else goes to np.array
        if isinstance(values, Array):
            return values.view_as("np")
        if isinstance(values, ak.Array):
            return values.to_numpy()
        if isinstance(values, keep_types):
            return values
        return np.array(values)

    loc = _unwrap(mu, np.ndarray)
    # plain numbers are a valid scale and are left untouched
    scale = _unwrap(sigma, (float, int, np.ndarray))

    generator = np.random.default_rng(seed=seed)

    return Array(generator.normal(loc=loc, scale=scale))
@@ -4,11 +4,49 @@ import logging
4
4
 
5
5
  import awkward as ak
6
6
  import numpy as np
7
+ from dbetto import AttrsDict
7
8
  from lgdo import Table, VectorOfVectors
9
+ from numpy.typing import ArrayLike
8
10
 
9
11
  log = logging.getLogger(__name__)
10
12
 
11
13
 
14
def isin(channels: ak.Array, chan_list: list):
    """Check, element by element, whether the entries of channels are in chan_list.

    The array is flattened for the membership test and unflattened again,
    so the result has the same per-row counts as `channels`.
    """
    counts = ak.num(channels, axis=-1)
    flat_mask = np.isin(ak.flatten(channels), chan_list)

    return ak.unflatten(flat_mask, counts)
22
+
23
+
24
def get_isin_group(
    channels: ArrayLike, groups: AttrsDict, tcm_tables: dict, group: str = "off"
) -> ak.Array:
    """Flag the channels that belong to a given group.

    Parameters
    ----------
    channels
        Array of the channel indices.
    groups
        A mapping of the group for every channel name.
    tcm_tables
        the mapping of table names to indices.
    group
        the group to select.

    Returns
    -------
    an awkward array of booleans with the same shape as channels.
    """
    # group of every channel index
    usability = {}
    for name, uid in tcm_tables.items():
        usability[uid] = groups[name]

    # indices of the channels in the requested group
    group_idx = [uid for uid, state in usability.items() if state == group]

    return isin(channels, group_idx)
48
+
49
+
12
50
  def _sort_data(obj: ak.Array, *, time_name: str = "time", evtid_name: str = "evtid") -> ak.Array:
13
51
  """Sort the data by evtid then time.
14
52
 
@@ -18,6 +18,16 @@ from .profile import ProfileDict
18
18
  log = logging.getLogger(__name__)
19
19
 
20
20
 
21
def get_table_names(tcm: VectorOfVectors) -> dict:
    """Extract table names from tcm.attrs['tables'] and return them as a dictionary.

    The attribute is expected to be the string form of a list of table
    paths, e.g. ``"['hit/det001','hit/det002']"``. Only the last path
    component of each entry is kept.

    Parameters
    ----------
    tcm
        object carrying the ``tables`` attribute.

    Returns
    -------
    mapping of each table name to its position in the list. An empty list
    gives an empty dictionary.
    """
    raw = tcm.attrs["tables"]

    # drop the list brackets, the spaces and both kinds of quotes
    cleaned = raw.strip("[]").replace(" ", "").replace("'", "").replace('"', "")

    # splitting an empty string gives [""], skip such empty entries
    names = [tab.split("/")[-1] for tab in cleaned.split(",") if tab]

    return {name: idx for idx, name in enumerate(names)}
29
+
30
+
21
31
  def get_wo_mode(
22
32
  group: int, out_det: int, in_det: int, chunk: int, new_hit_file: bool, overwrite: bool = False
23
33
  ) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reboost
3
- Version: 0.5.5
3
+ Version: 0.6.0
4
4
  Summary: New LEGEND Monte-Carlo simulation post-processing
5
5
  Author-email: Manuel Huber <info@manuelhu.de>, Toby Dixon <toby.dixon.23@ucl.ac.uk>, Luigi Pertoldi <gipert@pm.me>
6
6
  Maintainer: The LEGEND Collaboration
@@ -696,6 +696,7 @@ Classifier: Topic :: Scientific/Engineering
696
696
  Requires-Python: >=3.9
697
697
  Description-Content-Type: text/markdown
698
698
  License-File: LICENSE
699
+ Requires-Dist: hdf5plugin
699
700
  Requires-Dist: colorlog
700
701
  Requires-Dist: numpy
701
702
  Requires-Dist: scipy
@@ -48,6 +48,7 @@ tests/test_profile.py
48
48
  tests/test_shape.py
49
49
  tests/test_units.py
50
50
  tests/test_utils.py
51
+ tests/evt/test_evt.py
51
52
  tests/glm/test_build_glm.py
52
53
  tests/hit/test_build_hit.py
53
54
  tests/hit/configs/args.yaml
@@ -1,3 +1,4 @@
1
+ hdf5plugin
1
2
  colorlog
2
3
  numpy
3
4
  scipy
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ import awkward as ak
4
+ import pytest
5
+ from dbetto import AttrsDict
6
+ from lgdo import Array, Struct, Table, VectorOfVectors, lh5
7
+
8
+ from reboost.build_evt import build_evt
9
+
10
+
11
@pytest.fixture(scope="module")
def test_gen_lh5(tmptestdir):
    """Write a small two-detector hit file and build a matching TCM.

    Returns the path of the hit file and the TCM table.
    """
    hit_path = str(tmptestdir / "basic_hit.lh5")

    # one table with an energy column per detector
    tab1 = Table({"energy": Array([100, 200, 300])})
    tab2 = Table({"energy": Array([2615, 2042, 100, 500])})

    lh5.write(Struct({"det1": tab1}), "hit", hit_path, wo_mode="of")
    lh5.write(Struct({"det2": tab2}), "hit", hit_path, wo_mode="append_column")

    # the TCM: channel index and row in the channel table for every hit
    tcm = Table(
        {
            "table_key": VectorOfVectors(ak.Array([[0], [1], [1], [0, 1], [1, 0]])),
            "row_in_table": VectorOfVectors(ak.Array([[0], [0], [1], [1, 2], [3, 2]])),
        },
        attrs={"tables": "['stp/det1','stp/det2']"},
    )

    return hit_path, tcm
39
+
40
+
41
def test_basic(test_gen_lh5):
    """Build the events in memory for two runs and check the output table."""
    hit_path, tcm = test_gen_lh5

    ch_groups = AttrsDict({"det1": "on", "det2": "off"})
    pars = AttrsDict(
        {
            "p01-r001": {"reso": {"det1": [1, 0.1], "det2": [2, 0.2]}},
            "p01-r002": {"reso": {"det1": [1, 0.2], "det2": [1, 0.2]}},
        }
    )
    run_part = AttrsDict({"p01-r001": 2, "p01-r002": 3})

    evts = build_evt(
        tcm,
        hitfile=hit_path,
        outfile=None,
        channel_groups=ch_groups,
        pars=pars,
        run_part=run_part,
    )
    assert isinstance(evts, Table)

    # one row per event: 2 events in the first run and 3 in the second
    assert len(evts) == 5

    # all the fields filled by build_evt should be present
    expected_fields = {
        "channel",
        "row_in_table",
        "period",
        "run",
        "is_good",
        "energy",
        "multiplicity",
    }
    assert expected_fields <= set(evts.keys())
@@ -122,8 +122,10 @@ def test_basic(test_gen_lh5, tmptestdir):
122
122
  assert lh5.ls(outfile) == ["hit", "vtx"]
123
123
 
124
124
  with h5py.File(outfile) as h5f:
125
- assert h5f["/hit/det1/energy"].shuffle is True
126
- assert h5f["/hit/det1/energy"].compression == "lzf"
125
+ assert (
126
+ h5f["/hit/det1/energy"].id.get_create_plist().get_filter(0)[3]
127
+ == b"Zstandard compression: http://www.zstd.net"
128
+ )
127
129
 
128
130
  hits = lh5.read("hit/det1", outfile).view_as("ak")
129
131
 
@@ -100,7 +100,7 @@ def dt_map_dummy(legendtestdata):
100
100
  data = lh5.read("V99000A", legendtestdata["lh5/hpge-drift-time-maps.lh5"])
101
101
  data = AttrsDict({k: data[k].view_as("np", with_units=True) for k in ("r", "z", "drift_time")})
102
102
 
103
- nan_idx = np.where(data.drift_time.m == np.nan)
103
+ nan_idx = np.isnan(data.drift_time.m)
104
104
 
105
105
  dt_dummy_z = np.arange(0.1, 2, step=0.023)
106
106
  drift_time = np.tile(dt_dummy_z, 38).reshape((38, 83))
@@ -11,7 +11,7 @@ import pygeomtools
11
11
  import pytest
12
12
  from dbetto import AttrsDict
13
13
  from legendtestdata import LegendTestData
14
- from lgdo import Array, Table
14
+ from lgdo import Array, Struct, Table, VectorOfVectors, lh5
15
15
 
16
16
  import reboost
17
17
 
@@ -54,6 +54,58 @@ def make_gdml(test_data_configs):
54
54
  return f"{test_data_configs}/geometry.gdml"
55
55
 
56
56
 
57
@pytest.fixture(scope="module")
def hitfiles(tmptestdir):
    """Write a hit tier file with two channels.

    Returns the awkward views of both channel tables and the file path.
    """
    hit_path = f"{tmptestdir}/hit_file_test.lh5"

    det001 = Table(
        {
            "energy": Array([100, 200, 400, 300]),
            "times": VectorOfVectors([[0.1], [0.2, 0.3], [0.4, 98], [2]]),
        }
    )
    det002 = Table(
        {
            "energy": Array([10, 70, 0, 56, 400, 400]),
            "times": VectorOfVectors([[12], [], [-0.4, 0.4], [89], [1], [2]]),
        }
    )

    # the first write creates the file, the second adds the other channel
    lh5.write(Struct({"det001": det001}), "hit", hit_path, wo_mode="of")
    lh5.write(Struct({"det002": det002}), "hit", hit_path, wo_mode="append_column")

    return det001.view_as("ak"), det002.view_as("ak"), hit_path
82
+
83
+
84
def test_read_data_at_channel(hitfiles):
    """Check that the data is gathered at the channels and rows of a TCM."""
    channel1, channel2, hit_path = hitfiles
    tab_map = {"det001": 0, "det002": 1}

    # make a TCM
    tcm_channels = ak.Array([[0], [0], [0, 1], [1], [1], [0, 1], [1], [1]])
    tcm_rows = ak.Array([[0], [1], [2, 0], [1], [2], [3, 3], [4], [5]])

    energy = reboost.core.read_data_at_channel_as_ak(
        tcm_channels, tcm_rows, hit_path, "energy", "hit", tab_map
    )

    # check the shape is the same
    assert len(energy) == len(tcm_channels)
    assert ak.all(ak.num(energy, axis=-1) == ak.num(tcm_channels, axis=-1))

    # check the data itself
    assert energy[0] == channel1.energy[0]
    assert energy[1] == channel1.energy[1]
    assert ak.all(energy[2] == [channel1.energy[2], channel2.energy[0]])

    # check every event, this also covers the events where the two
    # channels are interleaved and the values have to be put back in order
    assert ak.to_list(energy) == [[100], [200], [400, 10], [70], [0], [300, 56], [400], [400]]

    # also check for VoV
    times = reboost.core.read_data_at_channel_as_ak(
        tcm_channels, tcm_rows, hit_path, "times", "hit", tab_map
    )
    assert len(times) == len(tcm_channels)
    assert ak.all(ak.num(times, axis=1) == ak.num(tcm_channels, axis=-1))
108
+
57
109
  def test_get_objects(test_data_configs, make_gdml):
58
110
  # check basic eval
59
111
  expression = "pyg4ometry.geant4.Registry()"
@@ -98,3 +98,28 @@ def test_sample():
98
98
  # sigma float
99
99
  samples = stats.gaussian_sample([1, 2, 3], 0.1)
100
100
  assert isinstance(samples, Array)
101
+
102
+
103
def test_energy_res():
    """Check the shape and a few values of the resolution and the smearing."""
    tcm_tables = {"det000": 0, "det001": 1, "det002": 2}
    reso_pars = {"det000": [1, 0], "det001": [1, 0.01], "det002": [2, 0.05]}

    channels = ak.Array([[0, 1], [1], [2, 0, 1]])
    energy = ak.Array([[100, 100], [200], [300, 100, 100]])

    def reso_func(energy, p0, p1):
        return np.sqrt(energy * p1 + p0)

    def same_shape(result):
        # same number of events and of entries per event as the input
        return len(result) == len(energy) and ak.all(
            ak.num(result, axis=-1) == ak.num(energy, axis=-1)
        )

    reso = stats.get_resolution(energy, channels, tcm_tables, reso_pars, reso_func)
    assert same_shape(reso)

    # test a few values
    assert reso[0][0] == np.sqrt(100 * 0 + 1)
    assert reso[0][1] == np.sqrt(100 * 0.01 + 1)

    smeared = stats.apply_energy_resolution(energy, channels, tcm_tables, reso_pars, reso_func)
    assert same_shape(smeared)
@@ -93,6 +93,26 @@ def test_time_group():
93
93
  )
94
94
 
95
95
 
96
def test_isin_group():
    """Check the membership test on jagged arrays and the group selection."""
    channels = ak.Array([[1, 2, 3], [4, 5]])

    in_first = group.isin(channels, [1, 2])
    assert ak.all(in_first == ak.Array([[1, 1, 0], [0, 0]]))

    in_second = group.isin(channels, [4])
    assert ak.all(in_second == ak.Array([[0, 0, 0], [1, 0]]))

    # now select by group name
    tcm_tables = {"det001": 1, "det002": 2, "det003": 3}
    groups = {"det001": "on", "det002": "on", "det003": "off"}
    channels = ak.Array([[1, 2], [1], [3]])

    off = group.get_isin_group(channels, groups, tcm_tables, group="off")
    assert ak.all(off == ak.Array([[0, 0], [0], [1]]))

    on = group.get_isin_group(channels, groups, tcm_tables, group="on")
    assert ak.all(on == ak.Array([[1, 1], [1], [0]]))
+
115
+
96
116
  def test_cluster_basic():
97
117
  trackid = ak.Array([[1, 1, 1, 2, 2, 3, 3, 7], [2, 2, 2, 3, 3, 3], [1]])
98
118
 
@@ -6,7 +6,7 @@ from pathlib import Path
6
6
 
7
7
  import pytest
8
8
  import yaml
9
- from lgdo.types import Array, Table
9
+ from lgdo.types import Array, Table, VectorOfVectors
10
10
 
11
11
  import reboost
12
12
  from reboost.shape import group
@@ -15,6 +15,7 @@ from reboost.utils import (
15
15
  copy_units,
16
16
  get_file_dict,
17
17
  get_function_string,
18
+ get_table_names,
18
19
  get_wo_mode,
19
20
  merge_dicts,
20
21
  )
@@ -216,5 +217,11 @@ def test_units():
216
217
  assert reshaped.c.attrs["units"] == "keV"
217
218
 
218
219
 
219
- def test_get_channels():
220
- pass
220
def test_table_names():
    """Check the table names are mapped to their position in the list."""
    tcm = VectorOfVectors([[]], attrs={"tables": "['hit/det001','hit/det002']"})

    table_names = get_table_names(tcm)

    assert table_names["det001"] == 0
    assert table_names["det002"] == 1
@@ -1,166 +0,0 @@
1
- """A program for combining the hits from various detectors, to build events.
2
-
3
- Is able to parse a config file with the following format config file:
4
-
5
- .. code-block:: yaml
6
-
7
- channels:
8
- geds_on:
9
- - det001
10
- - det002
11
- geds_ac:
12
- - det003
13
-
14
- outputs:
15
- - energy
16
- - multiplicity
17
-
18
- operations:
19
- energy_id:
20
- channels: geds_on
21
- aggregation_mode: gather
22
- query: "hit.energy > 25"
23
- expression: tcm.channel_id
24
-
25
- energy:
26
- aggregation_mode: keep_at_ch:evt.energy_id
27
- expression: "hit.energy > 25"
28
- channels: geds_on
29
-
30
- multiplicity:
31
- channels: geds_on
32
- aggregation_mode: sum
33
- expression: "hit.energy > 25"
34
- initial: 0
35
-
36
-
37
- Must contain:
38
- - "channels": dictionary of channel groupings
39
- - "outputs": fields for the output file
40
- - "operations": operations to perform see :func:`pygama.evt.build_evt.evaluate_expression` for more details.
41
- """
42
-
43
- from __future__ import annotations
44
-
45
- import logging
46
-
47
- import awkward as ak
48
- import numpy as np
49
- from lgdo import Table
50
- from lgdo.lh5 import LH5Iterator, write
51
- from pygama.evt.build_evt import evaluate_expression
52
- from pygama.evt.utils import TCMData
53
-
54
- from . import utils
55
-
56
- log = logging.getLogger(__name__)
57
-
58
-
59
- def build_evt(
60
- hit_file: str, tcm_file: str, evt_file: str | None, config: dict, buffer: int = int(5e6)
61
- ) -> ak.Array | None:
62
- """Generates the event tier from the hit and tcm.
63
-
64
- Parameters
65
- ----------
66
- hit_file
67
- path to the hit tier file
68
- tcm_file
69
- path to the tcm tier file
70
- evt_file
71
- path to the evt tier (output) file, if `None` the :class:`Table` is returned in memory
72
- config
73
- dictionary of the configuration.
74
- buffer
75
- number of events to process simultaneously
76
-
77
- Returns
78
- -------
79
- ak.Array of the evt tier data (if the data is not saved to disk)
80
- """
81
- # create the objects needed for evaluate expression
82
-
83
- file_info = {
84
- "hit": (hit_file, "hit", "det{:03}"),
85
- "evt": (evt_file, "evt"),
86
- }
87
-
88
- # iterate through the TCM
89
-
90
- out_ak = ak.Array([])
91
- mode = "overwrite_file"
92
-
93
- # get channel groupings
94
- channels = {}
95
- for group, info in config["channels"].items():
96
- if isinstance(info, str):
97
- channels[group] = [info]
98
-
99
- elif isinstance(info, list):
100
- channels[group] = info
101
-
102
- for tcm_lh5 in LH5Iterator(tcm_file, "tcm", buffer_len=buffer):
103
- tcm_lh5_sel = tcm_lh5
104
- tcm_ak = tcm_lh5_sel.view_as("ak")
105
-
106
- tcm = TCMData(
107
- id=np.array(ak.flatten(tcm_ak.array_id)),
108
- idx=np.array(ak.flatten(tcm_ak.array_idx)),
109
- cumulative_length=np.array(np.cumsum(ak.num(tcm_ak.array_id, axis=-1))),
110
- )
111
-
112
- n_rows = len(tcm.cumulative_length)
113
- out_tab = Table(size=n_rows)
114
-
115
- for name, info in config["operations"].items():
116
- msg = f"computing field {name}"
117
- log.debug(msg)
118
-
119
- defaultv = info.get("initial", np.nan)
120
- if isinstance(defaultv, str) and (defaultv in ["np.nan", "np.inf", "-np.inf"]):
121
- defaultv = eval(defaultv)
122
-
123
- channels_use = utils.get_channels_from_groups(info.get("channels", []), channels)
124
- channels_exclude = utils.get_channels_from_groups(
125
- info.get("exclude_channels", []), channels
126
- )
127
-
128
- if "aggregation_mode" not in info:
129
- field = out_tab.eval(
130
- info["expression"].replace("evt.", ""), info.get("parameters", {})
131
- )
132
- else:
133
- field = evaluate_expression(
134
- file_info,
135
- tcm,
136
- channels_use,
137
- table=out_tab,
138
- mode=info["aggregation_mode"],
139
- expr=info["expression"],
140
- query=info.get("query", None),
141
- sorter=info.get("sort", None),
142
- channels_skip=channels_exclude,
143
- default_value=defaultv,
144
- n_rows=n_rows,
145
- )
146
-
147
- msg = f"field {field}"
148
- log.debug(msg)
149
- out_tab.add_field(name, field)
150
-
151
- # remove fields if necessary
152
- existing_cols = list(out_tab.keys())
153
- for col in existing_cols:
154
- if col not in config["outputs"]:
155
- out_tab.remove_column(col, delete=True)
156
-
157
- # write
158
- if evt_file is not None:
159
- write(out_tab, "evt", evt_file, wo_mode=mode)
160
- mode = "append"
161
- else:
162
- out_ak = ak.concatenate((out_ak, out_tab.view_as("ak")))
163
-
164
- if evt_file is None:
165
- return out_ak
166
- return None
@@ -1,57 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
-
5
- import awkward as ak
6
- import numpy as np
7
- from lgdo import Array
8
- from numpy.typing import ArrayLike
9
-
10
- log = logging.getLogger(__name__)
11
-
12
-
13
- def gaussian_sample(mu: ArrayLike, sigma: ArrayLike | float, *, seed: int = 999) -> Array:
14
- r"""Generate samples from a gaussian.
15
-
16
- Based on:
17
-
18
- .. math::
19
-
20
- y_i \sim \mathcal{N}(\mu_i,\sigma_i)
21
-
22
- where $y_i$ is the output, $x_i$ the input (mu) and $\sigma$ is the standard
23
- deviation for each point.
24
-
25
- Parameters
26
- ----------
27
- mu
28
- the mean positions to sample from, should be a flat (ArrayLike) object.
29
- sigma
30
- the standard deviation for each input value, can also be a single float.
31
- seed
32
- the random seed.
33
-
34
- Returns
35
- -------
36
- sampled values.
37
- """
38
- # convert inputs
39
-
40
- if isinstance(mu, Array):
41
- mu = mu.view_as("np")
42
- elif isinstance(mu, ak.Array):
43
- mu = mu.to_numpy()
44
- elif not isinstance(mu, np.ndarray):
45
- mu = np.array(mu)
46
-
47
- # similar for sigma
48
- if isinstance(sigma, Array):
49
- sigma = sigma.view_as("np")
50
- elif isinstance(sigma, ak.Array):
51
- sigma = sigma.to_numpy()
52
- elif not isinstance(sigma, (float, int, np.ndarray)):
53
- sigma = np.array(sigma)
54
-
55
- rng = np.random.default_rng(seed=seed) # Create a random number generator
56
-
57
- return Array(rng.normal(loc=mu, scale=sigma))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes