legend-pydataobj 1.8.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.8.1.dist-info → legend_pydataobj-1.10.0.dist-info}/METADATA +3 -2
- legend_pydataobj-1.10.0.dist-info/RECORD +55 -0
- {legend_pydataobj-1.8.1.dist-info → legend_pydataobj-1.10.0.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +4 -0
- lgdo/_version.py +2 -2
- lgdo/lh5/_serializers/__init__.py +2 -0
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +122 -70
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +51 -37
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +26 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +20 -4
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +78 -7
- lgdo/lh5/datatype.py +1 -0
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/store.py +101 -11
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +13 -2
- lgdo/lh5_store.py +1 -0
- lgdo/types/__init__.py +2 -0
- lgdo/types/histogram.py +419 -0
- lgdo/types/table.py +1 -1
- legend_pydataobj-1.8.1.dist-info/RECORD +0 -54
- {legend_pydataobj-1.8.1.dist-info → legend_pydataobj-1.10.0.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.8.1.dist-info → legend_pydataobj-1.10.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.8.1.dist-info → legend_pydataobj-1.10.0.dist-info}/top_level.txt +0 -0
lgdo/lh5/store.py
CHANGED
@@ -5,13 +5,16 @@ HDF5 files.
 
 from __future__ import annotations
 
+import bisect
 import logging
 import os
 import sys
 from collections.abc import Mapping, Sequence
+from inspect import signature
 from typing import Any
 
 import h5py
+import numpy as np
 from numpy.typing import ArrayLike
 
 from .. import types
@@ -34,7 +37,9 @@ class LH5Store:
     lgdo.waveformtable.WaveformTable
     """
 
-    def __init__(
+    def __init__(
+        self, base_path: str = "", keep_open: bool = False, locking: bool = False
+    ) -> None:
         """
         Parameters
         ----------
@@ -43,12 +48,21 @@ class LH5Store:
         keep_open
             whether to keep files open by storing the :mod:`h5py` objects as
            class attributes.
+        locking
+            whether to lock files when reading
         """
         self.base_path = "" if base_path == "" else utils.expand_path(base_path)
         self.keep_open = keep_open
+        self.locking = locking
         self.files = {}
 
-    def gimme_file(
+    def gimme_file(
+        self,
+        lh5_file: str | h5py.File,
+        mode: str = "r",
+        page_buffer: int = 0,
+        **file_kwargs,
+    ) -> h5py.File:
         """Returns a :mod:`h5py` file object from the store or creates a new one.
 
         Parameters
@@ -57,12 +71,20 @@ class LH5Store:
             LH5 file name.
         mode
             mode in which to open file. See :class:`h5py.File` documentation.
+        page_buffer
+            enable paged aggregation with a buffer of this size in bytes
+            Only used when creating a new file. Useful when writing a file
+            with a large number of small datasets. This is a short-hand for
+            ``(fs_stragety="page", fs_pagesize=[page_buffer])``
+        file_kwargs
+            Keyword arguments for :class:`h5py.File`
         """
         if isinstance(lh5_file, h5py.File):
             return lh5_file
 
         if mode == "r":
             lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
+            file_kwargs["locking"] = self.locking
 
         if lh5_file in self.files:
             return self.files[lh5_file]
@@ -72,20 +94,30 @@ class LH5Store:
         else:
             full_path = lh5_file
 
+        file_exists = os.path.exists(full_path)
         if mode != "r":
             directory = os.path.dirname(full_path)
             if directory != "" and not os.path.exists(directory):
                 log.debug(f"making path {directory}")
                 os.makedirs(directory)
 
-        if mode == "r" and not
+        if mode == "r" and not file_exists:
             msg = f"file {full_path} not found"
             raise FileNotFoundError(msg)
+        if not file_exists:
+            mode = "w"
 
-        if mode != "r" and
+        if mode != "r" and file_exists:
             log.debug(f"opening existing file {full_path} in mode '{mode}'")
 
-
+        if mode == "w":
+            file_kwargs.update(
+                {
+                    "fs_strategy": "page",
+                    "fs_page_size": page_buffer,
+                }
+            )
+        h5f = h5py.File(full_path, mode, **file_kwargs)
 
         if self.keep_open:
             self.files[lh5_file] = h5f
@@ -135,6 +167,7 @@ class LH5Store:
         obj_buf: types.LGDO = None,
         obj_buf_start: int = 0,
         decompress: bool = True,
+        **file_kwargs,
     ) -> tuple[types.LGDO, int]:
         """Read LH5 object data from a file in the store.
 
@@ -143,13 +176,62 @@ class LH5Store:
         .lh5.core.read
         """
         # grab files from store
-        if
-            lh5_obj =
+        if isinstance(lh5_file, (str, h5py.File)):
+            lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
         else:
-
-
+            lh5_files = list(lh5_file)
+            n_rows_read = 0
+
+            for i, h5f in enumerate(lh5_files):
+                if (
+                    isinstance(idx, (list, tuple))
+                    and len(idx) > 0
+                    and not np.isscalar(idx[0])
+                ):
+                    # a list of lists: must be one per file
+                    idx_i = idx[i]
+                elif idx is not None:
+                    # make idx a proper tuple if it's not one already
+                    if not (isinstance(idx, tuple) and len(idx) == 1):
+                        idx = (idx,)
+                    # idx is a long continuous array
+                    n_rows_i = utils.read_n_rows(name, h5f)
+                    # find the length of the subset of idx that contains indices
+                    # that are less than n_rows_i
+                    n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
+                    # now split idx into idx_i and the remainder
+                    idx_i = np.array(idx[0])[:n_rows_to_read_i]
+                    idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
+                else:
+                    idx_i = None
+                n_rows_i = n_rows - n_rows_read
+
+                obj_buf, n_rows_read_i = self.read(
+                    name,
+                    h5f,
+                    start_row,
+                    n_rows_i,
+                    idx_i,
+                    use_h5idx,
+                    field_mask,
+                    obj_buf,
+                    obj_buf_start,
+                    decompress,
+                )
+
+                n_rows_read += n_rows_read_i
+                if n_rows_read >= n_rows or obj_buf is None:
+                    return obj_buf, n_rows_read
+                start_row = 0
+                obj_buf_start += n_rows_read_i
+            return obj_buf, n_rows_read
+
+        if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
+            idx = idx[0]
         return _serializers._h5_read_lgdo(
-            lh5_obj,
+            lh5_obj.id,
+            lh5_obj.file.filename,
+            lh5_obj.name,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -170,6 +252,7 @@ class LH5Store:
         n_rows: int | None = None,
         wo_mode: str = "append",
         write_start: int = 0,
+        page_buffer: int = 0,
         **h5py_kwargs,
     ) -> None:
         """Write an LGDO into an LH5 file.
@@ -199,10 +282,17 @@ class LH5Store:
         # write_object:overwrite.
         mode = "w" if wo_mode == "of" else "a"
 
+        file_kwargs = {
+            k: h5py_kwargs[k]
+            for k in h5py_kwargs & signature(h5py.File).parameters.keys()
+        }
+
        return _serializers._h5_write_lgdo(
             obj,
             name,
-            self.gimme_file(
+            self.gimme_file(
+                lh5_file, mode=mode, page_buffer=page_buffer, **file_kwargs
+            ),
             group=group,
             start_row=start_row,
             n_rows=n_rows,
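The `lgdo/lh5/store.py` hunks above add a `locking` flag, a `page_buffer` option that requests HDF5 paged aggregation when a brand-new file is created, and support for passing a list of files to `LH5Store.read()`. Below is a minimal sketch of how these options might be exercised; the file names, the dataset name, and the expected row count are made up for illustration and are not taken from the package documentation:

```python
import numpy as np

from lgdo.lh5 import LH5Store
from lgdo.types import Array

# locking is forwarded to h5py.File when files are opened for reading
store = LH5Store(locking=False)

# per the hunk above, a newly created file gets fs_strategy="page" with
# fs_page_size=page_buffer, which helps when writing many small datasets
store.write(
    Array(np.array([1, 2, 3])), "data", "out_a.lh5", wo_mode="of", page_buffer=65536
)
store.write(
    Array(np.array([4, 5])), "data", "out_b.lh5", wo_mode="of", page_buffer=65536
)

# read() now also accepts a sequence of files and concatenates rows across them
obj, n_rows = store.read("data", ["out_a.lh5", "out_b.lh5"])
print(n_rows)  # expected: 5 (3 rows from the first file, 2 from the second)
```

Since `page_buffer` only matters when the output file does not yet exist, it is passed to the writes that create the files in this sketch.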
lgdo/lh5/tools.py
CHANGED
lgdo/lh5/utils.py
CHANGED
@@ -125,7 +125,12 @@ def get_h5_group(
         else:
             group = base_group.create_group(group)
             if grp_attrs is not None:
-                group.attrs.update(
+                group.attrs.update(
+                    {
+                        k: v.encode("utf-8") if isinstance(v, str) else v
+                        for k, v in grp_attrs.items()
+                    }
+                )
             return group
     if (
         grp_attrs is not None
@@ -141,7 +146,13 @@ def get_h5_group(
         log.debug(f"overwriting {group}.attrs...")
         for key in group.attrs:
             group.attrs.pop(key)
-
+
+        group.attrs.update(
+            {
+                k: v.encode("utf-8") if isinstance(v, str) else v
+                for k, v in grp_attrs.items()
+            }
+        )
 
     return group
 
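The `get_h5_group` hunks above now pass group attributes through a comprehension that encodes string values to UTF-8 bytes before handing them to `h5py`. A standalone sketch of that behaviour follows; the file name and attribute values are hypothetical, and the stated effect (h5py storing the values as fixed-length byte strings rather than variable-length strings) is an assumption about the motivation, not a claim from the package:

```python
import h5py

grp_attrs = {"datatype": "struct{binning,weights}", "units": "keV", "version": 2}

with h5py.File("attrs_demo.lh5", "w") as f:
    grp = f.create_group("demo")
    # same comprehension as in the diff: bytes for strings, everything else untouched
    grp.attrs.update(
        {k: v.encode("utf-8") if isinstance(v, str) else v for k, v in grp_attrs.items()}
    )
    print(dict(grp.attrs))  # string values round-trip as bytes objects
```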
lgdo/lh5_store.py
CHANGED
lgdo/types/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .array import Array
 from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
 from .encoded import ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors
 from .fixedsizearray import FixedSizeArray
+from .histogram import Histogram
 from .lgdo import LGDO
 from .scalar import Scalar
 from .struct import Struct
@@ -18,6 +19,7 @@ __all__ = [
     "ArrayOfEqualSizedArrays",
     "ArrayOfEncodedEqualSizedArrays",
     "FixedSizeArray",
+    "Histogram",
     "LGDO",
     "Scalar",
     "Struct",
lgdo/types/histogram.py
ADDED
@@ -0,0 +1,419 @@
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterable
+from typing import Any
+
+import hist
+import numpy as np
+from numpy.typing import NDArray
+
+from .array import Array
+from .lgdo import LGDO
+from .scalar import Scalar
+from .struct import Struct
+
+log = logging.getLogger(__name__)
+
+
+class Histogram(Struct):
+    class Axis(Struct):
+        def __init__(
+            self,
+            edges: NDArray | Array | None,
+            first: float | None,
+            last: float | None,
+            step: float | None,
+            closedleft: bool = True,
+            binedge_attrs: dict[str, Any] | None = None,
+        ) -> None:
+            """
+            A special struct to group axis parameters for use in a :class:`Histogram`.
+
+            Depending on the parameters, an axis either can have
+
+            * a binning described by a range object, if ``first``, ``last`` and ``step``
+              are passed, or
+            * a variable binning described by the ``edges`` array.
+
+            Parameters
+            ----------
+            edges
+                an array of edges that describe the binning of this axis.
+            first
+                left edge of the leftmost bin
+            last
+                right edge of the rightmost bin
+            step
+                step size (width of each bin)
+            closedleft
+                if True, the bin intervals are left-closed :math:`[a,b)`;
+                if False, intervals are right-closed :math:`(a,b]`.
+            binedge_attrs
+                attributes that will be added to the ``binedges`` LGDO that
+                is part of the axis struct.
+            """
+            if edges is not None and (
+                first is not None or last is not None or step is not None
+            ):
+                msg = "can only construct Axis either from edges or from range"
+                raise ValueError(msg)
+            if edges is None and (first is None or last is None or step is None):
+                msg = "did not pass all range parameters"
+                raise ValueError(msg)
+
+            if edges is None:
+                edges = Struct(
+                    {
+                        "first": Scalar(first),
+                        "last": Scalar(last),
+                        "step": Scalar(step),
+                    },
+                    binedge_attrs,
+                )
+            else:
+                if not isinstance(edges, Array):
+                    edges = Array(edges, attrs=binedge_attrs)
+                elif binedge_attrs is not None:
+                    msg = "passed both binedge as Array LGDO instance and binedge_attrs"
+                    raise ValueError(msg)
+
+                if len(edges.nda.shape) != 1:
+                    msg = "must pass an array<1>{real} as edges vector"
+                    raise ValueError(msg)
+
+            super().__init__({"binedges": edges, "closedleft": Scalar(closedleft)})
+
+        @classmethod
+        def from_edges(
+            cls,
+            edges: NDArray | Iterable[float],
+            binedge_attrs: dict[str, Any] | None = None,
+        ) -> Histogram.Axis:
+            """Create a new axis with variable binning described by ``edges``."""
+            edges = np.array(edges)
+            return cls(edges, None, None, None, True, binedge_attrs)
+
+        @classmethod
+        def from_range_edges(
+            cls,
+            edges: NDArray | Iterable[float],
+            binedge_attrs: dict[str, Any] | None = None,
+        ) -> Histogram.Axis:
+            """Create a new axis from the binning described by ``edges``, but try to convert it to
+            a evenly-spaced range object first.
+
+            .. warning ::
+
+                This function might return a wrong binning, especially in the case of very small
+                magnitudes of the spacing. See the documentation of :func:`numpy.isclose` for
+                details. Use this function only with caution, if you know the binning's order of
+                magniutude.
+            """
+            edges = np.array(edges)
+            edge_diff = np.diff(edges)
+            if np.any(~np.isclose(edge_diff, edge_diff[0])):
+                return cls(edges, None, None, None, True, binedge_attrs)
+            return cls(None, edges[0], edges[-1], edge_diff[0], True, binedge_attrs)
+
+        @property
+        def is_range(self) -> bool:
+            return isinstance(self["binedges"], Struct)
+
+        @property
+        def first(self) -> float:
+            if not self.is_range:
+                msg = "Axis is not a range"
+                raise TypeError(msg)
+            return self["binedges"]["first"].value
+
+        @property
+        def last(self) -> float:
+            if not self.is_range:
+                msg = "Axis is not a range"
+                raise TypeError(msg)
+            return self["binedges"]["last"].value
+
+        @property
+        def step(self) -> float:
+            if not self.is_range:
+                msg = "Axis is not a range"
+                raise TypeError(msg)
+            return self["binedges"]["step"].value
+
+        @property
+        def closedleft(self) -> bool:
+            return self["closedleft"].value
+
+        @property
+        def nbins(self) -> int:
+            """Return the number of bins, both for variable and range binning."""
+            if self.is_range:
+                bins = (self.last - self.first) / self.step
+                bins_int = int(np.rint(bins))
+                assert np.isclose(bins, bins_int)
+                return bins_int
+            return len(self["binedges"].nda) - 1
+
+        @property
+        def edges(self) -> NDArray:
+            """Return all binedges, both for variable and range binning."""
+            if self.is_range:
+                return np.linspace(self.first, self.last, self.nbins + 1)
+            return self["binedges"].nda
+
+        def __str__(self) -> str:
+            thr_orig = np.get_printoptions()["threshold"]
+            np.set_printoptions(threshold=8)
+
+            if self.is_range:
+                string = f"first={self.first}, last={self.last}, step={self.step}"
+            else:
+                string = f"edges={self.edges}"
+            string += f", closedleft={self.closedleft}"
+
+            attrs = self.get_binedgeattrs()
+            if attrs:
+                string += f" with attrs={attrs}"
+
+            np.set_printoptions(threshold=thr_orig)
+            return string
+
+        def get_binedgeattrs(self, datatype: bool = False) -> dict:
+            """Return a copy of the LGDO attributes dictionary of the binedges
+
+            Parameters
+            ----------
+            datatype
+                if ``False``, remove ``datatype`` attribute from the output
+                dictionary.
+            """
+            return self["binedges"].getattrs(datatype)
+
+    def __init__(
+        self,
+        weights: hist.Hist | NDArray | Array,
+        binning: None
+        | Iterable[Histogram.Axis]
+        | Iterable[NDArray]
+        | Iterable[tuple[float, float, float]] = None,
+        isdensity: bool = False,
+        attrs: dict[str, Any] | None = None,
+        binedge_attrs: dict[str, Any] | None = None,
+        flow: bool = True,
+    ) -> None:
+        """A special struct to contain histogrammed data.
+
+        Parameters
+        ----------
+        weights
+            An :class:`numpy.ndarray` to be used for this object's internal
+            array, or a :class:`hist.Hist` object, whose data view is used
+            for this object's internal array.
+            Note: the array/histogram view is used directly, not copied
+        binning
+            * has to by None if a :class:`hist.Hist` has been passed as ``weights``
+            * can be a list of pre-initialized :class:`Histogram.Axis`
+            * can be a list of tuples, each representing a range, ``(first, last, step)``
+            * can be a list of numpy arrays, as returned by :func:`numpy.histogramdd`.
+        isdensity
+            If True, all bin contents represent a density (amount per volume), and not
+            an absolute amount.
+        binedge_attrs
+            attributes that will be added to the all ``binedges`` of all axes.
+            This does not work if :class:`Histogram.Axis` instances are directly passed
+            as binning.
+        attrs
+            a set of user attributes to be carried along with this LGDO.
+        flow
+            If ``False``, discard counts in over-/underflow bins of the passed
+            :class:`hist.Hist` instance. If ``True``, this data will also be discarded,
+            but a warning is emitted.
+
+        .. note ::
+
+            :class:`Histogram` does not support storing counts in overflow or
+            underflow bins. This parameter just controls, whether a warning will
+            be emitted.
+        """
+        if isinstance(weights, hist.Hist):
+            if binning is not None:
+                msg = "not allowed to pass custom binning if constructing from hist.Hist instance"
+                raise ValueError(msg)
+            if isdensity:
+                msg = "not allowed to pass isdensity=True if constructing from hist.Hist instance"
+                raise ValueError(msg)
+
+            if weights.sum(flow=True) != weights.sum(flow=False) and flow:
+                log.warning(
+                    "flow bins of hist.Hist cannot be represented, their counts are discarded"
+                )
+            weights_view = weights.view(flow=False)
+            if type(weights_view) is not np.ndarray:
+                msg = "only simple numpy-backed storages can be used in a hist.Hist"
+                raise ValueError(msg)
+            w = Array(weights_view)
+
+            b = []
+            for ax in weights.axes:
+                if not isinstance(ax, (hist.axis.Regular, hist.axis.Variable)):
+                    msg = "only regular or variable axes of hist.Hist can be converted"
+                    raise ValueError(msg)
+                if isinstance(ax, hist.axis.Regular):
+                    step = (ax.edges[-1] - ax.edges[0]) / ax.size
+                    bax = Histogram.Axis(
+                        None, ax.edges[0], ax.edges[-1], step, True, binedge_attrs
+                    )
+                    b.append(bax)
+                else:
+                    b.append(Histogram.Axis.from_edges(ax.edges, binedge_attrs))
+        else:
+            if binning is None:
+                msg = "need to also pass binning if passing histogram as array"
+                raise ValueError(msg)
+            w = weights if isinstance(weights, Array) else Array(weights)
+
+            if all(isinstance(ax, Histogram.Axis) for ax in binning):
+                if binedge_attrs is not None:
+                    msg = "passed both binedges as Axis instances and binedge_attrs"
+                    raise ValueError(msg)
+                b = binning
+            elif all(isinstance(ax, np.ndarray) for ax in binning):
+                b = [Histogram.Axis.from_edges(ax, binedge_attrs) for ax in binning]
+            elif all(isinstance(ax, tuple) for ax in binning):
+                b = [Histogram.Axis(None, *ax, True, binedge_attrs) for ax in binning]
+            else:
+                msg = "invalid binning object passed"
+                raise ValueError(msg)
+
+            if len(binning) != len(w.nda.shape):
+                msg = "binning and weight dimensions do not match"
+                raise ValueError(msg)
+            for i, ax in enumerate(b):
+                if ax.nbins != w.nda.shape[i]:
+                    msg = f"bin count does not match weight count along axis {i}"
+                    raise ValueError(msg)
+
+        b = Struct({f"axis_{i}": a for i, a in enumerate(b)})
+
+        super().__init__(
+            {"binning": b, "weights": w, "isdensity": Scalar(isdensity)},
+            attrs,
+        )
+
+    @property
+    def isdensity(self) -> bool:
+        return self["isdensity"].value
+
+    @property
+    def weights(self) -> Array:
+        return self["weights"]
+
+    @property
+    def binning(self) -> tuple[Histogram.Axis, ...]:
+        bins = sorted(self["binning"].items())
+        assert all(isinstance(v, Histogram.Axis) for k, v in bins)
+        return tuple(v for _, v in bins)
+
+    def __setitem__(self, name: str, obj: LGDO) -> None:
+        # do not allow for new attributes on this
+        msg = "histogram fields cannot be mutated"
+        raise TypeError(msg)
+
+    def __getattr__(self, name: str) -> None:
+        # do not allow for new attributes on this
+        msg = "histogram fields cannot be mutated"
+        raise TypeError(msg)
+
+    def add_field(self, name: str | int, obj: LGDO) -> None:  # noqa: ARG002
+        """
+        .. error ::
+
+            Not applicable: A histogram cannot be used as a struct
+        """
+        msg = "histogram fields cannot be mutated"
+        raise TypeError(msg)
+
+    def remove_field(self, name: str | int, delete: bool = False) -> None:  # noqa: ARG002
+        """
+        .. error ::
+
+            Not applicable: A histogram cannot be used as a struct
+        """
+        msg = "histogram fields cannot be mutated"
+        raise TypeError(msg)
+
+    def __str__(self) -> str:
+        string = "{\n"
+        for k, v in enumerate(self.binning):
+            string += f" 'axis_{k}': {v},\n"
+        string += "}"
+
+        attrs = self.getattrs()
+        if attrs:
+            string += f" with attrs={attrs}"
+
+        return string
+
+    def view_as(
+        self,
+        library: str,
+    ) -> tuple[NDArray] | hist.Hist:
+        r"""View the histogram data as a third-party format data structure.
+
+        This is typically a zero-copy or nearly zero-copy operation.
+
+        Supported third-party formats are:
+
+        - ``np``: returns a tuple of binning and an :class:`np.ndarray`, similar
+          to the return value of :func:`numpy.histogramdd`.
+        - ``hist``: returns an :class:`hist.Hist` that holds **a copy** of this
+          histogram's data.
+
+        Warning
+        -------
+        Viewing as ``hist`` will perform a copy of the stored histogram data.
+
+        Parameters
+        ----------
+        library
+            format of the returned data view.
+
+        See Also
+        --------
+        .LGDO.view_as
+        """
+        if library == "hist":
+            if self.isdensity:
+                msg = "hist.Hist cannot represent density histograms"
+                raise ValueError(msg)
+
+            hist_axes = []
+            for a in self.binning:
+                if not a.closedleft:
+                    msg = "hist.Hist cannot represent right-closed intervals"
+                    raise ValueError(msg)
+                if a.is_range:
+                    hist_ax = hist.axis.Regular(
+                        bins=a.nbins,
+                        start=a.first,
+                        stop=a.last,
+                        underflow=False,
+                        overflow=False,
+                    )
+                else:
+                    hist_ax = hist.axis.Variable(
+                        a.edges,
+                        underflow=False,
+                        overflow=False,
+                    )
+                hist_axes.append(hist_ax)
+
+            return hist.Hist(*hist_axes, data=self.weights.view_as("np"))
+
+        if library == "np":
+            edges = tuple([a.edges for a in self.binning])
+            return self.weights.view_as("np"), edges
+
+        msg = f"{library!r} is not a supported third-party format."
+        raise TypeError(msg)
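A short sketch of how the new `Histogram` LGDO defined above might be constructed and converted, based only on the constructor and `view_as` signatures in this file; the data and bin counts are made up:

```python
import hist
import numpy as np

from lgdo.types import Histogram

rng = np.random.default_rng(0)

# from a plain numpy histogram: weights plus the list of edge arrays
weights, edges = np.histogramdd(rng.normal(size=(1000, 1)), bins=[10])
h1 = Histogram(weights, binning=edges)
print(h1.binning[0].nbins, h1.isdensity)  # 10 False

# from a hist.Hist with a regular axis (stored internally as a range, not as edges)
hh = hist.Hist(hist.axis.Regular(10, 0, 1))
hh.fill(rng.uniform(size=100))
h2 = Histogram(hh)

# back to third-party views
w_np, e_np = h2.view_as("np")  # numpy array plus a tuple of edge arrays
h_hist = h2.view_as("hist")    # copies the data into a new hist.Hist
```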
lgdo/types/table.py
CHANGED
@@ -168,7 +168,7 @@ class Table(Struct):
         self.add_field(name, obj, use_obj_size=use_obj_size)
 
     def remove_column(self, name: str, delete: bool = False) -> None:
-        """Alias for :meth
+        """Alias for :meth:`Struct.remove_field` using table terminology 'column'."""
         super().remove_field(name, delete)
 
     def join(
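For completeness, a tiny sketch of the `Table.remove_column` alias whose docstring is completed above; the column names and data are illustrative:

```python
import numpy as np

from lgdo.types import Array, Table

tbl = Table(col_dict={"energy": Array(np.ones(4)), "flag": Array(np.zeros(4))})
tbl.remove_column("flag")  # alias for Struct.remove_field
print(list(tbl.keys()))    # ['energy']
```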
|