legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/METADATA +2 -2
- legend_pydataobj-1.10.1.dist-info/RECORD +55 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/compression/radware.py +8 -16
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +67 -78
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +55 -42
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +165 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +14 -5
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +81 -7
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/iterator.py +258 -74
- lgdo/lh5/store.py +116 -12
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +29 -44
- lgdo/types/histogram.py +122 -6
- lgdo/types/table.py +2 -2
- lgdo/types/vectorofvectors.py +1 -1
- legend_pydataobj-1.9.0.dist-info/RECORD +0 -55
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/top_level.txt +0 -0
lgdo/lh5/store.py
CHANGED
@@ -5,13 +5,17 @@ HDF5 files.
 
 from __future__ import annotations
 
+import bisect
 import logging
 import os
 import sys
+from collections import OrderedDict
 from collections.abc import Mapping, Sequence
+from inspect import signature
 from typing import Any
 
 import h5py
+import numpy as np
 from numpy.typing import ArrayLike
 
 from .. import types
@@ -34,7 +38,9 @@ class LH5Store:
     lgdo.waveformtable.WaveformTable
     """
 
-    def __init__(self, base_path: str = "", keep_open: bool = False) -> None:
+    def __init__(
+        self, base_path: str = "", keep_open: bool = False, locking: bool = False
+    ) -> None:
         """
         Parameters
         ----------
@@ -42,13 +48,23 @@ class LH5Store:
            directory path to prepend to LH5 files.
        keep_open
            whether to keep files open by storing the :mod:`h5py` objects as
-           class attributes.
+           class attributes. If ``keep_open`` is an ``int``, keep only the
+           ``n`` most recently opened files; if ``True``, no limit
+       locking
+           whether to lock files when reading
        """
        self.base_path = "" if base_path == "" else utils.expand_path(base_path)
        self.keep_open = keep_open
-       self.files = {}
+       self.locking = locking
+       self.files = OrderedDict()
 
-   def gimme_file(self, lh5_file: str | h5py.File, mode: str = "r") -> h5py.File:
+   def gimme_file(
+       self,
+       lh5_file: str | h5py.File,
+       mode: str = "r",
+       page_buffer: int = 0,
+       **file_kwargs,
+   ) -> h5py.File:
        """Returns a :mod:`h5py` file object from the store or creates a new one.
 
        Parameters
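For orientation, a hedged usage sketch of the new constructor options (importing LH5Store from its module path as shown in this diff):

    from lgdo.lh5.store import LH5Store

    # keep at most 10 files open at once (least recently used handles are
    # evicted first) and do not lock files opened for reading
    store = LH5Store(keep_open=10, locking=False)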
@@ -57,14 +73,23 @@ class LH5Store:
            LH5 file name.
        mode
            mode in which to open file. See :class:`h5py.File` documentation.
+       page_buffer
+           enable paged aggregation with a buffer of this size in bytes.
+           Only used when creating a new file. Useful when writing a file
+           with a large number of small datasets. This is a short-hand for
+           ``(fs_strategy="page", fs_page_size=page_buffer)``
+       file_kwargs
+           Keyword arguments for :class:`h5py.File`
        """
        if isinstance(lh5_file, h5py.File):
            return lh5_file
 
        if mode == "r":
            lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
+           file_kwargs["locking"] = self.locking
 
        if lh5_file in self.files:
+           self.files.move_to_end(lh5_file)
            return self.files[lh5_file]
 
        if self.base_path != "":
@@ -72,22 +97,34 @@ class LH5Store:
        else:
            full_path = lh5_file
 
+       file_exists = os.path.exists(full_path)
        if mode != "r":
            directory = os.path.dirname(full_path)
            if directory != "" and not os.path.exists(directory):
                log.debug(f"making path {directory}")
                os.makedirs(directory)
 
-       if mode == "r" and not os.path.exists(full_path):
+       if mode == "r" and not file_exists:
            msg = f"file {full_path} not found"
            raise FileNotFoundError(msg)
+       if not file_exists:
+           mode = "w"
 
-       if mode != "r" and os.path.exists(full_path):
+       if mode != "r" and file_exists:
            log.debug(f"opening existing file {full_path} in mode '{mode}'")
 
-       h5f = h5py.File(full_path, mode)
+       if mode == "w":
+           file_kwargs.update(
+               {
+                   "fs_strategy": "page",
+                   "fs_page_size": page_buffer,
+               }
+           )
+       h5f = h5py.File(full_path, mode, **file_kwargs)
 
        if self.keep_open:
+           if isinstance(self.keep_open, int) and len(self.files) >= self.keep_open:
+               self.files.popitem(last=False)
            self.files[lh5_file] = h5f
 
        return h5f
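The ``keep_open`` bookkeeping above is a plain least-recently-used cache built on ``OrderedDict``. A self-contained sketch of the same pattern, with hypothetical names (this is not package code):

    from collections import OrderedDict

    class FileCache:
        """LRU cache of open file handles, mirroring LH5Store.gimme_file."""

        def __init__(self, max_open: int = 3) -> None:
            self.max_open = max_open
            self.files = OrderedDict()

        def get(self, path: str):
            if path in self.files:
                # mark as most recently used, like files.move_to_end() above
                self.files.move_to_end(path)
                return self.files[path]
            if len(self.files) >= self.max_open:
                # evict the least recently used handle (head of the dict)
                _, oldest = self.files.popitem(last=False)
                oldest.close()
            self.files[path] = open(path, "rb")
            return self.files[path]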
@@ -135,6 +172,7 @@ class LH5Store:
        obj_buf: types.LGDO = None,
        obj_buf_start: int = 0,
        decompress: bool = True,
+       **file_kwargs,
    ) -> tuple[types.LGDO, int]:
        """Read LH5 object data from a file in the store.
 
@@ -143,13 +181,65 @@ class LH5Store:
        .lh5.core.read
        """
        # grab files from store
-       if isinstance(lh5_file, (str, h5py.File)):
-           lh5_obj = self.gimme_file(lh5_file, "r")[name]
+       if isinstance(lh5_file, (str, h5py.File)):
+           lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
        else:
-           lh5_obj = [self.gimme_file(f, "r")[name] for f in lh5_file]
+           lh5_files = list(lh5_file)
+           n_rows_read = 0
+
+           for i, h5f in enumerate(lh5_files):
+               if (
+                   isinstance(idx, (list, tuple))
+                   and len(idx) > 0
+                   and not np.isscalar(idx[0])
+               ):
+                   # a list of lists: must be one per file
+                   idx_i = idx[i]
+               elif idx is not None:
+                   # make idx a proper tuple if it's not one already
+                   if not (isinstance(idx, tuple) and len(idx) == 1):
+                       idx = (idx,)
+                   # idx is a long continuous array
+                   n_rows_i = utils.read_n_rows(name, h5f)
+                   # find the length of the subset of idx that contains indices
+                   # that are less than n_rows_i
+                   n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
+                   # now split idx into idx_i and the remainder
+                   idx_i = np.array(idx[0])[:n_rows_to_read_i]
+                   idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
+               else:
+                   idx_i = None
+               n_rows_i = n_rows - n_rows_read
+
+               obj_buf, n_rows_read_i = self.read(
+                   name,
+                   h5f,
+                   start_row,
+                   n_rows_i,
+                   idx_i,
+                   use_h5idx,
+                   field_mask,
+                   obj_buf,
+                   obj_buf_start,
+                   decompress,
+               )
+
+               n_rows_read += n_rows_read_i
+               if n_rows_read >= n_rows or obj_buf is None:
+                   return obj_buf, n_rows_read
+               start_row = 0
+               obj_buf_start += n_rows_read_i
+           return obj_buf, n_rows_read
+
+       if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
+           idx = idx[0]
+       if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
+           idx = np.where(idx)[0]
 
        return _serializers._h5_read_lgdo(
-           lh5_obj,
+           lh5_obj.id,
+           lh5_obj.file.filename,
+           lh5_obj.name,
            start_row=start_row,
            n_rows=n_rows,
            idx=idx,
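The multi-file branch above splits one long, sorted index array into per-file chunks with ``bisect``. The same arithmetic in isolation (hypothetical values, not package code):

    import bisect

    import numpy as np

    idx = np.array([2, 5, 99, 104, 107])  # global row indices, sorted
    n_rows_file0 = 100                    # number of rows in the first file

    # indices < 100 address rows of file 0
    n_local = bisect.bisect_left(idx, n_rows_file0)
    idx_file0 = idx[:n_local]                 # [2, 5, 99]
    idx_rest = idx[n_local:] - n_rows_file0   # [4, 7], now local to the next file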
@@ -170,6 +260,7 @@ class LH5Store:
        n_rows: int | None = None,
        wo_mode: str = "append",
        write_start: int = 0,
+       page_buffer: int = 0,
        **h5py_kwargs,
    ) -> None:
        """Write an LGDO into an LH5 file.
@@ -199,10 +290,17 @@ class LH5Store:
        # write_object:overwrite.
        mode = "w" if wo_mode == "of" else "a"
 
+       file_kwargs = {
+           k: h5py_kwargs[k]
+           for k in h5py_kwargs & signature(h5py.File).parameters.keys()
+       }
+
        return _serializers._h5_write_lgdo(
            obj,
            name,
-           self.gimme_file(lh5_file, mode=mode),
+           self.gimme_file(
+               lh5_file, mode=mode, page_buffer=page_buffer, **file_kwargs
+           ),
            group=group,
            start_row=start_row,
            n_rows=n_rows,
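``inspect.signature`` is used above to separate the keyword arguments that :class:`h5py.File` accepts from the dataset-creation ones. A standalone sketch of that filtering (hypothetical function and kwargs):

    from inspect import signature

    def open_file(path, mode="r", locking=False):
        ...

    kwargs = {"mode": "a", "locking": True, "compression": "gzip"}

    # keep only the keys that open_file() actually accepts
    accepted = kwargs.keys() & signature(open_file).parameters.keys()
    filtered = {k: kwargs[k] for k in accepted}  # drops "compression"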
@@ -217,3 +315,9 @@ class LH5Store:
        Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.
        """
        return utils.read_n_rows(name, self.gimme_file(lh5_file, "r"))
+
+   def read_size_in_bytes(self, name: str, lh5_file: str | h5py.File) -> int:
+       """Look up the size (in B) of the object in memory. Will recursively
+       crawl through all objects in a Struct or Table.
+       """
+       return utils.read_size_in_bytes(name, self.gimme_file(lh5_file, "r"))
lgdo/lh5/tools.py
CHANGED
lgdo/lh5/utils.py
CHANGED
@@ -12,7 +12,7 @@ from typing import Any
 import h5py
 
 from .. import types
-from . import _serializers, datatype
+from . import _serializers
 from .exceptions import LH5DecodeError
 
 log = logging.getLogger(__name__)
@@ -44,57 +44,31 @@ def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
    Return ``None`` if `name` is a :class:`.Scalar` or a :class:`.Struct`.
    """
    if not isinstance(h5f, h5py.File):
-       h5f = h5py.File(h5f, "r")
+       h5f = h5py.File(h5f, "r", locking=False)
 
    try:
-       attrs = h5f[name].attrs
+       h5o = h5f[name].id
    except KeyError as e:
        msg = "not found"
        raise LH5DecodeError(msg, h5f, name) from e
-   except AttributeError as e:
-       msg = "missing 'datatype' attribute"
-       raise LH5DecodeError(msg, h5f, name) from e
 
-   lgdotype = datatype.datatype(attrs["datatype"])
-
-   # scalars are dim-0 datasets
-   if lgdotype is types.Scalar:
-       return None
-
-   # structs don't have rows
-   if lgdotype is types.Struct:
-       return None
-
-   # tables should have elements with all the same length
-   if lgdotype is types.Table:
-       # read out each of the fields
-       rows_read = None
-       for field in datatype.get_struct_fields(attrs["datatype"]):
-           n_rows_read = read_n_rows(name + "/" + field, h5f)
-           if not rows_read:
-               rows_read = n_rows_read
-           elif rows_read != n_rows_read:
-               log.warning(
-                   f"'{field}' field in table '{name}' has {rows_read} rows, "
-                   f"{n_rows_read} was expected"
-               )
-       return rows_read
+   return _serializers.read.utils.read_n_rows(h5o, h5f.name, name)
 
-   # length of vector of vectors is the length of its cumulative_length
-   if lgdotype is types.VectorOfVectors:
-       return read_n_rows(f"{name}/cumulative_length", h5f)
 
-
-
-
+def read_size_in_bytes(name: str, h5f: str | h5py.File) -> int | None:
+   """Look up the size (in B) in an LGDO object in memory. Will crawl
+   recursively through members of a Struct or Table
+   """
+   if not isinstance(h5f, h5py.File):
+       h5f = h5py.File(h5f, "r", locking=False)
 
-
-
-
-
+   try:
+       h5o = h5f[name].id
+   except KeyError as e:
+       msg = "not found"
+       raise LH5DecodeError(msg, h5f, name) from e
 
-
-   raise LH5DecodeError(msg, h5f, name)
+   return _serializers.read.utils.read_size_in_bytes(h5o, h5f.name, name)
 
 
 def get_h5_group(
@@ -125,7 +99,12 @@ def get_h5_group(
    else:
        group = base_group.create_group(group)
        if grp_attrs is not None:
-           group.attrs.update(grp_attrs)
+           group.attrs.update(
+               {
+                   k: v.encode("utf-8") if isinstance(v, str) else v
+                   for k, v in grp_attrs.items()
+               }
+           )
        return group
    if (
        grp_attrs is not None
@@ -141,7 +120,13 @@ def get_h5_group(
        log.debug(f"overwriting {group}.attrs...")
        for key in group.attrs:
            group.attrs.pop(key)
-       group.attrs.update(grp_attrs)
+
+       group.attrs.update(
+           {
+               k: v.encode("utf-8") if isinstance(v, str) else v
+               for k, v in grp_attrs.items()
+           }
+       )
 
    return group
 
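The dict comprehension above pre-encodes string attribute values, presumably so h5py stores them as fixed-length byte strings rather than variable-length UTF-8. A minimal sketch of the pattern (hypothetical file and attributes):

    import h5py

    attrs = {"datatype": "table{col1,col2}", "version": 3}

    with h5py.File("attrs-demo.h5", "w") as f:
        g = f.create_group("data")
        # str values become bytes; everything else passes through unchanged
        g.attrs.update(
            {k: v.encode("utf-8") if isinstance(v, str) else v for k, v in attrs.items()}
        )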
lgdo/types/histogram.py
CHANGED
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
-from collections.abc import Iterable, Mapping
+import logging
+from collections.abc import Iterable, Mapping, Sequence
 from typing import Any
 
 import hist
 import numpy as np
+import pandas as pd
 from numpy.typing import NDArray
 
 from .array import Array
@@ -12,6 +14,8 @@ from .lgdo import LGDO
 from .scalar import Scalar
 from .struct import Struct
 
+log = logging.getLogger(__name__)
+
 
 class Histogram(Struct):
     class Axis(Struct):
@@ -197,6 +201,7 @@ class Histogram(Struct):
        isdensity: bool = False,
        attrs: dict[str, Any] | None = None,
        binedge_attrs: dict[str, Any] | None = None,
+       flow: bool = True,
    ) -> None:
        """A special struct to contain histogrammed data.
 
@@ -221,6 +226,16 @@ class Histogram(Struct):
            as binning.
        attrs
            a set of user attributes to be carried along with this LGDO.
+       flow
+           If ``False``, discard counts in over-/underflow bins of the passed
+           :class:`hist.Hist` instance. If ``True``, this data will also be
+           discarded, but a warning is emitted.
+
+           .. note ::
+
+               :class:`Histogram` does not support storing counts in overflow
+               or underflow bins. This parameter only controls whether a
+               warning is emitted.
        """
        if isinstance(weights, hist.Hist):
            if binning is not None:
@@ -230,9 +245,10 @@ class Histogram(Struct):
                msg = "not allowed to pass isdensity=True if constructing from hist.Hist instance"
                raise ValueError(msg)
 
-           if weights.sum(flow=True) != weights.sum(flow=False):
-               msg = "flow bins of hist.Hist cannot be represented"
-               raise ValueError(msg)
+           if weights.sum(flow=True) != weights.sum(flow=False) and flow:
+               log.warning(
+                   "flow bins of hist.Hist cannot be represented, their counts are discarded"
+               )
            weights_view = weights.view(flow=False)
            if type(weights_view) is not np.ndarray:
                msg = "only simple numpy-backed storages can be used in a hist.Hist"
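A hedged sketch of the new ``flow`` behavior when constructing from a :class:`hist.Hist` (assuming ``Histogram`` is imported from its module path):

    import hist
    from lgdo.types.histogram import Histogram

    h = hist.Hist(hist.axis.Regular(10, 0, 1))
    h.fill([-0.5, 0.2, 0.7, 1.5])  # two entries land in under-/overflow bins

    # flow-bin counts are always discarded; flow=False just silences the warning
    lh = Histogram(h, flow=False)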
@@ -254,10 +270,10 @@ class Histogram(Struct):
                b.append(Histogram.Axis.from_edges(ax.edges, binedge_attrs))
        else:
            if binning is None:
-               msg = "need to
+               msg = "need to pass binning to construct Histogram"
                raise ValueError(msg)
-           w = weights if isinstance(weights, Array) else Array(weights)
 
+           # set up binning
            if all(isinstance(ax, Histogram.Axis) for ax in binning):
                if binedge_attrs is not None:
                    msg = "passed both binedges as Axis instances and binedge_attrs"
@@ -271,6 +287,14 @@ class Histogram(Struct):
                msg = "invalid binning object passed"
                raise ValueError(msg)
 
+           # set up bin weights
+           if isinstance(weights, Array):
+               w = weights
+           elif weights is None:
+               w = Array(shape=[ax.nbins for ax in b], fill_val=0, dtype=np.float32)
+           else:
+               w = Array(weights)
+
            if len(binning) != len(w.nda.shape):
                msg = "binning and weight dimensions do not match"
                raise ValueError(msg)
@@ -300,6 +324,98 @@ class Histogram(Struct):
        assert all(isinstance(v, Histogram.Axis) for k, v in bins)
        return tuple(v for _, v in bins)
 
+   def fill(self, data, w: NDArray = None, keys: Sequence[str] = None) -> None:
+       """Fill histogram by incrementing bins with data points weighted by w.
+
+       Parameters
+       ----------
+       data
+           an ndarray with inner dimension equal to the number of axes, or a
+           list of equal-length 1D arrays containing data for each axis, or a
+           Mapping to 1D arrays containing data for each axis (requires
+           ``keys``), or a pandas ``DataFrame`` (optionally takes a list of
+           ``keys``)
+       w
+           weight to use for incrementing data points. If ``None``, use 1
+           for all
+       keys
+           list of keys to use if ``data`` is a pandas ``DataFrame`` or
+           ``Mapping``
+       """
+       if keys is not None:
+           if isinstance(keys, str):
+               keys = [keys]
+           elif not isinstance(keys, list):
+               keys = list(keys)
+
+       if (
+           isinstance(data, np.ndarray)
+           and len(data.shape) == 1
+           and len(self.binning) == 1
+       ):
+           N = len(data)
+           data = [data]
+       elif (
+           isinstance(data, np.ndarray)
+           and len(data.shape) == 2
+           and data.shape[1] == len(self.binning)
+       ):
+           N = data.shape[0]
+           data = data.T
+       elif isinstance(data, pd.DataFrame) and (
+           (keys is not None and len(keys) == len(self.binning))
+           or data.ndim == len(self.binning)
+       ):
+           if keys is not None:
+               data = data[keys]
+           N = len(data)
+           data = data.values.T
+       elif isinstance(data, Sequence) and len(data) == len(self.binning):
+           data = [d if isinstance(d, np.ndarray) else np.array(d) for d in data]
+           N = len(data[0])
+           if not all(len(d) == N for d in data):
+               msg = "length of all data arrays must be equal"
+               raise ValueError(msg)
+       elif isinstance(data, Mapping):
+           if not isinstance(keys, Sequence) or len(keys) != len(self.binning):
+               msg = "filling hist with Mapping data requires a list of keys with same length as histogram rank"
+               raise ValueError(msg)
+           data = [
+               data[k] if isinstance(data[k], np.ndarray) else np.array(data[k])
+               for k in keys
+           ]
+           N = len(data[0])
+           if not all(len(d) == N for d in data):
+               msg = "length of all data arrays must be equal"
+               raise ValueError(msg)
+       else:
+           msg = "data must be 2D numpy array or list of 1D arrays with length equal to number of axes"
+           raise ValueError(msg)
+
+       idx = np.zeros(N, np.float64)  # bin indices for flattened array
+       oor_mask = np.ones(N, np.bool_)  # mask to remove out of range values
+       stride = [s // self.weights.dtype.itemsize for s in self.weights.nda.strides]
+       for col, ax, s in zip(data, self.binning, stride):
+           if ax.is_range:
+               idx += s * np.floor((col - ax.first) / ax.step - int(not ax.closedleft))
+               if ax.closedleft:
+                   oor_mask &= (ax.first <= col) & (col < ax.last)
+               else:
+                   oor_mask &= (ax.first < col) & (col <= ax.last)
+           else:
+               idx += s * (
+                   np.searchsorted(
+                       ax.edges, col, side=("right" if ax.closedleft else "left")
+                   )
+                   - 1
+               )
+               if ax.closedleft:
+                   oor_mask &= (ax.edges[0] <= col) & (col < ax.edges[-1])
+               else:
+                   oor_mask &= (ax.edges[0] < col) & (col <= ax.edges[-1])
+
+       # increment bin contents
+       idx = idx[oor_mask].astype(np.int64)
+       w = w[oor_mask] if w is not None else 1
+       np.add.at(self.weights.nda.reshape(-1), idx, w)
+
    def __setitem__(self, name: str, obj: LGDO) -> None:
        # do not allow for new attributes on this
        msg = "histogram fields cannot be mutated"
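A hedged usage sketch of the new ``fill()`` method, assuming a 1D histogram built from an explicit edge array (as supported elsewhere in this class):

    import numpy as np
    from lgdo.types.histogram import Histogram

    # ten uniform bins on [0, 1); weights default to a zero-filled array
    h = Histogram(None, binning=[np.linspace(0, 1, 11)])

    rng = np.random.default_rng(42)
    h.fill(rng.uniform(size=1000))                      # unweighted entries
    h.fill(rng.uniform(size=500), w=np.full(500, 0.5))  # weighted entries

    print(h.weights.nda.sum())  # 1250.0, since all samples fall in range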
lgdo/types/table.py
CHANGED
@@ -450,7 +450,7 @@ class Table(Struct):
        cols = self.keys()
 
        if library == "pd":
-           df = pd.DataFrame()
+           df = {}
 
            for col in cols:
                data = self[col]
@@ -470,7 +470,7 @@ class Table(Struct):
                )
                df[f"{prefix}{col}"] = data.view_as("pd", with_units=with_units)
 
-           return df
+           return pd.DataFrame(df, copy=False)
 
        if library == "np":
            msg = f"Format {library!r} is not supported for Tables."
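Collecting columns in a plain dict and constructing the DataFrame once with ``copy=False`` avoids repeated per-column insertion into a growing DataFrame. Hedged usage sketch (hypothetical columns):

    import numpy as np
    from lgdo.types.array import Array
    from lgdo.types.table import Table

    tbl = Table(col_dict={"energy": Array(np.arange(5)), "channel": Array(np.ones(5))})
    df = tbl.view_as("pd")  # one pd.DataFrame(dict, copy=False) at the end
    print(df.head())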
lgdo/types/vectorofvectors.py
CHANGED
@@ -632,7 +632,7 @@ class VectorOfVectors(LGDO):
        offsets = np.empty(
            len(self.cumulative_length) + 1, dtype=self.cumulative_length.dtype
        )
-       offsets[1:] = self.cumulative_length
+       offsets[1:] = self.cumulative_length.nda
        offsets[0] = 0
 
        content = (
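The one-line fix matters because ``cumulative_length`` is an LGDO ``Array`` wrapper, not a bare ndarray; assigning its ``.nda`` writes the underlying buffer directly. The offsets being built follow the usual prefix-sum layout:

    import numpy as np

    # cumulative_length [3, 5, 9] encodes three vectors of lengths 3, 2, 4
    cumulative_length = np.array([3, 5, 9])

    offsets = np.empty(len(cumulative_length) + 1, dtype=cumulative_length.dtype)
    offsets[1:] = cumulative_length
    offsets[0] = 0
    # offsets == [0, 3, 5, 9]; vector i spans flat_data[offsets[i]:offsets[i+1]]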
legend_pydataobj-1.9.0.dist-info/RECORD
DELETED
@@ -1,55 +0,0 @@
-lgdo/__init__.py,sha256=1YUuAFQHNrOOkr3ZfrtEJOpYqgzbHRYA81ssbQZitQE,3196
-lgdo/_version.py,sha256=Wi1Vgg8ccNVK7oIZNO8kmGhwjztIUyuzlku2tkT7820,411
-lgdo/cli.py,sha256=vB1Oj6kZ5gWaY9HBPBRRRyiepp72hm3bFvQeUUWeMYg,8214
-lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
-lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
-lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
-lgdo/utils.py,sha256=9t_GYdB8aQhZ4Vz6ujmASzwCgTuP7ZdINtPTVPyIR6E,3661
-lgdo/compression/__init__.py,sha256=gqbdx4NnpCcW-C7kUXV-hVUZFiNlbCwIbs3uzFe4AFE,1127
-lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
-lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
-lgdo/compression/radware.py,sha256=VbKAvi18h48Fz-ZxMEg64yD1ezaw1NkMZazxurdyMmc,24015
-lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
-lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=KzWF6HI-6N1NqQUm8LAxMmDbg0rgRY4DAaJ2s7w2tLM,811
-lgdo/lh5/core.py,sha256=c94Er0F28y2shYp0Ti9Nvq8Z2QB5P4QSqixjzvtdslI,11153
-lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
-lgdo/lh5/exceptions.py,sha256=h0NyNM5EX8lgoK-JTnPlIhr7Jk-VKJzc1kd1IYTjhXw,985
-lgdo/lh5/iterator.py,sha256=eqH9a_ZjEhgqJUZbMj36jXK_1Xbx86450DVw7LHNB3Y,12369
-lgdo/lh5/store.py,sha256=5_ytoRtM2g3jVAPN3r02x3u-I1BNGI4lXMhwD2Gi-Eg,6661
-lgdo/lh5/tools.py,sha256=nb4zaBbVbQZTEzawi_faMhRvaPQf9Iea2xbuN6MPSQg,9922
-lgdo/lh5/utils.py,sha256=w0gGaYmP4HxoM6vCcku1m8qg06tawixQj6awuRIujXM,6984
-lgdo/lh5/_serializers/__init__.py,sha256=NSH8uOVY3r_Wn3t0nQHhEHhkHT7-GJYlxuS3YTDJa5Y,1263
-lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/read/array.py,sha256=V9a1E_yvBmpxMu1pqD7V3WTk2HpBg72jwiXNUvhMBjQ,879
-lgdo/lh5/_serializers/read/composite.py,sha256=tYhi6v_zbrO-ryTShWVFpo-vrRVlgiixcIqaCAcc3RY,13439
-lgdo/lh5/_serializers/read/encoded.py,sha256=g5_C1i2zXu02JE3ZNg9meeQ80y_5J-ZZNPFsGyGZdOA,3634
-lgdo/lh5/_serializers/read/ndarray.py,sha256=2Ov7YF92wHUt-Z0s0fjPxeJ_4S8w7DH61Ee4pxVBEK8,3417
-lgdo/lh5/_serializers/read/scalar.py,sha256=BFnGzgZvE9QPOutoe7vwlHTPpfGbATlScQc5fgGvTlg,731
-lgdo/lh5/_serializers/read/utils.py,sha256=gavB1v9njr7WFJf8KDsbYlXcJmeF_dTgsLgHN5LoyCo,411
-lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=kNoRFKA1inefZEgxkz6Paou2KnA7bA-ugK6HvSJSNR0,6567
-lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=Gosg8rOCH_2dRMj_oNSWyXuoYXDjy0OK--GCYWswR4U,2803
-lgdo/lh5/_serializers/write/composite.py,sha256=oyJ2ix0fO52ZtiI19KRanwt-9gn0TDVsAPaRAdUFn6Y,8901
-lgdo/lh5/_serializers/write/scalar.py,sha256=gkcF2WVBR3aQYl0EynbVUocx4y3r8tvPfQYQJjkPvP4,643
-lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
-lgdo/types/__init__.py,sha256=B7qEdlD0yJ8IUHK8xUKSyhRTY5LD1B148DPXI6iaB7w,821
-lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
-lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
-lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=3guFbj48cd9HudWxjyXXcNfNg3vp1Quz6YSuCq8FQpc,14949
-lgdo/types/lgdo.py,sha256=UnJDi1emQYVgH_H29Vipfs4LelPopxG5pgZUu1eKOlw,2761
-lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
-lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=w6ESACX6TNvEGIUQfNBtn2ofPNPM-Tl-6m6SITGVvtI,17942
-lgdo/types/vectorofvectors.py,sha256=Q53K8wiHwRHpGw3ARqrLnOXu3kLHptTYMp0ay9KK1vs,24386
-lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
-lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
-legend_pydataobj-1.9.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-legend_pydataobj-1.9.0.dist-info/METADATA,sha256=kLFP_xph3aEB1PCK_IXc1vchcZgE6GSwPTUsDV74jxQ,44379
-legend_pydataobj-1.9.0.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-legend_pydataobj-1.9.0.dist-info/entry_points.txt,sha256=Uu5MTlppBZxB4QGlLv-oX8FqACWjAZDNii__TBDJwLQ,72
-legend_pydataobj-1.9.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.9.0.dist-info/RECORD,,
{legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/LICENSE
File without changes
{legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/entry_points.txt
File without changes
{legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/top_level.txt
File without changes