legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD +26 -25
- lgdo/__init__.py +4 -5
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +3 -1
- lgdo/lh5/_serializers/read/composite.py +3 -1
- lgdo/lh5/_serializers/write/composite.py +28 -11
- lgdo/lh5/concat.py +9 -3
- lgdo/lh5/core.py +31 -26
- lgdo/lh5/iterator.py +27 -48
- lgdo/lh5/store.py +75 -22
- lgdo/lh5/tools.py +111 -0
- lgdo/lh5/utils.py +4 -6
- lgdo/lh5_store.py +284 -0
- lgdo/types/array.py +15 -84
- lgdo/types/encoded.py +20 -25
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +0 -50
- lgdo/types/table.py +28 -49
- lgdo/types/vectorofvectors.py +94 -132
- lgdo/types/vovutils.py +4 -14
- lgdo/types/waveformtable.py +21 -19
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/WHEEL +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/top_level.txt +0 -0
lgdo/types/array.py
CHANGED
@@ -6,7 +6,7 @@ corresponding utilities.
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
8
|
import logging
|
9
|
-
from collections.abc import Collection, Iterator
|
9
|
+
from collections.abc import Iterator
|
10
10
|
from typing import Any
|
11
11
|
|
12
12
|
import awkward as ak
|
@@ -17,12 +17,12 @@ import pint_pandas # noqa: F401
|
|
17
17
|
|
18
18
|
from .. import utils
|
19
19
|
from ..units import default_units_registry as u
|
20
|
-
from .lgdo import LGDOCollection
|
20
|
+
from .lgdo import LGDO
|
21
21
|
|
22
22
|
log = logging.getLogger(__name__)
|
23
23
|
|
24
24
|
|
25
|
-
class Array(LGDOCollection):
|
25
|
+
class Array(LGDO):
|
26
26
|
r"""Holds an :class:`numpy.ndarray` and attributes.
|
27
27
|
|
28
28
|
:class:`Array` (and the other various array types) holds an `nda` instead
|
@@ -78,7 +78,11 @@ class Array(LGDOCollection):
|
|
78
78
|
elif isinstance(nda, Array):
|
79
79
|
nda = nda.nda
|
80
80
|
|
81
|
+
elif not isinstance(nda, np.ndarray):
|
82
|
+
nda = np.array(nda)
|
83
|
+
|
81
84
|
self.nda = nda
|
85
|
+
self.dtype = self.nda.dtype
|
82
86
|
|
83
87
|
super().__init__(attrs)
|
84
88
|
|
@@ -92,91 +96,18 @@ class Array(LGDOCollection):
|
|
92
96
|
return dt + "<" + nd + ">{" + et + "}"
|
93
97
|
|
94
98
|
def __len__(self) -> int:
|
95
|
-
return self.
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
return self.
|
100
|
-
|
101
|
-
@nda.setter
|
102
|
-
def nda(self, value):
|
103
|
-
self._nda = value if isinstance(value, np.ndarray) else np.array(value)
|
104
|
-
self._size = len(self._nda) if self._nda.shape != () else 0
|
105
|
-
|
106
|
-
@property
|
107
|
-
def dtype(self):
|
108
|
-
return self._nda.dtype
|
109
|
-
|
110
|
-
@property
|
111
|
-
def shape(self):
|
112
|
-
return (len(self),) + self._nda.shape[1:]
|
113
|
-
|
114
|
-
def reserve_capacity(self, capacity: int) -> None:
|
115
|
-
"Set size (number of rows) of internal memory buffer"
|
116
|
-
if capacity < len(self):
|
117
|
-
msg = "Cannot reduce capacity below Array length"
|
118
|
-
raise ValueError(msg)
|
119
|
-
self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
|
120
|
-
|
121
|
-
def get_capacity(self) -> int:
|
122
|
-
"Get capacity (i.e. max size before memory must be re-allocated)"
|
123
|
-
return len(self._nda)
|
124
|
-
|
125
|
-
def trim_capacity(self) -> None:
|
126
|
-
"Set capacity to be minimum needed to support Array size"
|
127
|
-
self.reserve_capacity(np.prod(self.shape))
|
128
|
-
|
129
|
-
def resize(self, new_size: int | Collection[int], trim=False) -> None:
|
130
|
-
"""Set size of Array in rows. Only change capacity if it must be
|
131
|
-
increased to accommodate new rows; in this case double capacity.
|
132
|
-
If trim is True, capacity will be set to match size. If new_size
|
133
|
-
is an int, do not change size of inner dimensions.
|
134
|
-
|
135
|
-
If new_size is a collection, internal memory will be re-allocated, so
|
136
|
-
this should be done only rarely!"""
|
137
|
-
|
138
|
-
if isinstance(new_size, Collection):
|
139
|
-
self._size = new_size[0]
|
140
|
-
self._nda.resize(new_size)
|
141
|
-
else:
|
142
|
-
self._size = new_size
|
143
|
-
|
144
|
-
if trim and new_size != self.get_capacity:
|
145
|
-
self.reserve_capacity(new_size)
|
146
|
-
|
147
|
-
# If capacity is not big enough, set to next power of 2 big enough
|
148
|
-
if new_size > self.get_capacity():
|
149
|
-
self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
|
99
|
+
return len(self.nda)
|
100
|
+
|
101
|
+
def resize(self, new_size: int) -> None:
|
102
|
+
new_shape = (new_size,) + self.nda.shape[1:]
|
103
|
+
return self.nda.resize(new_shape, refcheck=True)
|
150
104
|
|
151
105
|
def append(self, value: np.ndarray) -> None:
|
152
|
-
|
153
|
-
self.
|
106
|
+
self.resize(len(self) + 1)
|
107
|
+
self.nda[-1] = value
|
154
108
|
|
155
109
|
def insert(self, i: int, value: int | float) -> None:
|
156
|
-
|
157
|
-
if i > len(self):
|
158
|
-
msg = f"index {i} is out of bounds for array with size {len(self)}"
|
159
|
-
raise IndexError(msg)
|
160
|
-
|
161
|
-
value = np.array(value)
|
162
|
-
if value.shape == self.shape[1:]:
|
163
|
-
self.resize(len(self) + 1)
|
164
|
-
self[i + 1 :] = self[i:-1]
|
165
|
-
self[i] = value
|
166
|
-
elif value.shape[1:] == self.shape[1:]:
|
167
|
-
self.resize(len(self) + len(value))
|
168
|
-
self[i + len(value) :] = self[i : -len(value)]
|
169
|
-
self[i : i + len(value)] = value
|
170
|
-
else:
|
171
|
-
msg = f"Could not insert value with shape {value.shape} into Array with shape {self.shape}"
|
172
|
-
raise ValueError(msg)
|
173
|
-
|
174
|
-
def replace(self, i: int, value: int | float) -> None:
|
175
|
-
"Replace value at row i"
|
176
|
-
if i >= len(self):
|
177
|
-
msg = f"index {i} is out of bounds for array with size {len(self)}"
|
178
|
-
raise IndexError(msg)
|
179
|
-
self[i] = value
|
110
|
+
self.nda = np.insert(self.nda, i, value)
|
180
111
|
|
181
112
|
def __getitem__(self, key):
|
182
113
|
return self.nda[key]
|
lgdo/types/encoded.py
CHANGED
@@ -11,12 +11,12 @@ from numpy.typing import NDArray
|
|
11
11
|
|
12
12
|
from .. import utils
|
13
13
|
from .array import Array
|
14
|
-
from .lgdo import LGDOCollection
|
14
|
+
from .lgdo import LGDO
|
15
15
|
from .scalar import Scalar
|
16
16
|
from .vectorofvectors import VectorOfVectors
|
17
17
|
|
18
18
|
|
19
|
-
class VectorOfEncodedVectors(LGDOCollection):
|
19
|
+
class VectorOfEncodedVectors(LGDO):
|
20
20
|
"""An array of variable-length encoded arrays.
|
21
21
|
|
22
22
|
Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
|
@@ -92,17 +92,6 @@ class VectorOfEncodedVectors(LGDOCollection):
|
|
92
92
|
|
93
93
|
return False
|
94
94
|
|
95
|
-
def reserve_capacity(self, *capacity: int) -> None:
|
96
|
-
self.encoded_data.reserve_capacity(*capacity)
|
97
|
-
self.decoded_size.reserve_capacity(capacity[0])
|
98
|
-
|
99
|
-
def get_capacity(self) -> tuple:
|
100
|
-
return (self.decoded_size.get_capacity, *self.encoded_data.get_capacity())
|
101
|
-
|
102
|
-
def trim_capacity(self) -> None:
|
103
|
-
self.encoded_data.trim_capacity()
|
104
|
-
self.decoded_size.trim_capacity()
|
105
|
-
|
106
95
|
def resize(self, new_size: int) -> None:
|
107
96
|
"""Resize vector along the first axis.
|
108
97
|
|
@@ -113,6 +102,21 @@ class VectorOfEncodedVectors(LGDOCollection):
|
|
113
102
|
self.encoded_data.resize(new_size)
|
114
103
|
self.decoded_size.resize(new_size)
|
115
104
|
|
105
|
+
def append(self, value: tuple[NDArray, int]) -> None:
|
106
|
+
"""Append a 1D encoded vector at the end.
|
107
|
+
|
108
|
+
Parameters
|
109
|
+
----------
|
110
|
+
value
|
111
|
+
a tuple holding the encoded array and its decoded size.
|
112
|
+
|
113
|
+
See Also
|
114
|
+
--------
|
115
|
+
.VectorOfVectors.append
|
116
|
+
"""
|
117
|
+
self.encoded_data.append(value[0])
|
118
|
+
self.decoded_size.append(value[1])
|
119
|
+
|
116
120
|
def insert(self, i: int, value: tuple[NDArray, int]) -> None:
|
117
121
|
"""Insert an encoded vector at index `i`.
|
118
122
|
|
@@ -278,7 +282,7 @@ class VectorOfEncodedVectors(LGDOCollection):
|
|
278
282
|
raise ValueError(msg)
|
279
283
|
|
280
284
|
|
281
|
-
class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
|
285
|
+
class ArrayOfEncodedEqualSizedArrays(LGDO):
|
282
286
|
"""An array of encoded arrays with equal decoded size.
|
283
287
|
|
284
288
|
Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
|
@@ -345,23 +349,14 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
|
|
345
349
|
|
346
350
|
return False
|
347
351
|
|
348
|
-
def
|
349
|
-
self.encoded_data.reserve_capacity(capacity)
|
350
|
-
|
351
|
-
def get_capacity(self) -> tuple:
|
352
|
-
return self.encoded_data.get_capacity()
|
353
|
-
|
354
|
-
def trim_capacity(self) -> None:
|
355
|
-
self.encoded_data.trim_capacity()
|
356
|
-
|
357
|
-
def resize(self, new_size: int, trim: bool = False) -> None:
|
352
|
+
def resize(self, new_size: int) -> None:
|
358
353
|
"""Resize array along the first axis.
|
359
354
|
|
360
355
|
See Also
|
361
356
|
--------
|
362
357
|
.VectorOfVectors.resize
|
363
358
|
"""
|
364
|
-
self.encoded_data.resize(new_size
|
359
|
+
self.encoded_data.resize(new_size)
|
365
360
|
|
366
361
|
def append(self, value: NDArray) -> None:
|
367
362
|
"""Append a 1D encoded array at the end.
|
lgdo/types/histogram.py
CHANGED
@@ -424,7 +424,7 @@ class Histogram(Struct):
|
|
424
424
|
dict.__setitem__(self, name, obj)
|
425
425
|
else:
|
426
426
|
msg = "histogram fields cannot be mutated "
|
427
|
-
raise
|
427
|
+
raise TypeError(msg)
|
428
428
|
|
429
429
|
def __getattr__(self, name: str) -> None:
|
430
430
|
# do not allow for new attributes on this
|
lgdo/types/lgdo.py
CHANGED
@@ -92,53 +92,3 @@ class LGDO(ABC):
|
|
92
92
|
|
93
93
|
def __repr__(self) -> str:
|
94
94
|
return self.__class__.__name__ + f"(attrs={self.attrs!r})"
|
95
|
-
|
96
|
-
|
97
|
-
class LGDOCollection(LGDO):
|
98
|
-
"""Abstract base class representing a LEGEND Collection Object (LGDO).
|
99
|
-
This defines the interface for classes used as table columns.
|
100
|
-
"""
|
101
|
-
|
102
|
-
@abstractmethod
|
103
|
-
def __init__(self, attrs: dict[str, Any] | None = None) -> None:
|
104
|
-
super().__init__(attrs)
|
105
|
-
|
106
|
-
@abstractmethod
|
107
|
-
def __len__(self) -> int:
|
108
|
-
"""Provides ``__len__`` for this array-like class."""
|
109
|
-
|
110
|
-
@abstractmethod
|
111
|
-
def reserve_capacity(self, capacity: int) -> None:
|
112
|
-
"""Reserve capacity (in rows) for later use. Internal memory buffers
|
113
|
-
will have enough entries to store this many rows.
|
114
|
-
"""
|
115
|
-
|
116
|
-
@abstractmethod
|
117
|
-
def get_capacity(self) -> int:
|
118
|
-
"get reserved capacity of internal memory buffers in rows"
|
119
|
-
|
120
|
-
@abstractmethod
|
121
|
-
def trim_capacity(self) -> None:
|
122
|
-
"""set capacity to only what is required to store current contents
|
123
|
-
of LGDOCollection
|
124
|
-
"""
|
125
|
-
|
126
|
-
@abstractmethod
|
127
|
-
def resize(self, new_size: int, trim: bool = False) -> None:
|
128
|
-
"""Return this LGDO's datatype attribute string."""
|
129
|
-
|
130
|
-
def append(self, val) -> None:
|
131
|
-
"append val to end of LGDOCollection"
|
132
|
-
self.insert(len(self), val)
|
133
|
-
|
134
|
-
@abstractmethod
|
135
|
-
def insert(self, i: int, val) -> None:
|
136
|
-
"insert val into LGDOCollection at position i"
|
137
|
-
|
138
|
-
@abstractmethod
|
139
|
-
def replace(self, i: int, val) -> None:
|
140
|
-
"replace item at position i with val in LGDOCollection"
|
141
|
-
|
142
|
-
def clear(self, trim: bool = False) -> None:
|
143
|
-
"set size of LGDOCollection to zero"
|
144
|
-
self.resize(0, trim=trim)
|
lgdo/types/table.py
CHANGED
@@ -19,7 +19,7 @@ from pandas.io.formats import format as fmt
|
|
19
19
|
|
20
20
|
from .array import Array
|
21
21
|
from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
|
22
|
-
from .lgdo import LGDO, LGDOCollection
|
22
|
+
from .lgdo import LGDO
|
23
23
|
from .scalar import Scalar
|
24
24
|
from .struct import Struct
|
25
25
|
from .vectorofvectors import VectorOfVectors
|
@@ -27,9 +27,13 @@ from .vectorofvectors import VectorOfVectors
|
|
27
27
|
log = logging.getLogger(__name__)
|
28
28
|
|
29
29
|
|
30
|
-
class Table(Struct, LGDOCollection):
|
30
|
+
class Table(Struct):
|
31
31
|
"""A special struct of arrays or subtable columns of equal length.
|
32
32
|
|
33
|
+
Holds onto an internal read/write location ``loc`` that is useful in
|
34
|
+
managing table I/O using functions like :meth:`push_row`, :meth:`is_full`,
|
35
|
+
and :meth:`clear`.
|
36
|
+
|
33
37
|
Note
|
34
38
|
----
|
35
39
|
If you write to a table and don't fill it up to its total size, be sure to
|
@@ -45,7 +49,7 @@ class Table(Struct, LGDOCollection):
|
|
45
49
|
|
46
50
|
def __init__(
|
47
51
|
self,
|
48
|
-
col_dict: Mapping[str,
|
52
|
+
col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
|
49
53
|
size: int | None = None,
|
50
54
|
attrs: Mapping[str, Any] | None = None,
|
51
55
|
) -> None:
|
@@ -61,7 +65,7 @@ class Table(Struct, LGDOCollection):
|
|
61
65
|
col_dict
|
62
66
|
instantiate this table using the supplied mapping of column names
|
63
67
|
and array-like objects. Supported input types are: mapping of
|
64
|
-
strings to
|
68
|
+
strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`.
|
65
69
|
Note 1: no copy is performed, the objects are used directly (unless
|
66
70
|
:class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
|
67
71
|
all arrays will be resized to match it. Note 3: if the arrays have
|
@@ -81,8 +85,7 @@ class Table(Struct, LGDOCollection):
|
|
81
85
|
col_dict = _ak_to_lgdo_or_col_dict(col_dict)
|
82
86
|
|
83
87
|
# call Struct constructor
|
84
|
-
|
85
|
-
LGDOCollection.__init__(self, attrs=attrs)
|
88
|
+
super().__init__(obj_dict=col_dict, attrs=attrs)
|
86
89
|
|
87
90
|
# if col_dict is not empty, set size according to it
|
88
91
|
# if size is also supplied, resize all fields to match it
|
@@ -90,10 +93,13 @@ class Table(Struct, LGDOCollection):
|
|
90
93
|
if col_dict is not None and len(col_dict) > 0:
|
91
94
|
self.resize(new_size=size, do_warn=(size is None))
|
92
95
|
|
93
|
-
# if no col_dict, just set the size
|
96
|
+
# if no col_dict, just set the size (default to 1024)
|
94
97
|
else:
|
95
98
|
self.size = size if size is not None else None
|
96
99
|
|
100
|
+
# always start at loc=0
|
101
|
+
self.loc = 0
|
102
|
+
|
97
103
|
def datatype_name(self) -> str:
|
98
104
|
return "table"
|
99
105
|
|
@@ -101,31 +107,7 @@ class Table(Struct, LGDOCollection):
|
|
101
107
|
"""Provides ``__len__`` for this array-like class."""
|
102
108
|
return self.size
|
103
109
|
|
104
|
-
def
|
105
|
-
"Set size (number of rows) of internal memory buffer"
|
106
|
-
if isinstance(capacity, int):
|
107
|
-
for obj in self.values():
|
108
|
-
obj.reserve_capacity(capacity)
|
109
|
-
else:
|
110
|
-
if len(capacity) != len(self.keys()):
|
111
|
-
msg = "List of capacities must have same length as number of keys"
|
112
|
-
raise ValueError(msg)
|
113
|
-
|
114
|
-
for obj, cap in zip(self.values(), capacity):
|
115
|
-
obj.reserve_capacity(cap)
|
116
|
-
|
117
|
-
def get_capacity(self) -> int:
|
118
|
-
"Get list of capacities for each key"
|
119
|
-
return [v.get_capacity() for v in self.values()]
|
120
|
-
|
121
|
-
def trim_capacity(self) -> int:
|
122
|
-
"Set capacity to be minimum needed to support Array size"
|
123
|
-
for v in self.values():
|
124
|
-
v.trim_capacity()
|
125
|
-
|
126
|
-
def resize(
|
127
|
-
self, new_size: int | None = None, do_warn: bool = False, trim: bool = False
|
128
|
-
) -> None:
|
110
|
+
def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
|
129
111
|
# if new_size = None, use the size from the first field
|
130
112
|
for field, obj in self.items():
|
131
113
|
if new_size is None:
|
@@ -137,20 +119,21 @@ class Table(Struct, LGDOCollection):
|
|
137
119
|
f"with size {len(obj)} != {new_size}"
|
138
120
|
)
|
139
121
|
if isinstance(obj, Table):
|
140
|
-
obj.resize(new_size
|
122
|
+
obj.resize(new_size)
|
141
123
|
else:
|
142
|
-
obj.resize(new_size
|
124
|
+
obj.resize(new_size)
|
143
125
|
self.size = new_size
|
144
126
|
|
145
|
-
def
|
146
|
-
|
147
|
-
for k, ar in self.items():
|
148
|
-
ar.insert(i, vals[k])
|
149
|
-
self.size += 1
|
127
|
+
def push_row(self) -> None:
|
128
|
+
self.loc += 1
|
150
129
|
|
151
|
-
def
|
152
|
-
self
|
153
|
-
|
130
|
+
def is_full(self) -> bool:
|
131
|
+
return self.loc >= self.size
|
132
|
+
|
133
|
+
def clear(self) -> None:
|
134
|
+
self.loc = 0
|
135
|
+
|
136
|
+
def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
|
154
137
|
"""Add a field (column) to the table.
|
155
138
|
|
156
139
|
Use the name "field" here to match the terminology used in
|
@@ -187,9 +170,7 @@ class Table(Struct, LGDOCollection):
|
|
187
170
|
new_size = len(obj) if use_obj_size else self.size
|
188
171
|
self.resize(new_size=new_size)
|
189
172
|
|
190
|
-
def add_column(
|
191
|
-
self, name: str, obj: LGDOCollection, use_obj_size: bool = False
|
192
|
-
) -> None:
|
173
|
+
def add_column(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
|
193
174
|
"""Alias for :meth:`.add_field` using table terminology 'column'."""
|
194
175
|
self.add_field(name, obj, use_obj_size=use_obj_size)
|
195
176
|
|
@@ -220,10 +201,8 @@ class Table(Struct, LGDOCollection):
|
|
220
201
|
set to ``False`` to turn off warnings associated with mismatched
|
221
202
|
`loc` parameter or :meth:`add_column` warnings.
|
222
203
|
"""
|
223
|
-
if
|
224
|
-
log.warning(
|
225
|
-
f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})"
|
226
|
-
)
|
204
|
+
if other_table.loc != self.loc and do_warn:
|
205
|
+
log.warning(f"other_table.loc ({other_table.loc}) != self.loc({self.loc})")
|
227
206
|
if cols is None:
|
228
207
|
cols = other_table.keys()
|
229
208
|
for name in cols:
|