legend-pydataobj 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lgdo/types/__init__.py ADDED
@@ -0,0 +1,30 @@
1
+ """LEGEND Data Objects (LGDO) types."""
2
+
3
+ from .array import Array
4
+ from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
5
+ from .encoded import ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors
6
+ from .fixedsizearray import FixedSizeArray
7
+ from .lgdo import LGDO
8
+ from .scalar import Scalar
9
+ from .struct import Struct
10
+ from .table import Table
11
+ from .vectorofvectors import VectorOfVectors
12
+ from .waveform_table import WaveformTable
13
+
14
# Public names exported by this package (used by star-imports); every entry
# corresponds to one of the classes imported above.
__all__ = [
    "Array",
    "ArrayOfEqualSizedArrays",
    "ArrayOfEncodedEqualSizedArrays",
    "FixedSizeArray",
    "LGDO",
    "Scalar",
    "Struct",
    "Table",
    "VectorOfVectors",
    "VectorOfEncodedVectors",
    "WaveformTable",
]
27
+
28
+ import numpy as np
29
+
30
+ np.set_printoptions(threshold=10)
lgdo/types/array.py ADDED
@@ -0,0 +1,140 @@
1
+ """
2
+ Implements a LEGEND Data Object representing an n-dimensional array and
3
+ corresponding utilities.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ from collections.abc import Iterator
9
+ from typing import Any
10
+
11
+ import numpy as np
12
+
13
+ from .. import lgdo_utils as utils
14
+ from .lgdo import LGDO
15
+
16
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
17
+
18
+
19
class Array(LGDO):
    r"""Holds an :class:`numpy.ndarray` and attributes.

    :class:`Array` (and the other various array types) holds an `nda` instead
    of deriving from :class:`numpy.ndarray` for the following reasons:

    - It keeps management of the `nda` totally under the control of the user. The
      user can point it to another object's buffer, grab the `nda` and toss the
      :class:`Array`, etc.
    - It allows the management code to send just the `nda`'s to the central
      routines for data manipulation. Keeping LGDO's out of that code allows
      for more standard, reusable, and (we expect) performant Python.
    - It allows the first axis of the `nda` to be treated as "special" for storage
      in :class:`.Table`\ s.
    """

    def __init__(
        self,
        nda: np.ndarray = None,
        shape: tuple[int, ...] = (),
        dtype: np.dtype = None,
        fill_val: float | int = None,
        attrs: dict[str, Any] = None,
    ) -> None:
        """
        Parameters
        ----------
        nda
            An :class:`numpy.ndarray` to be used for this object's internal
            array. Note: the array is used directly, not copied. If an
            :class:`Array` is given, its internal array is shared; any other
            object is converted with :func:`numpy.array`. If not supplied,
            internal memory is newly allocated based on the shape and dtype
            arguments.
        shape
            A numpy-format shape specification for shape of the internal
            ndarray. Required if `nda` is ``None``, otherwise unused.
        dtype
            Specifies the type of the data in the array. Required if `nda` is
            ``None``, otherwise unused.
        fill_val
            If ``None``, memory is allocated without initialization. Otherwise,
            the array is allocated with all elements set to the corresponding
            fill value. If `nda` is not ``None``, this parameter is ignored.
        attrs
            A set of user attributes to be carried along with this LGDO.
        """
        if nda is None:
            # allocate new memory, choosing the cheapest NumPy constructor
            if fill_val is None:
                nda = np.empty(shape, dtype=dtype)
            elif fill_val == 0:
                nda = np.zeros(shape, dtype=dtype)
            else:
                nda = np.full(shape, fill_val, dtype=dtype)

        elif isinstance(nda, Array):
            # share the buffer of the other Array (no copy)
            nda = nda.nda

        elif not isinstance(nda, np.ndarray):
            nda = np.array(nda)

        self.nda = nda
        self.dtype = self.nda.dtype

        super().__init__(attrs)

    def datatype_name(self) -> str:
        """Return the name of this LGDO data type (``"array"``)."""
        return "array"

    def form_datatype(self) -> str:
        """Return the datatype string ``array<ndim>{element_type}``."""
        dt = self.datatype_name()
        nd = str(len(self.nda.shape))
        et = utils.get_element_type(self)
        return dt + "<" + nd + ">{" + et + "}"

    def __len__(self) -> int:
        """Return the length of the first axis of the internal array."""
        return len(self.nda)

    def resize(self, new_size: int) -> None:
        """Resize the internal array in place along the first axis.

        The remaining axes keep their size. The resize is delegated to
        :meth:`numpy.ndarray.resize` with ``refcheck=True``, so NumPy may
        refuse to resize an array whose buffer is referenced elsewhere.
        """
        new_shape = (new_size,) + self.nda.shape[1:]
        return self.nda.resize(new_shape, refcheck=True)

    def append(self, value: np.ndarray) -> None:
        """Grow the array by one entry along the first axis and set it to `value`."""
        self.resize(len(self) + 1)
        self.nda[-1] = value

    def insert(self, i: int, value: int | float) -> None:
        """Insert `value` before index `i` along the first axis.

        The internal array is replaced by the new array returned by
        :func:`numpy.insert`.
        """
        # Without an explicit axis, numpy.insert() flattens its input, which
        # would destroy the layout of a multi-dimensional array. Insert along
        # the first axis in that case; 0-d and 1-d arrays keep the historical
        # (flattening) call, which gives the same result for 1-d input.
        axis = 0 if self.nda.ndim > 1 else None
        self.nda = np.insert(self.nda, i, value, axis=axis)

    def __getitem__(self, key):
        """Index directly into the internal array."""
        return self.nda[key]

    def __setitem__(self, key, value):
        """Assign directly into the internal array."""
        return self.nda.__setitem__(key, value)

    def __eq__(self, other: Array) -> bool:
        """Two arrays are equal if both their attributes and their data match."""
        if isinstance(other, Array):
            return self.attrs == other.attrs and np.array_equal(self.nda, other.nda)
        else:
            return False

    def __iter__(self) -> Iterator:
        """Iterate over the first axis of the internal array."""
        yield from self.nda

    def __str__(self) -> str:
        """Return the array's string form, followed by the attributes (if any)."""
        attrs = self.getattrs()
        string = str(self.nda)
        if attrs:
            string += f" with attrs={attrs}"
        return string

    def __repr__(self) -> str:
        """Return ``ClassName(<array>, attrs=<attrs>)``.

        Integers are printed in hexadecimal when the dtype is unsigned byte.
        """
        return (
            self.__class__.__name__
            + "("
            + np.array2string(
                self.nda,
                prefix=self.__class__.__name__ + " ",
                formatter={
                    "int": lambda x: f"0x{x:02x}" if self.dtype == np.ubyte else str(x)
                },
            )
            + f", attrs={repr(self.attrs)})"
        )
lgdo/types/arrayofequalsizedarrays.py ADDED
@@ -0,0 +1,133 @@
1
+ """
2
+ Implements a LEGEND Data Object representing an array of equal-sized arrays and
3
+ corresponding utilities.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ from collections.abc import Iterator
8
+ from typing import Any
9
+
10
+ import numpy as np
11
+
12
+ from .. import lgdo_utils as utils
13
+ from . import vectorofvectors as vov
14
+ from .array import Array
15
+
16
+
17
class ArrayOfEqualSizedArrays(Array):
    """An array of equal-sized arrays.

    The arrays have equal size within a file, but that size may differ from
    application to application. Canonical example: array of same-length
    waveforms.
    """

    def __init__(
        self,
        dims: tuple[int, ...] = None,
        nda: np.ndarray = None,
        shape: tuple[int, ...] = (),
        dtype: np.dtype = None,
        fill_val: int | float = None,
        attrs: dict[str, Any] = None,
    ) -> None:
        """
        Parameters
        ----------
        dims
            specifies the dimensions required for building the
            :class:`ArrayOfEqualSizedArrays`' `datatype` attribute.
        nda
            An :class:`numpy.ndarray` to be used for this object's internal
            array. Note: the array is used directly, not copied. If not
            supplied, internal memory is newly allocated based on the `shape`
            and `dtype` arguments.
        shape
            A NumPy-format shape specification for shape of the internal
            array. Required if `nda` is ``None``, otherwise unused.
        dtype
            Specifies the type of the data in the array. Required if `nda` is
            ``None``, otherwise unused.
        fill_val
            If ``None``, memory is allocated without initialization. Otherwise,
            the array is allocated with all elements set to the corresponding
            fill value. If `nda` is not ``None``, this parameter is ignored.
        attrs
            A set of user attributes to be carried along with this LGDO.

        Notes
        -----
        If shape is not "1D array of arrays of shape given by axes 1-N" (of
        `nda`) then specify the dimensionality split in the constructor.

        See Also
        --------
        :class:`.Array`
        """
        if dims is None:
            # If no dims are provided, assume that it's a 1D Array of (N-1)-D Arrays
            if nda is None:
                # NumPy also accepts a bare integer as a shape specification
                s = (shape,) if isinstance(shape, (int, np.integer)) else shape
            elif isinstance(nda, Array):
                # the parent constructor accepts an Array and shares its buffer
                s = nda.nda.shape
            else:
                # works for ndarrays as well as for plain (nested) sequences,
                # which the parent constructor converts with numpy.array()
                s = np.shape(nda)
            self.dims = (1, len(s) - 1)
        else:
            self.dims = dims
        super().__init__(
            nda=nda, shape=shape, dtype=dtype, fill_val=fill_val, attrs=attrs
        )

    def datatype_name(self) -> str:
        """Return the name of this LGDO data type."""
        return "array_of_equalsized_arrays"

    def form_datatype(self) -> str:
        """Return the datatype string ``array_of_equalsized_arrays<dims>{element_type}``.

        `dims` is the comma-separated content of ``self.dims``; the number of
        array dimensions is used as a fallback if ``self.dims`` is ``None``.
        """
        dt = self.datatype_name()
        nd = str(len(self.nda.shape))
        if self.dims is not None:
            nd = ",".join([str(i) for i in self.dims])
        et = utils.get_element_type(self)
        return dt + "<" + nd + ">{" + et + "}"

    def __len__(self) -> int:
        """Return the length of the first axis of the internal array."""
        return len(self.nda)

    def __iter__(self) -> Iterator[np.ndarray]:
        """Iterate over the first axis of the internal array."""
        return self.nda.__iter__()

    def __next__(self) -> np.ndarray:
        # NOTE(review): numpy.ndarray is iterable but does not appear to define
        # ``__next__``, so this call is expected to raise AttributeError. Kept
        # unchanged for interface compatibility -- confirm and consider removal.
        return self.nda.__next__()

    def to_vov(self, cumulative_length: np.ndarray = None) -> vov.VectorOfVectors:
        """Convert (and eventually resize) to :class:`.vectorofvectors.VectorOfVectors`.

        The conversion indexes ``self.nda.shape[1]``, i.e. it expects the
        internal array to have (at least) two axes.

        Parameters
        ----------
        cumulative_length
            cumulative length array of the output vector of vectors. Each
            vector in the output is filled with values found in the
            :class:`ArrayOfEqualSizedArrays`, starting from the first index. if
            ``None``, use all of the original 2D array and make vectors of
            equal size.
        """
        attrs = self.getattrs()

        if cumulative_length is None:
            # every row becomes one vector of length shape[1]
            return vov.VectorOfVectors(
                flattened_data=self.nda.flatten(),
                cumulative_length=(np.arange(self.nda.shape[0], dtype="uint32") + 1)
                * self.nda.shape[1],
                attrs=attrs,
            )

        if not isinstance(cumulative_length, np.ndarray):
            cumulative_length = np.array(cumulative_length)

        # Boolean mask selecting, for each row, the leading elements up to the
        # requested vector length (difference of consecutive cumulative lengths).
        flattened_data = self.nda[
            np.arange(self.nda.shape[1])
            < np.diff(cumulative_length, prepend=0)[:, None]
        ]

        return vov.VectorOfVectors(
            flattened_data=flattened_data,
            cumulative_length=cumulative_length,
            attrs=attrs,
        )
lgdo/types/encoded.py ADDED
@@ -0,0 +1,390 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterator
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+ from numpy.typing import NDArray
8
+
9
+ from .. import lgdo_utils as utils
10
+ from .array import Array
11
+ from .lgdo import LGDO
12
+ from .scalar import Scalar
13
+ from .vectorofvectors import VectorOfVectors
14
+
15
+
16
class VectorOfEncodedVectors(LGDO):
    """An array of variable-length encoded arrays.

    Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
    internal :class:`.VectorOfVectors` `self.encoded_data` storing the encoded
    data, a 1D :class:`.Array` in `self.decoded_size` holds the original sizes
    of the encoded vectors.

    See Also
    --------
    .VectorOfVectors
    """

    def __init__(
        self,
        encoded_data: VectorOfVectors = None,
        decoded_size: Array = None,
        attrs: dict[str, Any] = None,
    ) -> None:
        """
        Parameters
        ----------
        encoded_data
            the vector of encoded vectors.
        decoded_size
            an array holding the original length of each encoded vector in
            `encoded_data`. If ``None``, a zero-filled array with one entry
            per encoded vector is allocated.
        attrs
            A set of user attributes to be carried along with this LGDO. Should
            include information about the codec used to encode the data.

        Raises
        ------
        ValueError
            if `encoded_data` is neither ``None`` nor a :class:`.VectorOfVectors`.
        RuntimeError
            if `encoded_data` and `decoded_size` differ in length.
        """
        if isinstance(encoded_data, VectorOfVectors):
            self.encoded_data = encoded_data
        elif encoded_data is None:
            self.encoded_data = VectorOfVectors(dtype="ubyte")
        else:
            raise ValueError("encoded_data must be a valid VectorOfVectors")

        if isinstance(decoded_size, Array):
            self.decoded_size = decoded_size
        elif decoded_size is not None:
            self.decoded_size = Array(decoded_size)
        else:
            # One zero-initialized entry per encoded vector. This also covers
            # the case where no encoded data was supplied: a bare ``Array()``
            # would hold a 0-dimensional array, on which the length check
            # below cannot be evaluated.
            self.decoded_size = Array(
                shape=len(self.encoded_data), dtype="uint32", fill_val=0
            )

        if len(self.encoded_data) != len(self.decoded_size):
            raise RuntimeError("encoded_data vs. decoded_size shape mismatch")

        super().__init__(attrs)

    def datatype_name(self) -> str:
        """Return the name of this LGDO data type (``"array"``)."""
        return "array"

    def form_datatype(self) -> str:
        """Return the datatype string ``array<1>{encoded_array<1>{element_type}}``."""
        et = utils.get_element_type(self.encoded_data)
        return "array<1>{encoded_array<1>{" + et + "}}"

    def __len__(self) -> int:
        """Return the number of encoded vectors."""
        return len(self.encoded_data)

    def __eq__(self, other: VectorOfEncodedVectors) -> bool:
        """Equal if encoded data, decoded sizes and attributes all match."""
        if isinstance(other, VectorOfEncodedVectors):
            return (
                self.encoded_data == other.encoded_data
                and self.decoded_size == other.decoded_size
                and self.attrs == other.attrs
            )

        else:
            return False

    def resize(self, new_size: int) -> None:
        """Resize vector along the first axis.

        Both the encoded data and the decoded sizes are resized.

        See Also
        --------
        .VectorOfVectors.resize
        """
        self.encoded_data.resize(new_size)
        self.decoded_size.resize(new_size)

    def append(self, value: tuple[NDArray, int]) -> None:
        """Append a 1D encoded vector at the end.

        Parameters
        ----------
        value
            a tuple holding the encoded array and its decoded size.

        See Also
        --------
        .VectorOfVectors.append
        """
        self.encoded_data.append(value[0])
        self.decoded_size.append(value[1])

    def insert(self, i: int, value: tuple[NDArray, int]) -> None:
        """Insert an encoded vector at index `i`.

        Parameters
        ----------
        i
            the new vector will be inserted before this index.
        value
            a tuple holding the encoded array and its decoded size.

        See Also
        --------
        .VectorOfVectors.insert
        """
        self.encoded_data.insert(i, value[0])
        self.decoded_size.insert(i, value[1])

    def replace(self, i: int, value: tuple[NDArray, int]) -> None:
        """Replace the encoded vector (and decoded size) at index `i` with a new one.

        Parameters
        ----------
        i
            index of the vector to be replaced.
        value
            a tuple holding the encoded array and its decoded size.

        See Also
        --------
        .VectorOfVectors.replace
        """
        self.encoded_data.replace(i, value[0])
        self.decoded_size[i] = value[1]

    def __setitem__(self, i: int, value: tuple[NDArray, int]) -> None:
        """Set an encoded vector at index `i`.

        Parameters
        ----------
        i
            the new vector will be set at this index.
        value
            a tuple holding the encoded array and its decoded size.
        """
        self.encoded_data[i] = value[0]
        self.decoded_size[i] = value[1]

    def __getitem__(self, i: int) -> tuple[NDArray, int]:
        """Return vector at index `i`.

        Returns
        -------
        (encoded_data, decoded_size)
            the encoded array and its decoded length.
        """
        return (self.encoded_data[i], self.decoded_size[i])

    def __iter__(self) -> Iterator[tuple[NDArray, int]]:
        """Iterate over ``(encoded vector, decoded size)`` pairs."""
        yield from zip(self.encoded_data, self.decoded_size)

    def __str__(self) -> str:
        """Return one line per encoded vector, followed by the attributes (if any).

        Integers are printed in hexadecimal when a vector's dtype is unsigned
        byte.
        """
        string = ""
        pos = 0
        for vec, size in self:
            if pos != 0:
                string += " "

            string += (
                np.array2string(
                    vec,
                    prefix=" ",
                    formatter={
                        # bind `vec` as a default to avoid late binding in the loop
                        "int": lambda x, vec=vec: f"0x{x:02x}"
                        if vec.dtype == np.ubyte
                        else str(x)
                    },
                )
                + f" decoded_size = {size}"
            )

            # NOTE(review): `pos` presumably never reaches the number of
            # vectors inside this loop, so the separator is also emitted
            # after the last entry. Output format kept unchanged on purpose.
            if pos < len(self.encoded_data.cumulative_length):
                string += ",\n"

            pos += 1

        string = f"[{string}]"

        attrs = self.getattrs()
        if len(attrs) > 0:
            string += f" with attrs={attrs}"

        return string

    def __repr__(self) -> str:
        """Return a constructor-like representation with abbreviated arrays."""
        # The context manager restores the caller's NumPy print options even
        # if one of the nested repr() calls raises.
        with np.printoptions(threshold=5, edgeitems=2, linewidth=100):
            out = (
                "VectorOfEncodedVectors(encoded_data="
                + repr(self.encoded_data)
                + ", decoded_size="
                + repr(self.decoded_size)
                + ", attrs="
                + repr(self.attrs)
                + ")"
            )
        return out
227
+
228
+
229
class ArrayOfEncodedEqualSizedArrays(LGDO):
    """An array of encoded arrays with equal decoded size.

    Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
    to an internal :class:`.VectorOfVectors` `self.encoded_data` storing the
    encoded data, the size of the decoded arrays is stored in a
    :class:`.Scalar` `self.decoded_size`.

    See Also
    --------
    .ArrayOfEqualSizedArrays
    """

    def __init__(
        self,
        encoded_data: VectorOfVectors = None,
        decoded_size: Scalar | int = None,
        attrs: dict[str, Any] = None,
    ) -> None:
        """
        Parameters
        ----------
        encoded_data
            the vector of vectors holding the encoded data.
        decoded_size
            the length of the decoded arrays. Anything that is not already a
            :class:`.Scalar` is converted with :class:`int`; ``None`` is
            stored as 0.
        attrs
            A set of user attributes to be carried along with this LGDO. Should
            include information about the codec used to encode the data.

        Raises
        ------
        ValueError
            if `encoded_data` is neither ``None`` nor a :class:`.VectorOfVectors`.
        """
        if isinstance(encoded_data, VectorOfVectors):
            self.encoded_data = encoded_data
        elif encoded_data is None:
            self.encoded_data = VectorOfVectors(dtype="ubyte")
        else:
            raise ValueError("encoded_data must be a valid VectorOfVectors")

        if isinstance(decoded_size, Scalar):
            self.decoded_size = decoded_size
        elif decoded_size is not None:
            self.decoded_size = Scalar(int(decoded_size))
        else:
            self.decoded_size = Scalar(0)

        super().__init__(attrs)

    def datatype_name(self) -> str:
        """Return the name of this LGDO data type (``"array"``)."""
        return "array"

    def form_datatype(self) -> str:
        """Return ``array_of_encoded_equalsized_arrays<1,1>{element_type}``."""
        et = utils.get_element_type(self.encoded_data)
        return "array_of_encoded_equalsized_arrays<1,1>{" + et + "}"

    def __len__(self) -> int:
        """Return the number of encoded arrays."""
        return len(self.encoded_data)

    def __eq__(self, other: ArrayOfEncodedEqualSizedArrays) -> bool:
        """Equal if encoded data, decoded size and attributes all match."""
        if isinstance(other, ArrayOfEncodedEqualSizedArrays):
            return (
                self.encoded_data == other.encoded_data
                and self.decoded_size == other.decoded_size
                and self.attrs == other.attrs
            )

        else:
            return False

    def resize(self, new_size: int) -> None:
        """Resize array along the first axis.

        See Also
        --------
        .VectorOfVectors.resize
        """
        self.encoded_data.resize(new_size)

    def append(self, value: NDArray) -> None:
        """Append a 1D encoded array at the end.

        See Also
        --------
        .VectorOfVectors.append
        """
        self.encoded_data.append(value)

    def insert(self, i: int, value: NDArray) -> None:
        """Insert an encoded array at index `i`.

        See Also
        --------
        .VectorOfVectors.insert
        """
        self.encoded_data.insert(i, value)

    def replace(self, i: int, value: NDArray) -> None:
        """Replace the encoded array at index `i` with a new one.

        See Also
        --------
        .VectorOfVectors.replace
        """
        self.encoded_data.replace(i, value)

    def __setitem__(self, i: int, value: NDArray) -> None:
        """Set an encoded array at index `i`."""
        self.encoded_data[i] = value

    def __getitem__(self, i: int) -> NDArray:
        """Return encoded array at index `i`."""
        return self.encoded_data[i]

    def __iter__(self) -> Iterator[NDArray]:
        """Iterate over the encoded arrays."""
        yield from self.encoded_data

    def __str__(self) -> str:
        """Return one line per encoded array, then the decoded size and attributes.

        Integers are printed in hexadecimal when an array's dtype is unsigned
        byte; attributes are only appended when present.
        """
        string = ""
        pos = 0
        for vec in self:
            if pos != 0:
                string += " "

            string += np.array2string(
                vec,
                prefix=" ",
                formatter={
                    # bind `vec` as a default to avoid late binding in the loop
                    "int": lambda x, vec=vec: f"0x{x:02x}"
                    if vec.dtype == np.ubyte
                    else str(x)
                },
            )

            # NOTE(review): `pos` presumably never reaches the number of
            # arrays inside this loop, so the separator is also emitted
            # after the last entry. Output format kept unchanged on purpose.
            if pos < len(self.encoded_data.cumulative_length):
                string += ",\n"

            pos += 1

        string = f"[{string}] decoded_size={self.decoded_size}"

        attrs = self.getattrs()
        if len(attrs) > 0:
            string += f" with attrs={attrs}"

        return string

    def __repr__(self) -> str:
        """Return a constructor-like representation with abbreviated arrays."""
        # The context manager restores the caller's NumPy print options even
        # if one of the nested repr() calls raises.
        with np.printoptions(threshold=5, edgeitems=2, linewidth=100):
            out = (
                "ArrayOfEncodedEqualSizedArrays(encoded_data="
                + repr(self.encoded_data)
                + ", decoded_size="
                + repr(self.decoded_size)
                + ", attrs="
                + repr(self.attrs)
                + ")"
            )
        return out