legend-pydataobj 1.11.13__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/RECORD +25 -26
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +5 -4
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +1 -3
- lgdo/lh5/_serializers/read/composite.py +1 -3
- lgdo/lh5/_serializers/write/composite.py +2 -2
- lgdo/lh5/concat.py +3 -9
- lgdo/lh5/core.py +33 -32
- lgdo/lh5/iterator.py +48 -27
- lgdo/lh5/store.py +22 -75
- lgdo/lh5/tools.py +0 -111
- lgdo/lh5/utils.py +6 -4
- lgdo/types/array.py +84 -15
- lgdo/types/encoded.py +25 -20
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +50 -0
- lgdo/types/table.py +50 -28
- lgdo/types/vectorofvectors.py +132 -94
- lgdo/types/vovutils.py +14 -4
- lgdo/types/waveformtable.py +19 -21
- lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.13.dist-info → legend_pydataobj-1.12.0.dist-info}/top_level.txt +0 -0
lgdo/types/table.py
CHANGED
@@ -19,7 +19,7 @@ from pandas.io.formats import format as fmt
|
|
19
19
|
|
20
20
|
from .array import Array
|
21
21
|
from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
|
22
|
-
from .lgdo import LGDO
|
22
|
+
from .lgdo import LGDO, LGDOCollection
|
23
23
|
from .scalar import Scalar
|
24
24
|
from .struct import Struct
|
25
25
|
from .vectorofvectors import VectorOfVectors
|
@@ -27,13 +27,9 @@ from .vectorofvectors import VectorOfVectors
|
|
27
27
|
log = logging.getLogger(__name__)
|
28
28
|
|
29
29
|
|
30
|
-
class Table(Struct):
|
30
|
+
class Table(Struct, LGDOCollection):
|
31
31
|
"""A special struct of arrays or subtable columns of equal length.
|
32
32
|
|
33
|
-
Holds onto an internal read/write location ``loc`` that is useful in
|
34
|
-
managing table I/O using functions like :meth:`push_row`, :meth:`is_full`,
|
35
|
-
and :meth:`clear`.
|
36
|
-
|
37
33
|
Note
|
38
34
|
----
|
39
35
|
If you write to a table and don't fill it up to its total size, be sure to
|
@@ -49,7 +45,7 @@ class Table(Struct):
|
|
49
45
|
|
50
46
|
def __init__(
|
51
47
|
self,
|
52
|
-
col_dict: Mapping[str,
|
48
|
+
col_dict: Mapping[str, LGDOCollection] | pd.DataFrame | ak.Array | None = None,
|
53
49
|
size: int | None = None,
|
54
50
|
attrs: Mapping[str, Any] | None = None,
|
55
51
|
) -> None:
|
@@ -65,7 +61,7 @@ class Table(Struct):
|
|
65
61
|
col_dict
|
66
62
|
instantiate this table using the supplied mapping of column names
|
67
63
|
and array-like objects. Supported input types are: mapping of
|
68
|
-
strings to
|
64
|
+
strings to LGDOCollections, :class:`pd.DataFrame` and :class:`ak.Array`.
|
69
65
|
Note 1: no copy is performed, the objects are used directly (unless
|
70
66
|
:class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
|
71
67
|
all arrays will be resized to match it. Note 3: if the arrays have
|
@@ -85,7 +81,9 @@ class Table(Struct):
|
|
85
81
|
col_dict = _ak_to_lgdo_or_col_dict(col_dict)
|
86
82
|
|
87
83
|
# call Struct constructor
|
88
|
-
|
84
|
+
Struct.__init__(self, obj_dict=col_dict, attrs=attrs)
|
85
|
+
# no need to call the LGDOCollection constructor, as we are calling the
|
86
|
+
# Struct constructor already
|
89
87
|
|
90
88
|
# if col_dict is not empty, set size according to it
|
91
89
|
# if size is also supplied, resize all fields to match it
|
@@ -93,13 +91,10 @@ class Table(Struct):
|
|
93
91
|
if col_dict is not None and len(col_dict) > 0:
|
94
92
|
self.resize(new_size=size, do_warn=(size is None))
|
95
93
|
|
96
|
-
# if no col_dict, just set the size
|
94
|
+
# if no col_dict, just set the size
|
97
95
|
else:
|
98
96
|
self.size = size if size is not None else None
|
99
97
|
|
100
|
-
# always start at loc=0
|
101
|
-
self.loc = 0
|
102
|
-
|
103
98
|
def datatype_name(self) -> str:
|
104
99
|
return "table"
|
105
100
|
|
@@ -107,7 +102,31 @@ class Table(Struct):
|
|
107
102
|
"""Provides ``__len__`` for this array-like class."""
|
108
103
|
return self.size
|
109
104
|
|
110
|
-
def
|
105
|
+
def reserve_capacity(self, capacity: int | list) -> None:
|
106
|
+
"Set size (number of rows) of internal memory buffer"
|
107
|
+
if isinstance(capacity, int):
|
108
|
+
for obj in self.values():
|
109
|
+
obj.reserve_capacity(capacity)
|
110
|
+
else:
|
111
|
+
if len(capacity) != len(self.keys()):
|
112
|
+
msg = "List of capacities must have same length as number of keys"
|
113
|
+
raise ValueError(msg)
|
114
|
+
|
115
|
+
for obj, cap in zip(self.values(), capacity):
|
116
|
+
obj.reserve_capacity(cap)
|
117
|
+
|
118
|
+
def get_capacity(self) -> int:
|
119
|
+
"Get list of capacities for each key"
|
120
|
+
return [v.get_capacity() for v in self.values()]
|
121
|
+
|
122
|
+
def trim_capacity(self) -> int:
|
123
|
+
"Set capacity to be minimum needed to support Array size"
|
124
|
+
for v in self.values():
|
125
|
+
v.trim_capacity()
|
126
|
+
|
127
|
+
def resize(
|
128
|
+
self, new_size: int | None = None, do_warn: bool = False, trim: bool = False
|
129
|
+
) -> None:
|
111
130
|
# if new_size = None, use the size from the first field
|
112
131
|
for field, obj in self.items():
|
113
132
|
if new_size is None:
|
@@ -119,21 +138,20 @@ class Table(Struct):
|
|
119
138
|
f"with size {len(obj)} != {new_size}"
|
120
139
|
)
|
121
140
|
if isinstance(obj, Table):
|
122
|
-
obj.resize(new_size)
|
141
|
+
obj.resize(new_size, trim)
|
123
142
|
else:
|
124
|
-
obj.resize(new_size)
|
143
|
+
obj.resize(new_size, trim)
|
125
144
|
self.size = new_size
|
126
145
|
|
127
|
-
def
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
def clear(self) -> None:
|
134
|
-
self.loc = 0
|
146
|
+
def insert(self, i: int, vals: dict) -> None:
|
147
|
+
"Insert vals into table at row i. Vals is a mapping from table key to val"
|
148
|
+
for k, ar in self.items():
|
149
|
+
ar.insert(i, vals[k])
|
150
|
+
self.size += 1
|
135
151
|
|
136
|
-
def add_field(
|
152
|
+
def add_field(
|
153
|
+
self, name: str, obj: LGDOCollection, use_obj_size: bool = False
|
154
|
+
) -> None:
|
137
155
|
"""Add a field (column) to the table.
|
138
156
|
|
139
157
|
Use the name "field" here to match the terminology used in
|
@@ -170,7 +188,9 @@ class Table(Struct):
|
|
170
188
|
new_size = len(obj) if use_obj_size else self.size
|
171
189
|
self.resize(new_size=new_size)
|
172
190
|
|
173
|
-
def add_column(
|
191
|
+
def add_column(
|
192
|
+
self, name: str, obj: LGDOCollection, use_obj_size: bool = False
|
193
|
+
) -> None:
|
174
194
|
"""Alias for :meth:`.add_field` using table terminology 'column'."""
|
175
195
|
self.add_field(name, obj, use_obj_size=use_obj_size)
|
176
196
|
|
@@ -201,8 +221,10 @@ class Table(Struct):
|
|
201
221
|
set to ``False`` to turn off warnings associated with mismatched
|
202
222
|
`loc` parameter or :meth:`add_column` warnings.
|
203
223
|
"""
|
204
|
-
if other_table
|
205
|
-
log.warning(
|
224
|
+
if len(other_table) != len(self) and do_warn:
|
225
|
+
log.warning(
|
226
|
+
f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})"
|
227
|
+
)
|
206
228
|
if cols is None:
|
207
229
|
cols = other_table.keys()
|
208
230
|
for name in cols:
|
lgdo/types/vectorofvectors.py
CHANGED
@@ -20,12 +20,12 @@ from .. import utils
|
|
20
20
|
from . import arrayofequalsizedarrays as aoesa
|
21
21
|
from . import vovutils
|
22
22
|
from .array import Array
|
23
|
-
from .lgdo import
|
23
|
+
from .lgdo import LGDOCollection
|
24
24
|
|
25
25
|
log = logging.getLogger(__name__)
|
26
26
|
|
27
27
|
|
28
|
-
class VectorOfVectors(
|
28
|
+
class VectorOfVectors(LGDOCollection):
|
29
29
|
"""A n-dimensional variable-length 1D array of variable-length 1D arrays.
|
30
30
|
|
31
31
|
If the vector is 2-dimensional, the internal representation is as two NumPy
|
@@ -130,20 +130,48 @@ class VectorOfVectors(LGDO):
|
|
130
130
|
|
131
131
|
# ak.to_buffer helps in de-serialization
|
132
132
|
# NOTE: ak.to_packed() needed?
|
133
|
-
form,
|
134
|
-
|
135
|
-
#
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
)
|
133
|
+
form, _, container = ak.to_buffers(ak.to_packed(data))
|
134
|
+
|
135
|
+
# check if bytestring
|
136
|
+
curr = form
|
137
|
+
for _ in range(data.ndim - 1):
|
138
|
+
curr = curr.content
|
139
|
+
if (
|
140
|
+
"__array__" in curr.parameters
|
141
|
+
and curr.parameters["__array__"] == "bytestring"
|
142
|
+
):
|
143
|
+
diffs = np.diff(container[f"node{data.ndim - 1}-offsets"])
|
144
|
+
if (diffs != diffs[0]).all():
|
145
|
+
err_msg = "Non uniform string lengths not supported"
|
146
|
+
raise NotImplementedError(err_msg)
|
147
|
+
flattened_data = np.asarray(
|
148
|
+
ak.enforce_type(
|
149
|
+
ak.unflatten(
|
150
|
+
container.pop(
|
151
|
+
f"node{data.ndim}-data", np.empty(0, dtype=dtype)
|
152
|
+
),
|
153
|
+
diffs[0],
|
154
|
+
),
|
155
|
+
"bytes",
|
156
|
+
)
|
157
|
+
)
|
158
|
+
|
159
|
+
# if user-provided dtype is different than dtype from Awkward, cast
|
160
|
+
# NOTE: makes a copy only if needed
|
161
|
+
flattened_data = np.asarray(flattened_data, dtype=dtype)
|
162
|
+
else:
|
163
|
+
# NOTE: node#-data is not even in the dict if the awkward array is empty
|
164
|
+
# NOTE: if the data arg was a numpy array, to_buffers() preserves
|
165
|
+
# the original dtype
|
166
|
+
# FIXME: have to copy the buffers, otherwise self will not own the
|
167
|
+
# data and self.resize() will fail. Is it possible to avoid this?
|
168
|
+
flattened_data = np.copy(
|
169
|
+
container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
|
170
|
+
)
|
143
171
|
|
144
|
-
|
145
|
-
|
146
|
-
|
172
|
+
# if user-provided dtype is different than dtype from Awkward, cast
|
173
|
+
# NOTE: makes a copy only if needed
|
174
|
+
flattened_data = np.asarray(flattened_data, dtype=dtype)
|
147
175
|
|
148
176
|
# start from innermost VoV and build nested structure
|
149
177
|
for i in range(data.ndim - 2, -1, -1):
|
@@ -210,20 +238,17 @@ class VectorOfVectors(LGDO):
|
|
210
238
|
elif self.flattened_data is None:
|
211
239
|
self.flattened_data = flattened_data
|
212
240
|
|
213
|
-
|
214
|
-
self.dtype = self.flattened_data.dtype
|
215
|
-
|
216
|
-
# set ndim
|
217
|
-
self.ndim = 2
|
218
|
-
pointer = self.flattened_data
|
219
|
-
while True:
|
220
|
-
if isinstance(pointer, Array):
|
221
|
-
break
|
241
|
+
super().__init__(attrs)
|
222
242
|
|
223
|
-
|
224
|
-
|
243
|
+
@property
|
244
|
+
def ndim(self):
|
245
|
+
return 1 + (
|
246
|
+
1 if isinstance(self.flattened_data, Array) else self.flattened_data.ndim
|
247
|
+
)
|
225
248
|
|
226
|
-
|
249
|
+
@property
|
250
|
+
def dtype(self) -> np.dtype:
|
251
|
+
return self.flattened_data.dtype
|
227
252
|
|
228
253
|
def datatype_name(self) -> str:
|
229
254
|
return "array"
|
@@ -276,7 +301,30 @@ class VectorOfVectors(LGDO):
|
|
276
301
|
else:
|
277
302
|
raise NotImplementedError
|
278
303
|
|
279
|
-
def
|
304
|
+
def reserve_capacity(self, cap_cl, *cap_args) -> None:
|
305
|
+
"""Set capacity of internal data arrays. Expect number of args to
|
306
|
+
equal `self.n_dim`. First arg is capacity of cumulative length array.
|
307
|
+
If `self.n_dim` is 2, second argument is capacity of flattened data,
|
308
|
+
otherwise arguments are fed recursively to remaining dimensions.
|
309
|
+
"""
|
310
|
+
self.cumulative_length.reserve_capacity(cap_cl)
|
311
|
+
self.flattened_data.reserve_capacity(*cap_args)
|
312
|
+
|
313
|
+
def get_capacity(self) -> tuple[int]:
|
314
|
+
"""Get tuple containing capacity of each dimension. First dimension
|
315
|
+
is cumulative length array. Last dimension is flattened data.
|
316
|
+
"""
|
317
|
+
fd_cap = self.flattened_data.get_capacity()
|
318
|
+
if isinstance(fd_cap, int):
|
319
|
+
return (self.cumulative_length.get_capacity(), fd_cap)
|
320
|
+
return (self.cumulative_length.get_capacity(), *fd_cap)
|
321
|
+
|
322
|
+
def trim_capacity(self) -> None:
|
323
|
+
"Set capacity for all dimensions to minimum needed to hold data"
|
324
|
+
self.cumulative_length.trim_capacity()
|
325
|
+
self.flattened_data.trim_capacity()
|
326
|
+
|
327
|
+
def resize(self, new_size: int, trim: bool = False) -> None:
|
280
328
|
"""Resize vector along the first axis.
|
281
329
|
|
282
330
|
`self.flattened_data` is resized only if `new_size` is smaller than the
|
@@ -286,6 +334,8 @@ class VectorOfVectors(LGDO):
|
|
286
334
|
`self.cumulative_length` is padded with its last element. This
|
287
335
|
corresponds to appending empty vectors.
|
288
336
|
|
337
|
+
If `trim` is ``True``, resize capacity to match new size
|
338
|
+
|
289
339
|
Examples
|
290
340
|
--------
|
291
341
|
>>> vov = VectorOfVectors([[1, 2, 3], [4, 5]])
|
@@ -303,23 +353,22 @@ class VectorOfVectors(LGDO):
|
|
303
353
|
[3],
|
304
354
|
]
|
305
355
|
"""
|
306
|
-
vidx = self.cumulative_length
|
307
356
|
old_s = len(self)
|
308
|
-
dlen = new_size - old_s
|
309
|
-
csum = vidx[-1] if len(self) > 0 else 0
|
310
357
|
|
311
358
|
# first resize the cumulative length
|
312
|
-
self.cumulative_length.resize(new_size)
|
359
|
+
self.cumulative_length.resize(new_size, trim)
|
313
360
|
|
314
361
|
# if new_size > size, new elements are filled with zeros, let's fix
|
315
362
|
# that
|
316
|
-
if
|
317
|
-
self.cumulative_length[old_s:] =
|
363
|
+
if new_size > old_s:
|
364
|
+
self.cumulative_length[old_s:] = self.cumulative_length[old_s - 1]
|
318
365
|
|
319
366
|
# then resize the data array
|
320
367
|
# if dlen > 0 this has no effect
|
321
368
|
if len(self.cumulative_length) > 0:
|
322
|
-
self.flattened_data.resize(self.cumulative_length[-1])
|
369
|
+
self.flattened_data.resize(self.cumulative_length[-1], trim)
|
370
|
+
else:
|
371
|
+
self.flattened_data.resize(0, trim)
|
323
372
|
|
324
373
|
def append(self, new: NDArray) -> None:
|
325
374
|
"""Append a 1D vector `new` at the end.
|
@@ -334,20 +383,7 @@ class VectorOfVectors(LGDO):
|
|
334
383
|
[8 9],
|
335
384
|
]
|
336
385
|
"""
|
337
|
-
|
338
|
-
# first extend cumulative_length by +1
|
339
|
-
self.cumulative_length.resize(len(self) + 1)
|
340
|
-
# set it at the right value
|
341
|
-
newlen = (
|
342
|
-
self.cumulative_length[-2] + len(new) if len(self) > 1 else len(new)
|
343
|
-
)
|
344
|
-
self.cumulative_length[-1] = newlen
|
345
|
-
# then resize flattened_data to accommodate the new vector
|
346
|
-
self.flattened_data.resize(len(self.flattened_data) + len(new))
|
347
|
-
# finally set it
|
348
|
-
self[-1] = new
|
349
|
-
else:
|
350
|
-
raise NotImplementedError
|
386
|
+
self.insert(len(self), new)
|
351
387
|
|
352
388
|
def insert(self, i: int, new: NDArray) -> None:
|
353
389
|
"""Insert a vector at index `i`.
|
@@ -364,23 +400,15 @@ class VectorOfVectors(LGDO):
|
|
364
400
|
[8 9],
|
365
401
|
[4 5],
|
366
402
|
]
|
367
|
-
|
368
|
-
Warning
|
369
|
-
-------
|
370
|
-
This method involves a significant amount of memory re-allocation and
|
371
|
-
is expected to perform poorly on large vectors.
|
372
403
|
"""
|
373
404
|
if self.ndim == 2:
|
374
|
-
if i
|
375
|
-
msg = f"index {i} is out of bounds for vector
|
405
|
+
if i > len(self):
|
406
|
+
msg = f"index {i} is out of bounds for vector with size {len(self)}"
|
376
407
|
raise IndexError(msg)
|
377
408
|
|
378
|
-
self.
|
379
|
-
|
380
|
-
)
|
381
|
-
self.cumulative_length = Array(
|
382
|
-
np.insert(self.cumulative_length, i, self.cumulative_length[i - 1])
|
383
|
-
)
|
409
|
+
i_start = 0 if i == 0 else self.cumulative_length[i - 1]
|
410
|
+
self.flattened_data.insert(i_start, new)
|
411
|
+
self.cumulative_length.insert(i, i_start)
|
384
412
|
self.cumulative_length[i:] += np.uint32(len(new))
|
385
413
|
else:
|
386
414
|
raise NotImplementedError
|
@@ -400,11 +428,6 @@ class VectorOfVectors(LGDO):
|
|
400
428
|
[[8 9],
|
401
429
|
[4 5],
|
402
430
|
]
|
403
|
-
|
404
|
-
Warning
|
405
|
-
-------
|
406
|
-
This method involves a significant amount of memory re-allocation and
|
407
|
-
is expected to perform poorly on large vectors.
|
408
431
|
"""
|
409
432
|
if self.ndim == 2:
|
410
433
|
if i >= len(self):
|
@@ -414,27 +437,17 @@ class VectorOfVectors(LGDO):
|
|
414
437
|
vidx = self.cumulative_length
|
415
438
|
dlen = len(new) - len(self[i])
|
416
439
|
|
417
|
-
if dlen
|
418
|
-
#
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
)
|
429
|
-
else:
|
430
|
-
# set the already allocated indices
|
431
|
-
self.flattened_data[vidx[i - 1] : vidx[i]] = new[: len(self[i])]
|
432
|
-
# then insert the remaining
|
433
|
-
self.flattened_data = Array(
|
434
|
-
np.insert(self.flattened_data, vidx[i], new[len(self[i]) :])
|
435
|
-
)
|
436
|
-
|
437
|
-
vidx[i:] = vidx[i:] + dlen
|
440
|
+
if dlen != 0:
|
441
|
+
# move the subsequent entries
|
442
|
+
vidx[i:] += dlen
|
443
|
+
self.flattened_data.resize(vidx[-1])
|
444
|
+
self.flattened_data._nda[vidx[i] : vidx[-1]] = self.flattened_data._nda[
|
445
|
+
vidx[i] - dlen : vidx[-1] - dlen
|
446
|
+
]
|
447
|
+
|
448
|
+
# set the already allocated indices
|
449
|
+
start = vidx[i - 1] if i > 0 else 0
|
450
|
+
self.flattened_data[start : vidx[i]] = new
|
438
451
|
else:
|
439
452
|
raise NotImplementedError
|
440
453
|
|
@@ -484,7 +497,18 @@ class VectorOfVectors(LGDO):
|
|
484
497
|
cum_lens = np.add(start, lens.cumsum(), dtype=int)
|
485
498
|
|
486
499
|
# fill with fast vectorized routine
|
487
|
-
|
500
|
+
if np.issubdtype(self.flattened_data.dtype, np.unsignedinteger):
|
501
|
+
nan_val = np.iinfo(self.flattened_data.dtype).max
|
502
|
+
if np.issubdtype(self.flattened_data.dtype, np.integer):
|
503
|
+
nan_val = np.iinfo(self.flattened_data.dtype).min
|
504
|
+
else:
|
505
|
+
nan_val = np.nan
|
506
|
+
vovutils._nb_fill(
|
507
|
+
vec,
|
508
|
+
lens,
|
509
|
+
np.array([nan_val]).astype(self.flattened_data.nda.dtype),
|
510
|
+
self.flattened_data.nda[start : cum_lens[-1]],
|
511
|
+
)
|
488
512
|
|
489
513
|
# add new vector(s) length to cumulative_length
|
490
514
|
self.cumulative_length[i : i + len(lens)] = cum_lens
|
@@ -634,11 +658,25 @@ class VectorOfVectors(LGDO):
|
|
634
658
|
offsets[1:] = self.cumulative_length.nda
|
635
659
|
offsets[0] = 0
|
636
660
|
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
661
|
+
if self.ndim != 2:
|
662
|
+
content = self.flattened_data.view_as(
|
663
|
+
library, with_units=with_units
|
664
|
+
).layout
|
665
|
+
# need to handle strings separately
|
666
|
+
elif np.issubdtype(self.flattened_data.nda.dtype, np.bytes_):
|
667
|
+
byte_arrays = []
|
668
|
+
for s in self.flattened_data.nda:
|
669
|
+
# Convert each string to array of bytes
|
670
|
+
byte_array = np.frombuffer(s, dtype=np.uint8)
|
671
|
+
byte_arrays.append(byte_array)
|
672
|
+
max_len = max(len(b) for b in byte_arrays)
|
673
|
+
raw_arrays = ak.contents.NumpyArray(np.concatenate(byte_arrays))
|
674
|
+
array_of_chars = ak.contents.RegularArray(
|
675
|
+
raw_arrays, max_len, parameters={"__array__": "bytes"}
|
676
|
+
)
|
677
|
+
content = ak.enforce_type(array_of_chars, "bytes", highlevel=False)
|
678
|
+
else:
|
679
|
+
content = ak.contents.NumpyArray(self.flattened_data.nda)
|
642
680
|
|
643
681
|
layout = ak.contents.ListOffsetArray(
|
644
682
|
offsets=ak.index.Index(offsets),
|
lgdo/types/vovutils.py
CHANGED
@@ -81,7 +81,7 @@ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> ND
|
|
81
81
|
|
82
82
|
@numba.guvectorize(
|
83
83
|
[
|
84
|
-
f"{data_type}[:,:],{size_type}[:],{data_type}[:]"
|
84
|
+
f"{data_type}[:,:],{size_type}[:],{data_type},{data_type}[:]"
|
85
85
|
for data_type in [
|
86
86
|
"b1",
|
87
87
|
"i1",
|
@@ -99,10 +99,12 @@ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> ND
|
|
99
99
|
]
|
100
100
|
for size_type in ["i4", "i8", "u4", "u8"]
|
101
101
|
],
|
102
|
-
"(l,m),(l),(n)",
|
102
|
+
"(l,m),(l),(),(n)",
|
103
103
|
**nb_kwargs,
|
104
104
|
)
|
105
|
-
def _nb_fill(
|
105
|
+
def _nb_fill(
|
106
|
+
aoa_in: NDArray, len_in: NDArray, nan_val: int | float, flattened_array_out: NDArray
|
107
|
+
):
|
106
108
|
"""Vectorized function to fill flattened array from array of arrays and
|
107
109
|
lengths. Values in aoa_in past lengths will not be copied.
|
108
110
|
|
@@ -112,6 +114,9 @@ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray):
|
|
112
114
|
array of arrays containing values to be copied
|
113
115
|
len_in
|
114
116
|
array of vector lengths for each row of aoa_in
|
117
|
+
nan_val
|
118
|
+
value to use when len_in is longer than aoa_in. Should use
|
119
|
+
np.nan for floating point, and 0xfff... for integer types
|
115
120
|
flattened_array_out
|
116
121
|
flattened array to copy values into. Must be longer than sum of
|
117
122
|
lengths in len_in
|
@@ -122,9 +127,14 @@ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray):
|
|
122
127
|
raise ValueError(msg)
|
123
128
|
|
124
129
|
start = 0
|
130
|
+
max_len = aoa_in.shape[1]
|
125
131
|
for i, ll in enumerate(len_in):
|
126
132
|
stop = start + ll
|
127
|
-
|
133
|
+
if ll > max_len:
|
134
|
+
flattened_array_out[start : start + max_len] = aoa_in[i, :max_len]
|
135
|
+
flattened_array_out[start + max_len : stop] = nan_val
|
136
|
+
else:
|
137
|
+
flattened_array_out[start:stop] = aoa_in[i, :ll]
|
128
138
|
start = stop
|
129
139
|
|
130
140
|
|
lgdo/types/waveformtable.py
CHANGED
@@ -112,12 +112,10 @@ class WaveformTable(Table):
|
|
112
112
|
if not isinstance(t0, Array):
|
113
113
|
shape = (size,)
|
114
114
|
t0_dtype = t0.dtype if hasattr(t0, "dtype") else np.float32
|
115
|
-
|
116
|
-
t0
|
117
|
-
|
118
|
-
|
119
|
-
nda.resize(shape, refcheck=True)
|
120
|
-
t0 = Array(nda=nda)
|
115
|
+
if isinstance(t0, np.ndarray):
|
116
|
+
t0 = Array(nda=t0, shape=shape, dtype=t0_dtype)
|
117
|
+
else:
|
118
|
+
t0 = Array(fill_val=t0, shape=shape, dtype=t0_dtype)
|
121
119
|
|
122
120
|
if t0_units is not None:
|
123
121
|
t0.attrs["units"] = f"{t0_units}"
|
@@ -125,12 +123,11 @@ class WaveformTable(Table):
|
|
125
123
|
if not isinstance(dt, Array):
|
126
124
|
shape = (size,)
|
127
125
|
dt_dtype = dt.dtype if hasattr(dt, "dtype") else np.float32
|
128
|
-
|
129
|
-
dt
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
dt = Array(nda=nda)
|
126
|
+
if isinstance(dt, np.ndarray):
|
127
|
+
dt = Array(nda=dt, shape=shape, dtype=dt_dtype)
|
128
|
+
else:
|
129
|
+
dt = Array(fill_val=dt, shape=shape, dtype=dt_dtype)
|
130
|
+
|
134
131
|
if dt_units is not None:
|
135
132
|
dt.attrs["units"] = f"{dt_units}"
|
136
133
|
|
@@ -174,14 +171,15 @@ class WaveformTable(Table):
|
|
174
171
|
if hasattr(values, "dtype")
|
175
172
|
else np.dtype(np.float64)
|
176
173
|
)
|
177
|
-
|
178
|
-
values
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
174
|
+
if isinstance(values, np.ndarray):
|
175
|
+
values = ArrayOfEqualSizedArrays(
|
176
|
+
dims=(1, 1), nda=values, shape=shape, dtype=dtype
|
177
|
+
)
|
178
|
+
else:
|
179
|
+
values = ArrayOfEqualSizedArrays(
|
180
|
+
dims=(1, 1), fill_val=0, shape=shape, dtype=dtype
|
181
|
+
)
|
182
|
+
|
185
183
|
if values_units is not None:
|
186
184
|
values.attrs["units"] = f"{values_units}"
|
187
185
|
|
@@ -215,7 +213,7 @@ class WaveformTable(Table):
|
|
215
213
|
return
|
216
214
|
shape = self.values.nda.shape
|
217
215
|
shape = (shape[0], wf_len)
|
218
|
-
self.values.
|
216
|
+
self.values.resize(shape)
|
219
217
|
|
220
218
|
def resize_wf_len(self, new_len: int) -> None:
|
221
219
|
"""Alias for `wf_len.setter`, for when we want to make it clear in
|