legend-pydataobj 1.5.0a5__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/METADATA +1 -1
- legend_pydataobj-1.6.0.dist-info/RECORD +54 -0
- {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/WHEEL +1 -1
- {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/entry_points.txt +1 -0
- lgdo/__init__.py +7 -4
- lgdo/_version.py +2 -2
- lgdo/cli.py +237 -12
- lgdo/compression/__init__.py +1 -0
- lgdo/lh5/__init__.py +9 -1
- lgdo/lh5/_serializers/__init__.py +43 -0
- lgdo/lh5/_serializers/read/__init__.py +0 -0
- lgdo/lh5/_serializers/read/array.py +34 -0
- lgdo/lh5/_serializers/read/composite.py +405 -0
- lgdo/lh5/_serializers/read/encoded.py +129 -0
- lgdo/lh5/_serializers/read/ndarray.py +104 -0
- lgdo/lh5/_serializers/read/scalar.py +34 -0
- lgdo/lh5/_serializers/read/utils.py +12 -0
- lgdo/lh5/_serializers/read/vector_of_vectors.py +195 -0
- lgdo/lh5/_serializers/write/__init__.py +0 -0
- lgdo/lh5/_serializers/write/array.py +92 -0
- lgdo/lh5/_serializers/write/composite.py +259 -0
- lgdo/lh5/_serializers/write/scalar.py +23 -0
- lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
- lgdo/lh5/core.py +272 -0
- lgdo/lh5/datatype.py +46 -0
- lgdo/lh5/exceptions.py +34 -0
- lgdo/lh5/iterator.py +1 -1
- lgdo/lh5/store.py +69 -1160
- lgdo/lh5/tools.py +27 -53
- lgdo/lh5/utils.py +130 -27
- lgdo/lh5_store.py +59 -2
- lgdo/logging.py +4 -3
- lgdo/types/__init__.py +1 -0
- lgdo/types/array.py +3 -0
- lgdo/types/arrayofequalsizedarrays.py +1 -0
- lgdo/types/encoded.py +3 -8
- lgdo/types/fixedsizearray.py +1 -0
- lgdo/types/struct.py +1 -0
- lgdo/types/table.py +69 -26
- lgdo/types/vectorofvectors.py +314 -458
- lgdo/types/vovutils.py +320 -0
- lgdo/types/waveformtable.py +1 -0
- lgdo/utils.py +1 -32
- legend_pydataobj-1.5.0a5.dist-info/RECORD +0 -36
- {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/top_level.txt +0 -0
lgdo/types/table.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
Implements a LEGEND Data Object representing a special struct of arrays of
|
3
3
|
equal length and corresponding utilities.
|
4
4
|
"""
|
5
|
+
|
5
6
|
from __future__ import annotations
|
6
7
|
|
7
8
|
import logging
|
@@ -199,6 +200,11 @@ class Table(Struct):
|
|
199
200
|
) -> pd.DataFrame:
|
200
201
|
"""Get a :class:`pandas.DataFrame` from the data in the table.
|
201
202
|
|
203
|
+
Warning
|
204
|
+
-------
|
205
|
+
This method is deprecated. Use :meth:`.view_as` to view the table as a
|
206
|
+
Pandas dataframe.
|
207
|
+
|
202
208
|
Notes
|
203
209
|
-----
|
204
210
|
The requested data must be array-like, with the ``nda`` attribute.
|
@@ -224,6 +230,29 @@ class Table(Struct):
|
|
224
230
|
)
|
225
231
|
return self.view_as(library="pd", cols=cols, prefix=prefix)
|
226
232
|
|
233
|
+
def flatten(self, _prefix="") -> Table:
|
234
|
+
"""Flatten the table, if nested.
|
235
|
+
|
236
|
+
Returns a new :class:`Table` (that references, not copies, the existing
|
237
|
+
columns) with columns in nested tables being moved to the first level
|
238
|
+
(and renamed appropriately).
|
239
|
+
|
240
|
+
Examples
|
241
|
+
--------
|
242
|
+
>>> repr(tbl)
|
243
|
+
"Table(dict={'a': Array([1 2 3], attrs={'datatype': 'array<1>{real}'}), 'tbl': Table(dict={'b': Array([4 5 6], attrs={'datatype': 'array<1>{real}'}), 'tbl1': Table(dict={'z': Array([9 9 9], attrs={'datatype': 'array<1>{real}'})}, attrs={'datatype': 'table{z}'})}, attrs={'datatype': 'table{b,tbl1}'})}, attrs={'datatype': 'table{a,tbl}'})"
|
244
|
+
>>> tbl.flatten().keys()
|
245
|
+
dict_keys(['a', 'tbl__b', 'tbl__tbl1__z'])
|
246
|
+
"""
|
247
|
+
flat_table = Table(size=self.size)
|
248
|
+
for key, obj in self.items():
|
249
|
+
if isinstance(obj, Table):
|
250
|
+
flat_table.join(obj.flatten(_prefix=f"{_prefix}{key}__"))
|
251
|
+
else:
|
252
|
+
flat_table.add_column(_prefix + key, obj)
|
253
|
+
|
254
|
+
return flat_table
|
255
|
+
|
227
256
|
def eval(
|
228
257
|
self,
|
229
258
|
expr: str,
|
@@ -237,6 +266,13 @@ class Table(Struct):
|
|
237
266
|
columns are viewed as :class:`ak.Array` and the respective routines are
|
238
267
|
therefore available.
|
239
268
|
|
269
|
+
Columns nested in subtables can be accessed by scoping with two
|
270
|
+
underscores (``__``). For example: ::
|
271
|
+
|
272
|
+
tbl.eval("a + tbl2__b")
|
273
|
+
|
274
|
+
computes the sum of column ``a`` and column ``b`` in the subtable ``tbl2``.
|
275
|
+
|
240
276
|
Parameters
|
241
277
|
----------
|
242
278
|
expr
|
@@ -280,15 +316,16 @@ class Table(Struct):
|
|
280
316
|
|
281
317
|
# make a dictionary of low-level objects (numpy or awkward)
|
282
318
|
# for later computation
|
319
|
+
flat_self = self.flatten()
|
283
320
|
self_unwrap = {}
|
284
321
|
has_ak = False
|
285
322
|
for obj in c.co_names:
|
286
|
-
if obj in
|
287
|
-
if isinstance(
|
288
|
-
self_unwrap[obj] =
|
323
|
+
if obj in flat_self:
|
324
|
+
if isinstance(flat_self[obj], VectorOfVectors):
|
325
|
+
self_unwrap[obj] = flat_self[obj].view_as("ak", with_units=False)
|
289
326
|
has_ak = True
|
290
327
|
else:
|
291
|
-
self_unwrap[obj] =
|
328
|
+
self_unwrap[obj] = flat_self[obj].view_as("np", with_units=False)
|
292
329
|
|
293
330
|
# use numexpr if we are only dealing with numpy data types
|
294
331
|
if not has_ak:
|
@@ -314,7 +351,7 @@ class Table(Struct):
|
|
314
351
|
|
315
352
|
# resort to good ol' eval()
|
316
353
|
globs = {"ak": ak, "np": np}
|
317
|
-
out_data = eval(expr, globs, (self_unwrap | parameters))
|
354
|
+
out_data = eval(expr, globs, (self_unwrap | parameters))
|
318
355
|
|
319
356
|
# need to convert back to LGDO
|
320
357
|
if isinstance(out_data, ak.Array):
|
@@ -381,41 +418,44 @@ class Table(Struct):
|
|
381
418
|
forward physical units to the output data.
|
382
419
|
cols
|
383
420
|
a list of column names specifying the subset of the table's columns
|
384
|
-
to be added to the
|
421
|
+
to be added to the data view structure.
|
385
422
|
prefix
|
386
|
-
The prefix to be added to the column names. Used when recursively
|
387
|
-
dataframe of a
|
423
|
+
The prefix to be added to the column names. Used when recursively
|
424
|
+
getting the dataframe of a :class:`Table` inside this
|
425
|
+
:class:`Table`.
|
388
426
|
|
389
427
|
See Also
|
390
428
|
--------
|
391
429
|
.LGDO.view_as
|
392
430
|
"""
|
431
|
+
if cols is None:
|
432
|
+
cols = self.keys()
|
433
|
+
|
393
434
|
if library == "pd":
|
394
435
|
df = pd.DataFrame()
|
395
|
-
|
396
|
-
cols = self.keys()
|
436
|
+
|
397
437
|
for col in cols:
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
elif isinstance(column, Table):
|
405
|
-
tmp_df = column.view_as(
|
438
|
+
data = self[col]
|
439
|
+
|
440
|
+
if isinstance(data, Table):
|
441
|
+
log.debug(f"viewing Table {col=!r} recursively")
|
442
|
+
|
443
|
+
tmp_df = data.view_as(
|
406
444
|
"pd", with_units=with_units, prefix=f"{prefix}{col}_"
|
407
445
|
)
|
408
|
-
|
409
|
-
|
410
|
-
|
446
|
+
for k, v in tmp_df.items():
|
447
|
+
df[k] = v
|
448
|
+
|
411
449
|
else:
|
412
|
-
|
413
|
-
|
450
|
+
log.debug(
|
451
|
+
f"viewing {type(data).__name__} column {col!r} as Pandas Series"
|
414
452
|
)
|
453
|
+
df[f"{prefix}{col}"] = data.view_as("pd", with_units=with_units)
|
454
|
+
|
415
455
|
return df
|
416
456
|
|
417
457
|
if library == "np":
|
418
|
-
msg = f"Format {library} is not supported for Tables."
|
458
|
+
msg = f"Format {library!r} is not supported for Tables."
|
419
459
|
raise TypeError(msg)
|
420
460
|
|
421
461
|
if library == "ak":
|
@@ -423,7 +463,10 @@ class Table(Struct):
|
|
423
463
|
msg = "Pint does not support Awkward yet, you must view the data with_units=False"
|
424
464
|
raise ValueError(msg)
|
425
465
|
|
426
|
-
|
466
|
+
# NOTE: passing the Table directly (which inherits from a dict)
|
467
|
+
# makes it somehow really slow. Not sure why, but this could be due
|
468
|
+
# to extra LGDO fields (like "attrs")
|
469
|
+
return ak.Array({col: self[col].view_as("ak") for col in cols})
|
427
470
|
|
428
|
-
msg = f"{library} is not a supported third-party format."
|
471
|
+
msg = f"{library!r} is not a supported third-party format."
|
429
472
|
raise TypeError(msg)
|