legend-pydataobj 1.5.0a5__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/METADATA +1 -1
  2. legend_pydataobj-1.6.0.dist-info/RECORD +54 -0
  3. {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/WHEEL +1 -1
  4. {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/entry_points.txt +1 -0
  5. lgdo/__init__.py +7 -4
  6. lgdo/_version.py +2 -2
  7. lgdo/cli.py +237 -12
  8. lgdo/compression/__init__.py +1 -0
  9. lgdo/lh5/__init__.py +9 -1
  10. lgdo/lh5/_serializers/__init__.py +43 -0
  11. lgdo/lh5/_serializers/read/__init__.py +0 -0
  12. lgdo/lh5/_serializers/read/array.py +34 -0
  13. lgdo/lh5/_serializers/read/composite.py +405 -0
  14. lgdo/lh5/_serializers/read/encoded.py +129 -0
  15. lgdo/lh5/_serializers/read/ndarray.py +104 -0
  16. lgdo/lh5/_serializers/read/scalar.py +34 -0
  17. lgdo/lh5/_serializers/read/utils.py +12 -0
  18. lgdo/lh5/_serializers/read/vector_of_vectors.py +195 -0
  19. lgdo/lh5/_serializers/write/__init__.py +0 -0
  20. lgdo/lh5/_serializers/write/array.py +92 -0
  21. lgdo/lh5/_serializers/write/composite.py +259 -0
  22. lgdo/lh5/_serializers/write/scalar.py +23 -0
  23. lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
  24. lgdo/lh5/core.py +272 -0
  25. lgdo/lh5/datatype.py +46 -0
  26. lgdo/lh5/exceptions.py +34 -0
  27. lgdo/lh5/iterator.py +1 -1
  28. lgdo/lh5/store.py +69 -1160
  29. lgdo/lh5/tools.py +27 -53
  30. lgdo/lh5/utils.py +130 -27
  31. lgdo/lh5_store.py +59 -2
  32. lgdo/logging.py +4 -3
  33. lgdo/types/__init__.py +1 -0
  34. lgdo/types/array.py +3 -0
  35. lgdo/types/arrayofequalsizedarrays.py +1 -0
  36. lgdo/types/encoded.py +3 -8
  37. lgdo/types/fixedsizearray.py +1 -0
  38. lgdo/types/struct.py +1 -0
  39. lgdo/types/table.py +69 -26
  40. lgdo/types/vectorofvectors.py +314 -458
  41. lgdo/types/vovutils.py +320 -0
  42. lgdo/types/waveformtable.py +1 -0
  43. lgdo/utils.py +1 -32
  44. legend_pydataobj-1.5.0a5.dist-info/RECORD +0 -36
  45. {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/LICENSE +0 -0
  46. {legend_pydataobj-1.5.0a5.dist-info → legend_pydataobj-1.6.0.dist-info}/top_level.txt +0 -0
lgdo/types/table.py CHANGED
@@ -2,6 +2,7 @@
2
2
  Implements a LEGEND Data Object representing a special struct of arrays of
3
3
  equal length and corresponding utilities.
4
4
  """
5
+
5
6
  from __future__ import annotations
6
7
 
7
8
  import logging
@@ -199,6 +200,11 @@ class Table(Struct):
199
200
  ) -> pd.DataFrame:
200
201
  """Get a :class:`pandas.DataFrame` from the data in the table.
201
202
 
203
+ Warning
204
+ -------
205
+ This method is deprecated. Use :meth:`.view_as` to view the table as a
206
+ Pandas dataframe.
207
+
202
208
  Notes
203
209
  -----
204
210
  The requested data must be array-like, with the ``nda`` attribute.
@@ -224,6 +230,29 @@ class Table(Struct):
224
230
  )
225
231
  return self.view_as(library="pd", cols=cols, prefix=prefix)
226
232
 
233
+ def flatten(self, _prefix="") -> Table:
234
+ """Flatten the table, if nested.
235
+
236
+ Returns a new :class:`Table` (that references, not copies, the existing
237
+ columns) with columns in nested tables being moved to the first level
238
+ (and renamed appropriately).
239
+
240
+ Examples
241
+ --------
242
+ >>> repr(tbl)
243
+ "Table(dict={'a': Array([1 2 3], attrs={'datatype': 'array<1>{real}'}), 'tbl': Table(dict={'b': Array([4 5 6], attrs={'datatype': 'array<1>{real}'}), 'tbl1': Table(dict={'z': Array([9 9 9], attrs={'datatype': 'array<1>{real}'})}, attrs={'datatype': 'table{z}'})}, attrs={'datatype': 'table{b,tbl1}'})}, attrs={'datatype': 'table{a,tbl}'})"
244
+ >>> tbl.flatten().keys()
245
+ dict_keys(['a', 'tbl__b', 'tbl__tbl1__z'])
246
+ """
247
+ flat_table = Table(size=self.size)
248
+ for key, obj in self.items():
249
+ if isinstance(obj, Table):
250
+ flat_table.join(obj.flatten(_prefix=f"{_prefix}{key}__"))
251
+ else:
252
+ flat_table.add_column(_prefix + key, obj)
253
+
254
+ return flat_table
255
+
227
256
  def eval(
228
257
  self,
229
258
  expr: str,
@@ -237,6 +266,13 @@ class Table(Struct):
237
266
  columns are viewed as :class:`ak.Array` and the respective routines are
238
267
  therefore available.
239
268
 
269
+ Columns nested in subtables can be accessed by scoping with two
270
+ underscores (``__``). For example: ::
271
+
272
+ tbl.eval("a + tbl2__b")
273
+
274
+ computes the sum of column `a` and column `b` in the subtable `tbl2`.
275
+
240
276
  Parameters
241
277
  ----------
242
278
  expr
@@ -280,15 +316,16 @@ class Table(Struct):
280
316
 
281
317
  # make a dictionary of low-level objects (numpy or awkward)
282
318
  # for later computation
319
+ flat_self = self.flatten()
283
320
  self_unwrap = {}
284
321
  has_ak = False
285
322
  for obj in c.co_names:
286
- if obj in self.keys():
287
- if isinstance(self[obj], VectorOfVectors):
288
- self_unwrap[obj] = self[obj].view_as("ak", with_units=False)
323
+ if obj in flat_self:
324
+ if isinstance(flat_self[obj], VectorOfVectors):
325
+ self_unwrap[obj] = flat_self[obj].view_as("ak", with_units=False)
289
326
  has_ak = True
290
327
  else:
291
- self_unwrap[obj] = self[obj].view_as("np", with_units=False)
328
+ self_unwrap[obj] = flat_self[obj].view_as("np", with_units=False)
292
329
 
293
330
  # use numexpr if we are only dealing with numpy data types
294
331
  if not has_ak:
@@ -314,7 +351,7 @@ class Table(Struct):
314
351
 
315
352
  # resort to good ol' eval()
316
353
  globs = {"ak": ak, "np": np}
317
- out_data = eval(expr, globs, (self_unwrap | parameters)) # noqa: PGH001
354
+ out_data = eval(expr, globs, (self_unwrap | parameters))
318
355
 
319
356
  # need to convert back to LGDO
320
357
  if isinstance(out_data, ak.Array):
@@ -381,41 +418,44 @@ class Table(Struct):
381
418
  forward physical units to the output data.
382
419
  cols
383
420
  a list of column names specifying the subset of the table's columns
384
- to be added to the dataframe.
421
+ to be added to the data view structure.
385
422
  prefix
386
- The prefix to be added to the column names. Used when recursively getting the
387
- dataframe of a table inside this table.
423
+ The prefix to be added to the column names. Used when recursively
424
+ getting the dataframe of a :class:`Table` inside this
425
+ :class:`Table`.
388
426
 
389
427
  See Also
390
428
  --------
391
429
  .LGDO.view_as
392
430
  """
431
+ if cols is None:
432
+ cols = self.keys()
433
+
393
434
  if library == "pd":
394
435
  df = pd.DataFrame()
395
- if cols is None:
396
- cols = self.keys()
436
+
397
437
  for col in cols:
398
- column = self[col]
399
- if isinstance(column, (Array, VectorOfVectors)):
400
- tmp_ser = column.view_as("pd", with_units=with_units).rename(
401
- prefix + str(col)
402
- )
403
- df = pd.DataFrame(tmp_ser) if df.empty else df.join(tmp_ser)
404
- elif isinstance(column, Table):
405
- tmp_df = column.view_as(
438
+ data = self[col]
439
+
440
+ if isinstance(data, Table):
441
+ log.debug(f"viewing Table {col=!r} recursively")
442
+
443
+ tmp_df = data.view_as(
406
444
  "pd", with_units=with_units, prefix=f"{prefix}{col}_"
407
445
  )
408
- df = tmp_df if df.empty else df.join(tmp_df)
409
- elif df.empty:
410
- df[prefix + str(col)] = column.view_as("pd", with_units=with_units)
446
+ for k, v in tmp_df.items():
447
+ df[k] = v
448
+
411
449
  else:
412
- df[prefix + str(col)] = df.join(
413
- column.view_as("pd", with_units=with_units)
450
+ log.debug(
451
+ f"viewing {type(data).__name__} column {col!r} as Pandas Series"
414
452
  )
453
+ df[f"{prefix}{col}"] = data.view_as("pd", with_units=with_units)
454
+
415
455
  return df
416
456
 
417
457
  if library == "np":
418
- msg = f"Format {library} is not supported for Tables."
458
+ msg = f"Format {library!r} is not supported for Tables."
419
459
  raise TypeError(msg)
420
460
 
421
461
  if library == "ak":
@@ -423,7 +463,10 @@ class Table(Struct):
423
463
  msg = "Pint does not support Awkward yet, you must view the data with_units=False"
424
464
  raise ValueError(msg)
425
465
 
426
- return ak.Array(self)
466
+ # NOTE: passing the Table directly (which inherits from a dict)
467
+ # makes it somehow really slow. Not sure why, but this could be due
468
+ # to extra LGDO fields (like "attrs")
469
+ return ak.Array({col: self[col].view_as("ak") for col in cols})
427
470
 
428
- msg = f"{library} is not a supported third-party format."
471
+ msg = f"{library!r} is not a supported third-party format."
429
472
  raise TypeError(msg)