legend-pydataobj 1.6.2__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{legend_pydataobj-1.6.2.dist-info → legend_pydataobj-1.7.0.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: legend_pydataobj
3
- Version: 1.6.2
3
+ Version: 1.7.0
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
{legend_pydataobj-1.6.2.dist-info → legend_pydataobj-1.7.0.dist-info}/RECORD RENAMED
@@ -1,6 +1,6 @@
1
1
  lgdo/__init__.py,sha256=nv9kORuX2FCA6rQLbH959E0fuGMfZvHb0H5uyrLr2WI,3046
2
- lgdo/_version.py,sha256=ay9A4GSmtr3NioHirRgXvWfXtjwRjzXIO_WPuFobCoI,411
3
- lgdo/cli.py,sha256=hHc0Cz4ZXEOwo55oIukHAhHUkw09ePr4m3sO7eUbWHA,8018
2
+ lgdo/_version.py,sha256=2fEqxujmrV2dsREie2BmOYFLu66FowyHtZT2AoLuIzU,411
3
+ lgdo/cli.py,sha256=vB1Oj6kZ5gWaY9HBPBRRRyiepp72hm3bFvQeUUWeMYg,8214
4
4
  lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
5
5
  lgdo/lh5_store.py,sha256=xHwzbKNueEtFwScxrgfvCo2_bWKS6j7ojrpeF9kQflc,8483
6
6
  lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -18,8 +18,8 @@ lgdo/lh5/datatype.py,sha256=VhPWeWv3FW8XM6ZOFOdTZOYK3_hRZ3i0fYsBOEOIF5U,1623
18
18
  lgdo/lh5/exceptions.py,sha256=QWStQD27Qrm4oYs5Z3UAIoq4y7X-f_Z6QWCBCH0DXwE,1006
19
19
  lgdo/lh5/iterator.py,sha256=eqH9a_ZjEhgqJUZbMj36jXK_1Xbx86450DVw7LHNB3Y,12369
20
20
  lgdo/lh5/store.py,sha256=sYX1harVGRyP0oq1LGq2qrFhorutkev9MOovwhzEWZ4,6670
21
- lgdo/lh5/tools.py,sha256=AH0RuaUEJ7Tfzb964KnuVdxCKrGqwNP8XDt2iq4829g,8386
22
- lgdo/lh5/utils.py,sha256=xSTv0obtsUxO38JpkPKd2FbzENlhuTvYR0aexFU1ZQc,6640
21
+ lgdo/lh5/tools.py,sha256=MqYvyag2Uo3RxO0jLphml6UfScLPI2a2MCz3kjkIwmU,9430
22
+ lgdo/lh5/utils.py,sha256=7QYhKd8MqpeFevLyWuasv63WfzEPx7Fd9wA-l3JjnIQ,6984
23
23
  lgdo/lh5/_serializers/__init__.py,sha256=7zvTmBdp-pqS0ium6cKKjEvcqIND-kBC7319G5wMq5Y,1213
24
24
  lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  lgdo/lh5/_serializers/read/array.py,sha256=DLguBkiVNXZuUk7LdalixA8uISx_SETVPXWk-26HYmk,933
@@ -30,7 +30,7 @@ lgdo/lh5/_serializers/read/scalar.py,sha256=YwvA6kyNUh6H0kh2L7bzfgLkA8Et2dQFjp2n
30
30
  lgdo/lh5/_serializers/read/utils.py,sha256=K_HDQ_H-vtbs_gEif1MTtFki4qh6lw-5HE7b-7_s-9Q,417
31
31
  lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=ED4gr2Sw5TZBeww2c2kLbqn9dWYBk1VTgHiR4-5E1Mc,6665
32
32
  lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- lgdo/lh5/_serializers/write/array.py,sha256=lpz0V7bvy2rWkUuSAgX3aTSUe-HLvLnmnIXZLPX_Ddw,2802
33
+ lgdo/lh5/_serializers/write/array.py,sha256=Gosg8rOCH_2dRMj_oNSWyXuoYXDjy0OK--GCYWswR4U,2803
34
34
  lgdo/lh5/_serializers/write/composite.py,sha256=f3b4YeOoUr8y1wA7zsKEFT5mIwX8SD0MYQ40unMRyQc,8460
35
35
  lgdo/lh5/_serializers/write/scalar.py,sha256=gkcF2WVBR3aQYl0EynbVUocx4y3r8tvPfQYQJjkPvP4,643
36
36
  lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=mZuC7NIb-IkmJ9wgn37TTvFTLLAFR71iivrY4yiSJZM,2912
@@ -41,14 +41,14 @@ lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
41
41
  lgdo/types/fixedsizearray.py,sha256=7Fj4QS9ubaeEf2tM3HwjSs6AuG8hKSYaT6Hy7Y_VHdQ,1525
42
42
  lgdo/types/lgdo.py,sha256=UnJDi1emQYVgH_H29Vipfs4LelPopxG5pgZUu1eKOlw,2761
43
43
  lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
44
- lgdo/types/struct.py,sha256=rLtyPthut2wjwiOqwntPrYjjt3uRQbTLWWjiewfKm20,3979
45
- lgdo/types/table.py,sha256=-ldEt_sY-Q5GqcqrDjvCKJ5fZ79NDa9hY-gs4mYgNZQ,17153
46
- lgdo/types/vectorofvectors.py,sha256=9pKtPfvh0vQ-nmg68LUM8cMttoxTl0BG3QH5awQtSxo,24503
44
+ lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
45
+ lgdo/types/table.py,sha256=PYxHXRmuNZkz1UK6MzUVWGhEsRFf6t-xXGFFrXUP0EY,17936
46
+ lgdo/types/vectorofvectors.py,sha256=d_n0lK6rut3_DdPcmMro0ObJAIRhMQnYj0cqAEpckPc,24368
47
47
  lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
48
48
  lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
49
- legend_pydataobj-1.6.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
50
- legend_pydataobj-1.6.2.dist-info/METADATA,sha256=XON5L-CQcu1HuiKff1CoE5QqBOQtr1bHXxYvErIYdFg,44353
51
- legend_pydataobj-1.6.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
52
- legend_pydataobj-1.6.2.dist-info/entry_points.txt,sha256=Uu5MTlppBZxB4QGlLv-oX8FqACWjAZDNii__TBDJwLQ,72
53
- legend_pydataobj-1.6.2.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
54
- legend_pydataobj-1.6.2.dist-info/RECORD,,
49
+ legend_pydataobj-1.7.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
50
+ legend_pydataobj-1.7.0.dist-info/METADATA,sha256=rcfERzQLFd45YcFRMTusx2x7i1LuU7faypmwka_83Ws,44353
51
+ legend_pydataobj-1.7.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
52
+ legend_pydataobj-1.7.0.dist-info/entry_points.txt,sha256=Uu5MTlppBZxB4QGlLv-oX8FqACWjAZDNii__TBDJwLQ,72
53
+ legend_pydataobj-1.7.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
54
+ legend_pydataobj-1.7.0.dist-info/RECORD,,
lgdo/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '1.6.2'
16
- __version_tuple__ = version_tuple = (1, 6, 2)
15
+ __version__ = version = '1.7.0'
16
+ __version_tuple__ = version_tuple = (1, 7, 0)
lgdo/cli.py CHANGED
@@ -52,6 +52,11 @@ def lh5ls(args=None):
52
52
  default=None,
53
53
  help="""Maximum tree depth of groups to print""",
54
54
  )
55
+ parser.add_argument(
56
+ "--detail",
57
+ action="store_true",
58
+ help="""Print details about datasets""",
59
+ )
55
60
 
56
61
  args = parser.parse_args(args)
57
62
 
@@ -66,7 +71,13 @@ def lh5ls(args=None):
66
71
  print(__version__) # noqa: T201
67
72
  sys.exit()
68
73
 
69
- lh5.show(args.lh5_file, args.lh5_group, attrs=args.attributes, depth=args.depth)
74
+ lh5.show(
75
+ args.lh5_file,
76
+ args.lh5_group,
77
+ attrs=args.attributes,
78
+ depth=args.depth,
79
+ detail=args.detail,
80
+ )
70
81
 
71
82
 
72
83
  def lh5concat(args=None):
lgdo/lh5/_serializers/write/array.py CHANGED
@@ -10,7 +10,7 @@ from ...exceptions import LH5EncodeError
10
10
 
11
11
  log = logging.getLogger(__name__)
12
12
 
13
- DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "lzf"}
13
+ DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
14
14
 
15
15
 
16
16
  def _h5_write_array(
lgdo/lh5/tools.py CHANGED
@@ -87,6 +87,7 @@ def show(
87
87
  indent: str = "",
88
88
  header: bool = True,
89
89
  depth: int | None = None,
90
+ detail: bool = False,
90
91
  ) -> None:
91
92
  """Print a tree of LH5 file contents with LGDO datatype.
92
93
 
@@ -104,6 +105,8 @@ def show(
104
105
  print `lh5_group` at the top of the diagram.
105
106
  depth
106
107
  maximum tree depth of groups to print
108
+ detail
109
+ whether to print additional information about how the data is stored
107
110
 
108
111
  Examples
109
112
  --------
@@ -171,6 +174,26 @@ def show(
171
174
 
172
175
  print(f"{indent}{char} \033[1m{key}\033[0m · {dtype} {_attrs}") # noqa: T201
173
176
 
177
+ if detail and isinstance(val, h5py.Dataset):
178
+ char = "| "
179
+ if killme:
180
+ char = " "
181
+ toprint = f"{indent}{char}"
182
+ try:
183
+ toprint += f"\033[3mdtype\033[0m={val.dtype}"
184
+ toprint += f", \033[3mshape\033[0m={val.shape}"
185
+ toprint += f", \033[3mnbytes\033[0m={utils.fmtbytes(val.nbytes)}"
186
+ if (chunkshape := val.chunks) is None:
187
+ toprint += ", \033[3mnumchunks\033[0m=contiguous"
188
+ else:
189
+ toprint += f", \033[3mnumchunks\033[0m={val.id.get_num_chunks()}"
190
+ toprint += f", \033[3mchunkshape\033[0m={chunkshape}"
191
+ toprint += f", \033[3mcompression\033[0m={val.compression}"
192
+ except TypeError:
193
+ toprint += "(scalar)"
194
+
195
+ print(toprint) # noqa: T201
196
+
174
197
  # if it's a group, call this function recursively
175
198
  if isinstance(val, h5py.Group):
176
199
  show(
@@ -179,6 +202,7 @@ def show(
179
202
  header=False,
180
203
  attrs=attrs,
181
204
  depth=depth - 1 if depth else None,
205
+ detail=detail,
182
206
  )
183
207
 
184
208
  # break or move to next key
lgdo/lh5/utils.py CHANGED
@@ -221,3 +221,13 @@ def expand_path(
221
221
  return paths[0]
222
222
 
223
223
  return paths
224
+
225
+
226
+ # https://stackoverflow.com/a/1094933
227
+ def fmtbytes(num, suffix="B"):
228
+ """Returns formatted f-string for printing human-readable number of bytes."""
229
+ for unit in ("", "k", "M", "G", "T", "P", "E", "Z"):
230
+ if abs(num) < 1024.0:
231
+ return f"{num:3.1f} {unit}{suffix}"
232
+ num /= 1024.0
233
+ return f"{num:.1f} Y{suffix}"
lgdo/types/struct.py CHANGED
@@ -6,6 +6,7 @@ utilities.
6
6
  from __future__ import annotations
7
7
 
8
8
  import logging
9
+ from collections.abc import Mapping
9
10
  from typing import Any
10
11
 
11
12
  import numpy as np
@@ -24,8 +25,8 @@ class Struct(LGDO, dict):
24
25
 
25
26
  def __init__(
26
27
  self,
27
- obj_dict: dict[str, LGDO] | None = None,
28
- attrs: dict[str, Any] | None = None,
28
+ obj_dict: Mapping[str, LGDO] | None = None,
29
+ attrs: Mapping[str, Any] | None = None,
29
30
  ) -> None:
30
31
  """
31
32
  Parameters
@@ -37,8 +38,25 @@ class Struct(LGDO, dict):
37
38
  a set of user attributes to be carried along with this LGDO.
38
39
  """
39
40
  if obj_dict is not None:
40
- self.update(obj_dict)
41
-
41
+ for k, v in obj_dict.items():
42
+ # check if value is another mapping-like object
43
+ # initialize another struct (or derived class) in such a case
44
+ if not isinstance(v, LGDO) and isinstance(v, Mapping):
45
+ # NOTE: calling self.__new__() and then self.__init__() allows for polymorphism
46
+ # but is there a better way?
47
+ nested = self.__new__(type(self), v)
48
+ nested.__init__(v)
49
+ super().update({k: nested})
50
+ else:
51
+ # otherwise object must be an LGDO
52
+ if not isinstance(v, LGDO):
53
+ msg = f"value of '{k}' ({v!r}) is not an LGDO or a dictionary"
54
+ raise ValueError(msg)
55
+
56
+ # assign
57
+ super().update({k: v})
58
+
59
+ # call LGDO constructor to setup attributes
42
60
  super().__init__(attrs)
43
61
 
44
62
  def datatype_name(self) -> str:
lgdo/types/table.py CHANGED
@@ -42,9 +42,9 @@ class Table(Struct):
42
42
 
43
43
  def __init__(
44
44
  self,
45
+ col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
45
46
  size: int | None = None,
46
- col_dict: dict[str, LGDO] | None = None,
47
- attrs: dict[str, Any] | None = None,
47
+ attrs: Mapping[str, Any] | None = None,
48
48
  ) -> None:
49
49
  r"""
50
50
  Parameters
@@ -56,11 +56,14 @@ class Table(Struct):
56
56
  determined from the length of the first array in `col_dict`. If
57
57
  neither is provided, a default length of 1024 is used.
58
58
  col_dict
59
- instantiate this table using the supplied named array-like LGDO's.
60
- Note 1: no copy is performed, the objects are used directly.
61
- Note 2: if `size` is not ``None``, all arrays will be resized to
62
- match it. Note 3: if the arrays have different lengths, all will
63
- be resized to match the length of the first array.
59
+ instantiate this table using the supplied mapping of column names
60
+ and array-like objects. Supported input types are: mapping of
61
+ strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`.
62
+ Note 1: no copy is performed, the objects are used directly (unless
63
+ :class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
64
+ all arrays will be resized to match it. Note 3: if the arrays have
65
+ different lengths, all will be resized to match the length of the
66
+ first array.
64
67
  attrs
65
68
  A set of user attributes to be carried along with this LGDO.
66
69
 
@@ -68,14 +71,20 @@ class Table(Struct):
68
71
  -----
69
72
  the :attr:`loc` attribute is initialized to 0.
70
73
  """
74
+ if isinstance(col_dict, pd.DataFrame):
75
+ col_dict = {k: Array(v) for k, v in col_dict.items()}
76
+
77
+ if isinstance(col_dict, ak.Array):
78
+ col_dict = _ak_to_lgdo_or_col_dict(col_dict)
79
+
80
+ # call Struct constructor
71
81
  super().__init__(obj_dict=col_dict, attrs=attrs)
72
82
 
73
83
  # if col_dict is not empty, set size according to it
74
84
  # if size is also supplied, resize all fields to match it
75
85
  # otherwise, warn if the supplied fields have varying size
76
86
  if col_dict is not None and len(col_dict) > 0:
77
- do_warn = size is None
78
- self.resize(new_size=size, do_warn=do_warn)
87
+ self.resize(new_size=size, do_warn=(size is None))
79
88
 
80
89
  # if no col_dict, just set the size (default to 1024)
81
90
  else:
@@ -479,3 +488,11 @@ class Table(Struct):
479
488
 
480
489
  msg = f"{library!r} is not a supported third-party format."
481
490
  raise TypeError(msg)
491
+
492
+
493
+ def _ak_to_lgdo_or_col_dict(array: ak.Array):
494
+ if isinstance(array.type.content, ak.types.RecordType):
495
+ return {field: _ak_to_lgdo_or_col_dict(array[field]) for field in array.fields}
496
+ if isinstance(array.type.content, ak.types.NumpyType):
497
+ return Array(ak.to_numpy(array))
498
+ return VectorOfVectors(array)
lgdo/types/vectorofvectors.py CHANGED
@@ -302,26 +302,23 @@ class VectorOfVectors(LGDO):
302
302
  [3],
303
303
  ]
304
304
  """
305
- if self.ndim == 2:
306
- vidx = self.cumulative_length
307
- old_s = len(self)
308
- dlen = new_size - old_s
309
- csum = vidx[-1] if len(self) > 0 else 0
310
-
311
- # first resize the cumulative length
312
- self.cumulative_length.resize(new_size)
313
-
314
- # if new_size > size, new elements are filled with zeros, let's fix
315
- # that
316
- if dlen > 0:
317
- self.cumulative_length[old_s:] = csum
318
-
319
- # then resize the data array
320
- # if dlen > 0 this has no effect
321
- if len(self.cumulative_length) > 0:
322
- self.flattened_data.resize(self.cumulative_length[-1])
323
- else:
324
- raise NotImplementedError
305
+ vidx = self.cumulative_length
306
+ old_s = len(self)
307
+ dlen = new_size - old_s
308
+ csum = vidx[-1] if len(self) > 0 else 0
309
+
310
+ # first resize the cumulative length
311
+ self.cumulative_length.resize(new_size)
312
+
313
+ # if new_size > size, new elements are filled with zeros, let's fix
314
+ # that
315
+ if dlen > 0:
316
+ self.cumulative_length[old_s:] = csum
317
+
318
+ # then resize the data array
319
+ # if dlen > 0 this has no effect
320
+ if len(self.cumulative_length) > 0:
321
+ self.flattened_data.resize(self.cumulative_length[-1])
325
322
 
326
323
  def append(self, new: NDArray) -> None:
327
324
  """Append a 1D vector `new` at the end.