legend-pydataobj 1.12.0a4__py3-none-any.whl → 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
legend_pydataobj-1.12.0a4.dist-info/METADATA → legend_pydataobj-1.14.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a4
+Version: 1.14.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
legend_pydataobj-1.12.0a4.dist-info/RECORD → legend_pydataobj-1.14.0.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
-legend_pydataobj-1.12.0a4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+legend_pydataobj-1.14.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
 lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
-lgdo/_version.py,sha256=lVyzAOse2pIwNX9sD_s_ucUhU5oPCN_lFpntrC7eKG8,521
+lgdo/_version.py,sha256=zEosD-3Sqrti57GKf-4yC-NurX2Smyv5d6IDkQisUBo,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -9,47 +9,48 @@ lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
 lgdo/compression/__init__.py,sha256=xHt_8Th0LxxNwj9iYHf5uGNTm3A_4qyW7zEVdAX3NwI,1127
 lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
 lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
-lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
+lgdo/compression/radware.py,sha256=-W7LgvkSVzdVJ6qNn7Ts3O9EcRcl8mUiApTLqR4dtIo,23836
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
-lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
+lgdo/compression/varlen.py,sha256=bjyxhHzfpi6PIPy-Uc47W8_LrRbFoJLJ2kVeD5nhyqo,15125
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
 lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
-lgdo/lh5/core.py,sha256=nULH5UoRjUCH0E3Z0-OH_DbFz2PRAQP73Qaf1kfnyPE,13481
-lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
+lgdo/lh5/core.py,sha256=tbvitu3Pr-FCF4nOopVxGVOobDhGaVWo4o0HS58TGtY,13806
+lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
-lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
-lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
-lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
+lgdo/lh5/iterator.py,sha256=vuN98pa-xHDWXM2GMxvMxFEJGfHatMX6ajqnaP55PuY,20680
+lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
+lgdo/lh5/store.py,sha256=HJuDjWQ8ztrKDoyWW3cplhtWDnz3J4a-Fe2WF4fzOY4,8676
+lgdo/lh5/tools.py,sha256=EZTCj3TMMp4Rnocq1F0QeO1yYHzx4yMR7l_Em4G7sC4,6503
+lgdo/lh5/utils.py,sha256=hxPoaG25MOhuu7emrw2xzx3zerl-GzeMWdlfoQmLiYo,6667
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
 lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
-lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
+lgdo/lh5/_serializers/read/ndarray.py,sha256=cxzZ7esT5BzxyoXfITBG_EDTtCVxSeSu6dVZrohOdOY,3685
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
 lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=JYoLT9intT_Y4xPeL_l7CSd22O0ZKyEmd0flKkWWPFA,9268
+lgdo/lh5/_serializers/write/array.py,sha256=qzRNPQ4mtvc7HYPE3vUcM6bi7lWYnolNStdJVcDfzPU,3174
+lgdo/lh5/_serializers/write/composite.py,sha256=sZfV8aGZCH0mvMZ2dGDKt-MoepgL4PlR9ZWbT_JNIjQ,12171
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
+lgdo/types/array.py,sha256=TpZINHgGIptslwr5mwKYWU_PrYAk8bH1ECJ4XfLkWxg,9338
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
+lgdo/types/encoded.py,sha256=8DJHb3kxz6RrmjkeLWS6iyjvIJqx86mDInWqqjpMON0,15752
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
 lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
 lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
-lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
-lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
-lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/scalar.py,sha256=nBPiqX4g3GrPavEbG6nCt2Jel7Mj0IchXqwxB6ei_rg,1989
+lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
+lgdo/types/table.py,sha256=huhgpzdAUx0bRaEaitwnb-Ve7oAu5B6zxPK5EXPUfg0,20233
+lgdo/types/vectorofvectors.py,sha256=k1LwNnX3TcRAhOujj85kNkfZN0MXZYL9aaMUbr82JlE,26910
 lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
 lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
-legend_pydataobj-1.12.0a4.dist-info/METADATA,sha256=eD1QW8NEKGSWEqxSes1-TFnq1VHoxtdLmLfafsB53nI,44445
-legend_pydataobj-1.12.0a4.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
-legend_pydataobj-1.12.0a4.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
-legend_pydataobj-1.12.0a4.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.12.0a4.dist-info/RECORD,,
+legend_pydataobj-1.14.0.dist-info/METADATA,sha256=JaH2muAaB5Otjd9XhqiFfrgqtf9mR6F4XbIBPlZmB0g,44443
+legend_pydataobj-1.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.14.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.14.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.14.0.dist-info/RECORD,,
legend_pydataobj-1.12.0a4.dist-info/WHEEL → legend_pydataobj-1.14.0.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.0.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.12.0a4'
-__version_tuple__ = version_tuple = (1, 12, 0, 'a4')
+__version__ = version = '1.14.0'
+__version_tuple__ = version_tuple = (1, 14, 0)
lgdo/compression/radware.py CHANGED
@@ -95,13 +95,13 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # -> twice as long as a uint16
             # pre-allocate ubyte (uint8) array, expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * 2,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
@@ -226,7 +226,7 @@ def decode(
         # allocate output array with last dim as large as the longest
         # uncompressed wf
         maxs = np.max(_get_hton_u16(sig_in[0], 0))
-        sig_out = np.empty(s[:-1] + (maxs,), dtype=int32)
+        sig_out = np.empty((*s[:-1], maxs), dtype=int32)
 
         # siglen has one dimension less (the last)
         siglen = np.empty(s[:-1], dtype=uint32)
lgdo/compression/varlen.py CHANGED
@@ -74,14 +74,14 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # pre-allocate ubyte (uint8) array with a generous (but safe) size
             max_b = int(np.ceil(np.iinfo(sig_in.dtype).bits / 16) * 5)
             # expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * max_b,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * max_b), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
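Note: the shape-tuple changes in these two files (and in several files below) are pure style fixes, replacing tuple concatenation with PEP 448 iterable unpacking. A minimal sketch of the equivalence:

    import numpy as np

    s = (4, 5, 6)  # an example array shape

    # tuple concatenation (old) and starred unpacking (new) build the same tuple
    assert s[:-1] + (s[-1] * 2,) == (*s[:-1], s[-1] * 2) == (4, 5, 12)

    buf = np.empty((*s[:-1], 0), dtype=np.ubyte)
    assert buf.shape == (4, 5, 0)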
lgdo/lh5/__init__.py CHANGED
@@ -7,7 +7,6 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 
 from __future__ import annotations
 
-from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
@@ -15,14 +14,15 @@ from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
-    "DEFAULT_HDF5_SETTINGS",
     "LH5Iterator",
     "LH5Store",
     "concat",
+    "default_hdf5_settings",
     "ls",
     "read",
     "read_as",
     "read_n_rows",
+    "reset_default_hdf5_settings",
    "show",
    "write",
 ]
lgdo/lh5/_serializers/read/ndarray.py CHANGED
@@ -57,7 +57,7 @@ def _h5_read_ndarray(
             (start_row,) + (0,) * (h5d.rank - 1),
             (1,) * h5d.rank,
             None,
-            (n_rows_to_read,) + fspace.shape[1:],
+            (n_rows_to_read, *fspace.shape[1:]),
         )
     elif use_h5idx:
         # Note that h5s will automatically merge adjacent elements into a range
@@ -67,7 +67,7 @@ def _h5_read_ndarray(
                 (i,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (1,) + fspace.shape[1:],
+                (1, *fspace.shape[1:]),
                 h5py.h5s.SELECT_OR,
             )
 
@@ -84,7 +84,7 @@ def _h5_read_ndarray(
                 (obj_buf_start,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (n_rows_to_read,) + fspace.shape[1:],
+                (n_rows_to_read, *fspace.shape[1:]),
             )
             h5d.read(mspace, fspace, obj_buf.nda)
         else:
@@ -93,10 +93,10 @@ def _h5_read_ndarray(
             obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
-        tmp_shape = (0,) + h5d.shape[1:]
+        tmp_shape = (0, *h5d.shape[1:])
         nda = np.empty(tmp_shape, h5d.dtype)
     else:
-        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
         nda = np.empty(mspace.shape, h5d.dtype)
         if idx is None or use_h5idx:
             h5d.read(mspace, fspace, nda)
lgdo/lh5/_serializers/write/array.py CHANGED
@@ -6,12 +6,11 @@ import h5py
 import numpy as np
 
 from .... import types
+from ... import settings
 from ...exceptions import LH5EncodeError
 
 log = logging.getLogger(__name__)
 
-DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
-
 
 def _h5_write_array(
     obj,
@@ -41,7 +40,7 @@ def _h5_write_array(
         # this is needed in order to have a resizable (in the first
         # axis) data set, i.e. rows can be appended later
         # NOTE: this automatically turns chunking on!
-        maxshape = (None,) + nda.shape[1:]
+        maxshape = (None, *nda.shape[1:])
         h5py_kwargs.setdefault("maxshape", maxshape)
 
     if wo_mode == "o" and name in group:
@@ -49,7 +48,7 @@ def _h5_write_array(
         del group[name]
 
     # set default compression options
-    for k, v in DEFAULT_HDF5_SETTINGS.items():
+    for k, v in settings.DEFAULT_HDF5_SETTINGS.items():
         h5py_kwargs.setdefault(k, v)
 
     # compress using the 'compression' LGDO attribute, if available
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -64,6 +64,59 @@ def _h5_write_lgdo(
 
     group = utils.get_h5_group(group, lh5_file)
 
+    # name already in file
+    if name in group or (
+        ("datatype" in group.attrs or group == "/")
+        and (len(name) <= 2 or "/" not in name[1:-1])
+    ):
+        pass
+    # group is in file but not struct or need to create nesting
+    else:
+        # check if name is nested
+        # if name is nested, iterate up from parent
+        # otherwise we just need to iterate the group
+        if len(name) > 2 and "/" in name[1:-1]:
+            group = utils.get_h5_group(
+                name[:-1].rsplit("/", 1)[0],
+                group,
+            )
+            curr_name = (
+                name.rsplit("/", 1)[1]
+                if name[-1] != "/"
+                else name[:-1].rsplit("/", 1)[1]
+            )
+        else:
+            curr_name = name
+        # initialize the object to be written
+        obj = types.Struct({curr_name.replace("/", ""): obj})
+
+        # if base group already has a child we just append
+        if len(group) >= 1:
+            wo_mode = "ac"
+        else:
+            # iterate up the group hierarchy until we reach the root or a group with more than one child
+            while group.name != "/":
+                if len(group) > 1:
+                    break
+                curr_name = group.name
+                group = group.parent
+                if group.name != "/":
+                    obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
+                else:
+                    obj = types.Struct({curr_name[1:]: obj})
+            # if the group has more than one child, we need to append else we can overwrite
+            wo_mode = "ac" if len(group) > 1 else "o"
+
+        # set the new name
+        if group.name == "/":
+            name = "/"
+        elif group.parent.name == "/":
+            name = group.name[1:]
+        else:
+            name = group.name[len(group.parent.name) + 1 :]
+        # get the new group
+        group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
+
     if wo_mode == "w" and name in group:
         msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
         raise LH5EncodeError(msg, lh5_file, group, name)
@@ -87,7 +140,7 @@ def _h5_write_lgdo(
         lh5_file,
         group=group,
         start_row=start_row,
-        n_rows=n_rows,
+        n_rows=n_rows,  # if isinstance(obj, types.Table | types.Histogram) else None,
         wo_mode=wo_mode,
         write_start=write_start,
         **h5py_kwargs,
@@ -186,19 +239,31 @@ def _h5_write_struct(
     write_start=0,
     **h5py_kwargs,
 ):
+    # this works for structs and derived (tables)
     assert isinstance(obj, types.Struct)
 
     # In order to append a column, we need to update the
-    # `table{old_fields}` value in `group.attrs['datatype"]` to include
-    # the new fields. One way to do this is to override
-    # `obj.attrs["datatype"]` to include old and new fields. Then we
-    # can write the fields to the table as normal.
+    # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
+    # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+    # to include old and new fields. Then we can write the fields to the
+    # struct/table as normal.
     if wo_mode == "ac":
+        if name not in group:
+            msg = "Cannot append column to non-existing struct on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         old_group = utils.get_h5_group(name, group)
+        if "datatype" not in old_group.attrs:
+            msg = "Cannot append column to an existing non-LGDO object on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
-        if not issubclass(lgdotype, types.Struct):
-            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
+        if lgdotype is not type(obj):
+            msg = (
+                "Trying to append columns to an object of different "
+                f"type {lgdotype.__name__}!={type(obj)}"
+            )
             raise LH5EncodeError(msg, lh5_file, group, name)
 
         # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +276,14 @@ def _h5_write_struct(
                 "column(s) to a table with the same field(s)"
             )
             raise LH5EncodeError(msg, lh5_file, group, name)
+
         # It doesn't matter what key we access, as all fields in the old table have the same size
-        if old_group[next(iter(old_group.keys()))].size != obj.size:
+        if (
+            isinstance(obj, types.Table)
+            and old_group.attrs["datatype"][:6]
+            != "struct"  # structs dont care about size
+            and old_group[next(iter(old_group.keys()))].size != obj.size
+        ):
             msg = (
                 f"Table sizes don't match. Trying to append column of size {obj.size} "
                 f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,16 +293,27 @@ def _h5_write_struct(
         # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
         fields.extend(list(obj.keys()))
         obj.attrs.pop("datatype")
-        obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}"
+
+        obj.attrs["datatype"] = (
+            obj.datatype_name() + "{" + ",".join(sorted(fields)) + "}"
+        )
+
+        # propagating wo_mode="ac" to nested LGDOs does not make any sense
+        wo_mode = "append"
+
+        # overwrite attributes of the existing struct
+        attrs_overwrite = True
+    else:
+        attrs_overwrite = wo_mode == "o"
 
     group = utils.get_h5_group(
         name,
         group,
         grp_attrs=obj.attrs,
-        overwrite=(wo_mode in ["o", "ac"]),
+        overwrite=attrs_overwrite,
     )
     # If the mode is overwrite, then we need to peek into the file's
-    # table's existing fields. If we are writing a new table to the
+    # table's existing fields. If we are writing a new table to the
     # group that does not contain an old field, we should delete that
     # old field from the file
     if wo_mode == "o":
@@ -260,11 +342,9 @@ def _h5_write_struct(
         else:
             obj_fld = obj[field]
 
-        # Convert keys to string for dataset names
-        f = str(field)
         _h5_write_lgdo(
             obj_fld,
-            f,
+            str(field),
             lh5_file,
             group=group,
             start_row=start_row,
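Note: the new block in `_h5_write_lgdo` auto-creates intermediate levels when writing to a nested path whose parent groups do not yet exist, wrapping `obj` in `Struct`s and switching to an append or overwrite mode as needed. A hypothetical sketch of what this enables (file and group names made up, not from the package docs):

    import numpy as np
    from lgdo import Array, lh5

    # writing to a deep path creates the enclosing struct levels on disk
    lh5.write(Array(np.arange(100)), "det1/raw/energy", "out.lh5", wo_mode="of")

    # a later write to a sibling path is merged into the existing structure
    lh5.write(Array(np.zeros(100)), "det1/raw/baseline", "out.lh5", wo_mode="append")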
lgdo/lh5/core.py CHANGED
@@ -5,6 +5,7 @@ import inspect
 import sys
 from collections.abc import Mapping, Sequence
 from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -18,7 +19,7 @@ from .utils import read_n_rows
 
 def read(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     start_row: int = 0,
     n_rows: int = sys.maxsize,
     idx: ArrayLike = None,
@@ -111,8 +112,8 @@ def read(
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
-    elif isinstance(lh5_file, str):
-        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
+    elif isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(str(Path(lh5_file)), mode="r", locking=locking)
         try:
             lh5_obj = lh5_file[name]
         except KeyError as ke:
@@ -194,7 +195,7 @@ def read(
 def write(
     obj: types.LGDO,
     name: str,
-    lh5_file: str | h5py.File,
+    lh5_file: str | Path | h5py.File,
     group: str | h5py.Group = "/",
     start_row: int = 0,
     n_rows: int | None = None,
@@ -268,11 +269,13 @@ def write(
           end of array is the same as ``append``.
         - ``overwrite_file`` or ``of``: delete file if present prior to
           writing to it. `write_start` should be 0 (it's ignored).
-        - ``append_column`` or ``ac``: append columns from an
-          :class:`~.lgdo.table.Table` `obj` only if there is an existing
-          :class:`~.lgdo.table.Table` in the `lh5_file` with the same
-          `name` and :class:`~.lgdo.table.Table.size`. If the sizes don't
-          match, or if there are matching fields, it errors out.
+        - ``append_column`` or ``ac``: append fields/columns from an
+          :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+          :class:`~.lgdo.table.Table`) only if there is an existing
+          :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+          If there are matching fields, it errors out. If appending to a
+          ``Table`` and the size of the new column is different from the size
+          of the existing table, it errors out.
     write_start
         row in the output file (if already existing) to start overwriting
         from.
@@ -288,7 +291,12 @@ def write(
        datasets. **Note: `compression` Ignored if compression is specified
        as an `obj` attribute.**
    """
-    if wo_mode in ("w", "write", "of", "overwrite_file"):
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
        h5py_kwargs.update(
            {
                "fs_strategy": "page",
@@ -310,7 +318,7 @@ def write(
 
 def read_as(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     library: str,
     **kwargs,
 ) -> Any:
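A minimal sketch of the broadened ``append_column`` semantics described in the docstring above (file and field names are made up):

    import numpy as np
    from lgdo import Array, Table, lh5

    tbl = Table(col_dict={"a": Array(np.zeros(100))})
    lh5.write(tbl, "tbl", "data.lh5", wo_mode="overwrite_file")

    # append a new column on disk; sizes must match when appending to a Table
    new_cols = Table(col_dict={"b": Array(np.ones(100))})
    lh5.write(new_cols, "tbl", "data.lh5", wo_mode="append_column")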
lgdo/lh5/datatype.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import re
 from collections import OrderedDict
+from itertools import permutations as perm
 
 from .. import types as lgdo
 
@@ -14,7 +15,10 @@ _lgdo_datatype_map: dict[str, lgdo.LGDO] = OrderedDict(
         (
             lgdo.ArrayOfEncodedEqualSizedArrays,
             r"^array_of_encoded_equalsized_arrays<1,1>\{.+\}$",
         ),
-        (lgdo.Histogram, r"^struct\{binning,weights,isdensity\}$"),
+        (
+            lgdo.Histogram,
+            rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(['binning', 'weights', 'isdensity'])])})\}}$",
+        ),
         (lgdo.Struct, r"^struct\{.*\}$"),
         (lgdo.Table, r"^table\{.*\}$"),
         (lgdo.FixedSizeArray, r"^fixedsize_array<\d+>\{.+\}$"),
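With struct fields now written in sorted order (see the `lgdo/types/struct.py` changes below), a Histogram datatype string may still arrive from files with its three fields in any order; the rewritten pattern simply enumerates all six permutations. A sketch of what it expands to:

    import re
    from itertools import permutations as perm

    fields = ["binning", "weights", "isdensity"]
    pattern = rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(fields)])})\}}$"
    # -> ^struct\{(?:binning,weights,isdensity|binning,isdensity,weights|...)\}$

    assert re.match(pattern, "struct{isdensity,binning,weights}")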
lgdo/lh5/iterator.py CHANGED
@@ -17,54 +17,50 @@ LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors]
 
 
 class LH5Iterator(typing.Iterator):
-    """
-    A class for iterating through one or more LH5 files, one block of entries
-    at a time. This also accepts an entry list/mask to enable event selection,
-    and a field mask.
-
-    This can be used as an iterator:
-
-
-    >>> for lh5_obj in LH5Iterator(...):
-    >>>     # do the thing!
-
-    This is intended for if you are reading a large quantity of data. This
-    will ensure that you traverse files efficiently to minimize caching time
-    and will limit your memory usage (particularly when reading in waveforms!).
-    The ``lh5_obj`` that is read by this class is reused in order to avoid
-    reallocation of memory; this means that if you want to hold on to data
-    between reads, you will have to copy it somewhere!
-
-    When defining an LH5Iterator, you must give it a list of files and the
-    hdf5 groups containing the data tables you are reading. You may also
-    provide a field mask, and an entry list or mask, specifying which entries
-    to read from the files. You may also pair it with a friend iterator, which
-    contains a parallel group of files which will be simultaneously read.
-    In addition to accessing requested data via ``lh5_obj``, several
-    properties exist to tell you where that data came from:
-
-    - lh5_it.current_i_entry: get the index within the entry list of the
-      first entry that is currently read
-    - lh5_it.current_local_entries: get the entry numbers relative to the
-      file the data came from
-    - lh5_it.current_global_entries: get the entry number relative to the
-      full dataset
-    - lh5_it.current_files: get the file name corresponding to each entry
-    - lh5_it.current_groups: get the group name corresponding to each entry
-
-    This class can also be used for random access:
-
-    >>> lh5_obj = lh5_it.read(i_entry)
-
-    to read the block of entries starting at i_entry. In case of multiple files
-    or the use of an event selection, i_entry refers to a global event index
-    across files and does not count events that are excluded by the selection.
+    """Iterate over chunks of entries from LH5 files.
+
+    The iterator reads ``buffer_len`` entries at a time from one or more
+    files. The LGDO instance returned at each iteration is reused to avoid
+    reallocations, so copy the data if it should be preserved.
+
+    Examples
+    --------
+    Iterate through a table one chunk at a time::
+
+        from lgdo.lh5 import LH5Iterator
+
+        for table in LH5Iterator("data.lh5", "geds/raw/energy", buffer_len=100):
+            process(table)
+
+    ``LH5Iterator`` can also be used for random access::
+
+        it = LH5Iterator(files, groups)
+        table = it.read(i_entry)
+
+    In case of multiple files or an entry selection, ``i_entry`` refers to the
+    global event index across all files.
+
+    When instantiating an iterator you must provide a list of files and the
+    HDF5 groups to read. Optional parameters allow field masking, event
+    selection and pairing the iterator with a "friend" iterator that is read
+    in parallel. Several properties are available to obtain the provenance of
+    the data currently loaded:
+
+    - ``current_i_entry`` -- index within the entry list of the first entry
+      in the buffer
+    - ``current_local_entries`` -- entry numbers relative to the file the
+      data came from
+    - ``current_global_entries`` -- entry number relative to the full dataset
+    - ``current_files`` -- file name corresponding to each entry in the buffer
+    - ``current_groups`` -- group name corresponding to each entry in the
+      buffer
     """
 
     def __init__(
         self,
         lh5_files: str | list[str],
         groups: str | list[str] | list[list[str]],
+        *,
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
@@ -75,6 +71,7 @@ class LH5Iterator(typing.Iterator):
         file_cache: int = 10,
         file_map: NDArray[int] = None,
         friend: typing.Iterator | None = None,
+        h5py_open_mode: str = "r",
     ) -> None:
         """
         Parameters
@@ -115,9 +112,21 @@ class LH5Iterator(typing.Iterator):
             The friend should have the same length and entry list. A single
             LH5 table containing columns from both iterators will be returned.
             Note that buffer_len will be set to the minimum of the two.
+        h5py_open_mode
+            file open mode used when acquiring file handles. ``r`` (default)
+            opens files read-only while ``a`` allows opening files for
+            write-appending as well.
         """
         self.lh5_st = LH5Store(base_path=base_path, keep_open=file_cache)
 
+        if h5py_open_mode == "read":
+            h5py_open_mode = "r"
+        if h5py_open_mode == "append":
+            h5py_open_mode = "a"
+        if h5py_open_mode not in ["r", "a"]:
+            msg = f"unknown h5py_open_mode '{h5py_open_mode}'"
+            raise ValueError(msg)
+
         # List of files, with wildcards and env vars expanded
         if isinstance(lh5_files, str):
             lh5_files = [lh5_files]
@@ -152,6 +161,10 @@ class LH5Iterator(typing.Iterator):
                 self.lh5_files += [f_exp] * len(g)
                 self.groups += list(g)
 
+        # open files in the requested mode so they are writable if needed
+        for f in set(self.lh5_files):
+            self.lh5_st.gimme_file(f, mode=h5py_open_mode)
+
         if entry_list is not None and entry_mask is not None:
             msg = "entry_list and entry_mask arguments are mutually exclusive"
             raise ValueError(msg)
@@ -505,7 +518,7 @@ class LH5Iterator(typing.Iterator):
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        """Read the next chunk of entries and return the buffer."""
         n_entries = self.n_entries
         if n_entries is not None:
             n_entries = min(
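A short sketch of the new ``h5py_open_mode`` keyword (file and group names are made up; note that the arguments after ``groups`` are now keyword-only):

    from lgdo.lh5 import LH5Iterator

    # open the underlying files writable, e.g. to write results back later;
    # "append" is normalized to "a" and "read" to "r" by the constructor
    it = LH5Iterator("data.lh5", "geds/raw", buffer_len=1000, h5py_open_mode="a")
    for chunk in it:
        ...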
lgdo/lh5/settings.py ADDED
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def default_hdf5_settings() -> dict[str, Any]:
+    """Return the pydataobj default HDF5 settings for writing data to disk.
+
+    Examples
+    --------
+    >>> from lgdo import lh5
+    >>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+    >>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+    >>> lh5.DEFAULT_HDF5_SETTINGS = lh5.default_hdf5_settings()
+    >>> lh5.write(data, "data", "file.lh5", "of")  # compressed with default settings (GZIP)
+    """
+
+    return {
+        "shuffle": True,
+        "compression": "gzip",
+    }
+
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = default_hdf5_settings()
+"""Global dictionary storing the default HDF5 settings for writing data to disk.
+
+Modify this global variable before writing data to disk with this package.
+
+Examples
+--------
+>>> from lgdo import lh5
+>>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+>>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+"""
lgdo/lh5/store.py CHANGED
@@ -38,7 +38,10 @@ class LH5Store:
     """
 
     def __init__(
-        self, base_path: str = "", keep_open: bool = False, locking: bool = False
+        self,
+        base_path: str | Path = "",
+        keep_open: bool = False,
+        locking: bool = False,
     ) -> None:
         """
         Parameters
@@ -52,6 +55,7 @@ class LH5Store:
         locking
             whether to lock files when reading
         """
+        base_path = str(Path(base_path)) if base_path != "" else ""
         self.base_path = "" if base_path == "" else utils.expand_path(base_path)
         self.keep_open = keep_open
         self.locking = locking
@@ -59,7 +63,7 @@ class LH5Store:
 
     def gimme_file(
         self,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         mode: str = "r",
         page_buffer: int = 0,
         **file_kwargs,
@@ -83,6 +87,8 @@ class LH5Store:
         if isinstance(lh5_file, h5py.File):
             return lh5_file
 
+        lh5_file = str(Path(lh5_file))
+
         if mode == "r":
             lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
             file_kwargs["locking"] = self.locking
@@ -147,7 +153,7 @@ class LH5Store:
     def get_buffer(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         size: int | None = None,
         field_mask: Mapping[str, bool] | Sequence[str] | None = None,
     ) -> types.LGDO:
@@ -162,7 +168,7 @@ class LH5Store:
     def read(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         start_row: int = 0,
         n_rows: int = sys.maxsize,
         idx: ArrayLike = None,
@@ -180,7 +186,7 @@ class LH5Store:
         .lh5.core.read
         """
         # grab files from store
-        if isinstance(lh5_file, (str, h5py.File)):
+        if isinstance(lh5_file, (str, Path, h5py.File)):
             h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
         else:
             h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
@@ -201,7 +207,7 @@ class LH5Store:
         self,
         obj: types.LGDO,
         name: str,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         group: str | h5py.Group = "/",
         start_row: int = 0,
         n_rows: int | None = None,
@@ -256,14 +262,14 @@ class LH5Store:
             **h5py_kwargs,
         )
 
-    def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None:
+    def read_n_rows(self, name: str, lh5_file: str | Path | h5py.File) -> int | None:
         """Look up the number of rows in an Array-like object called `name` in `lh5_file`.
 
         Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.
         """
         return utils.read_n_rows(name, self.gimme_file(lh5_file, "r"))
 
-    def read_size_in_bytes(self, name: str, lh5_file: str | h5py.File) -> int:
+    def read_size_in_bytes(self, name: str, lh5_file: str | Path | h5py.File) -> int:
         """Look up the size (in B) of the object in memory. Will recursively
         crawl through all objects in a Struct or Table
         """
lgdo/lh5/tools.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import fnmatch
 import logging
 from copy import copy
+from pathlib import Path
 
 import h5py
 
@@ -13,7 +14,7 @@ log = logging.getLogger(__name__)
 
 
 def ls(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "",
     recursive: bool = False,
 ) -> list[str]:
@@ -39,8 +40,8 @@ def ls(
 
     lh5_st = LH5Store()
     # To use recursively, make lh5_file a h5group instead of a string
-    if isinstance(lh5_file, str):
-        lh5_file = lh5_st.gimme_file(lh5_file, "r")
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = lh5_st.gimme_file(str(Path(lh5_file)), "r")
     if lh5_group.startswith("/"):
         lh5_group = lh5_group[1:]
 
@@ -75,7 +76,7 @@ def ls(
 
 
 def show(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "/",
     attrs: bool = False,
     indent: str = "",
@@ -121,8 +122,8 @@ def show(
         return
 
     # open file
-    if isinstance(lh5_file, str):
-        lh5_file = h5py.File(utils.expand_path(lh5_file), "r", locking=False)
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(utils.expand_path(Path(lh5_file)), "r", locking=False)
 
     # go to group
     if lh5_group != "/":
lgdo/lh5/utils.py CHANGED
@@ -21,7 +21,7 @@ log = logging.getLogger(__name__)
 
 def get_buffer(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     size: int | None = None,
     field_mask: Mapping[str, bool] | Sequence[str] | None = None,
 ) -> types.LGDO:
@@ -39,7 +39,7 @@ def get_buffer(
     return obj
 
 
-def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
+def read_n_rows(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the number of rows in an Array-like LGDO object on disk.
 
     Return ``None`` if `name` is a :class:`.Scalar` or a :class:`.Struct`.
@@ -56,7 +56,7 @@ def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
     return _serializers.read.utils.read_n_rows(h5o, h5f.name, name)
 
 
-def read_size_in_bytes(name: str, h5f: str | h5py.File) -> int | None:
+def read_size_in_bytes(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the size (in B) in an LGDO object in memory. Will crawl
     recursively through members of a Struct or Table
     """
@@ -111,7 +111,7 @@ def get_h5_group(
         grp_attrs is not None
         and len(set(grp_attrs.items()) ^ set(group.attrs.items())) > 0
     ):
-        if not overwrite:
+        if not overwrite and len(group.attrs) != 0:
             msg = (
                 f"Provided {grp_attrs=} are different from "
                 f"existing ones {dict(group.attrs)=} but overwrite flag is not set"
@@ -158,10 +158,10 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
 
 
 def expand_path(
-    path: str,
+    path: str | Path,
     substitute: dict[str, str] | None = None,
     list: bool = False,
-    base_path: str | None = None,
+    base_path: str | Path | None = None,
 ) -> str | list:
     """Expand (environment) variables and wildcards to return absolute paths.
 
@@ -184,18 +184,26 @@ def expand_path(
        Unique absolute path, or list of all absolute paths
    """
    if base_path is not None and base_path != "":
-        base_path = Path(os.path.expandvars(base_path)).expanduser()
-        path = base_path / path
+        base_path = Path(expand_vars(str(base_path))).expanduser()
+        if not Path(path).expanduser().is_absolute():
+            path = base_path / path
 
     # first expand variables
-    _path = expand_vars(path, substitute)
+    _path = expand_vars(str(path), substitute)
 
     # then expand wildcards
     # pathlib glob works differently so use glob for now
     paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
 
     if base_path is not None and base_path != "":
-        paths = [os.path.relpath(p, base_path) for p in paths]
+        rel_paths = []
+        for p in paths:
+            p_path = Path(p)
+            try:
+                rel_paths.append(str(p_path.relative_to(base_path)))
+            except ValueError:
+                rel_paths.append(str(p_path))
+        paths = rel_paths
 
     if not list:
         if len(paths) == 0:
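Note the behavioral nuance of the ``relative_to`` fallback above: matched paths living outside ``base_path`` are now returned as-is, whereas ``os.path.relpath`` would have produced ``..`` segments. A small sketch with made-up paths:

    from pathlib import Path

    base = Path("/data/legend")
    for p in ("/data/legend/run0.lh5", "/other/run1.lh5"):
        try:
            print(Path(p).relative_to(base))  # -> run0.lh5
        except ValueError:
            print(p)  # -> /other/run1.lh5, kept absolute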
lgdo/types/array.py CHANGED
@@ -109,14 +109,14 @@ class Array(LGDOCollection):
 
     @property
     def shape(self):
-        return (len(self),) + self._nda.shape[1:]
+        return (len(self), *self._nda.shape[1:])
 
     def reserve_capacity(self, capacity: int) -> None:
         "Set size (number of rows) of internal memory buffer"
         if capacity < len(self):
             msg = "Cannot reduce capacity below Array length"
             raise ValueError(msg)
-        self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
+        self._nda.resize((capacity, *self._nda.shape[1:]), refcheck=False)
 
     def get_capacity(self) -> int:
         "Get capacity (i.e. max size before memory must be re-allocated)"
@@ -190,6 +190,9 @@ class Array(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __iter__(self) -> Iterator:
         yield from self.nda
 
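The ``__hash__`` methods added here and in the other LGDO types below restore hashability: Python sets ``__hash__`` to ``None`` on any class that defines ``__eq__`` without also defining ``__hash__``, so instances could not be used as ``dict`` keys or ``set`` members. A minimal sketch of the mechanism:

    class EqOnly:
        def __eq__(self, other):
            return NotImplemented

    assert EqOnly.__hash__ is None  # instances are unhashable

    class EqAndHash(EqOnly):
        name = "x"
        def __hash__(self):
            return hash(self.name)

    {EqAndHash(): 1}  # hashable again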
lgdo/types/encoded.py CHANGED
@@ -92,6 +92,9 @@ class VectorOfEncodedVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(*capacity)
         self.decoded_size.reserve_capacity(capacity[0])
@@ -345,6 +348,9 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(capacity)
 
lgdo/types/scalar.py CHANGED
@@ -63,6 +63,9 @@ class Scalar(LGDO):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __str__(self) -> str:
         attrs = self.getattrs()
         return f"{self.value!s} with attrs={attrs!r}"
lgdo/types/struct.py CHANGED
@@ -5,7 +5,9 @@ utilities.
 
 from __future__ import annotations
 
+import copy
 import logging
+import re
 from collections.abc import Mapping
 from typing import Any
 
@@ -56,7 +58,21 @@ class Struct(LGDO, dict):
             # assign
             super().update({k: v})
 
-        # call LGDO constructor to setup attributes
+        # check the datatype attribute passed by the user and sort the fields
+        # to ensure consistent behavior
+        if attrs is not None and "datatype" in attrs:
+            _attrs = copy.copy(dict(attrs))
+
+            if not _is_struct_datatype(self.datatype_name(), _attrs["datatype"]):
+                msg = (
+                    f"datatype attribute ({self.attrs['datatype']}) is not "
+                    f"compatible with class datatype!"
+                )
+                raise ValueError(msg)
+
+            _attrs["datatype"] = _sort_datatype_fields(_attrs["datatype"])
+            attrs = _attrs
+
         super().__init__(attrs)
 
     def datatype_name(self) -> str:
@@ -64,7 +80,10 @@ class Struct(LGDO, dict):
 
     def form_datatype(self) -> str:
         return (
-            self.datatype_name() + "{" + ",".join([str(k) for k in self.keys()]) + "}"
+            self.datatype_name()
+            + "{"
+            + ",".join(sorted([str(k) for k in self.keys()]))
+            + "}"
         )
 
     def update_datatype(self) -> None:
@@ -157,3 +176,34 @@ class Struct(LGDO, dict):
             "not possible. Call view_as() on the fields instead."
         )
         raise NotImplementedError(msg)
+
+
+def _is_struct_datatype(dt_name, expr):
+    return re.search("^" + dt_name + r"\{(.*)\}$", expr) is not None
+
+
+def _get_struct_fields(expr: str) -> list[str]:
+    assert _is_struct_datatype(".*", expr)
+
+    arr = re.search(r"\{(.*)\}$", expr).group(1).split(",")
+    if arr == [""]:
+        arr = []
+
+    return sorted(arr)
+
+
+def _struct_datatype_equal(dt_name, dt1, dt2):
+    if any(not _is_struct_datatype(dt_name, dt) for dt in (dt1, dt2)):
+        return False
+
+    return _get_struct_fields(dt1) == _get_struct_fields(dt2)
+
+
+def _sort_datatype_fields(expr):
+    assert _is_struct_datatype(".*", expr)
+
+    match = re.search(r"^(.*)\{.*\}$", expr)
+    struct_type = match.group(1)
+    fields = _get_struct_fields(expr)
+
+    return struct_type + "{" + ",".join(sorted([str(k) for k in fields])) + "}"
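Because fields are now sorted, the on-disk datatype string no longer depends on the order in which a struct was built. A quick sketch:

    import numpy as np
    from lgdo import Array, Struct

    s = Struct({"z": Array(np.zeros(1)), "a": Array(np.zeros(1))})
    assert s.form_datatype() == "struct{a,z}"  # sorted, not insertion order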
lgdo/types/table.py CHANGED
@@ -81,8 +81,9 @@ class Table(Struct, LGDOCollection):
         col_dict = _ak_to_lgdo_or_col_dict(col_dict)
 
         # call Struct constructor
-        Struct.__init__(self, obj_dict=col_dict)
-        LGDOCollection.__init__(self, attrs=attrs)
+        Struct.__init__(self, obj_dict=col_dict, attrs=attrs)
+        # no need to call the LGDOCollection constructor, as we are calling the
+        # Struct constructor already
 
         # if col_dict is not empty, set size according to it
         # if size is also supplied, resize all fields to match it
@@ -329,9 +330,10 @@ class Table(Struct, LGDOCollection):
             :func:`numexpr.evaluate`` as `local_dict` argument or to
             :func:`eval` as `locals` argument.
         modules
-            a dictionary of additional modules used by the expression. If this is not `None`
-            then :func:`eval`is used and the expression can depend on any modules from this dictionary in
-            addition to awkward and numpy. These are passed to :func:`eval` as `globals` argument.
+            a dictionary of additional modules used by the expression. If this
+            is not `None` then :func:`eval` is used and the expression can
+            depend on any modules from this dictionary in addition to awkward
+            and numpy. These are passed to :func:`eval` as `globals` argument.
 
         Examples
         --------
@@ -402,7 +404,10 @@ class Table(Struct, LGDOCollection):
             return _make_lgdo(out_data)
 
         except Exception:
-            msg = f"Warning {expr} could not be evaluated with numexpr probably due to some not allowed characters, trying with eval()."
+            msg = (
+                f"Warning {expr} could not be evaluated with numexpr probably "
+                "due to some not allowed characters, trying with eval()."
+            )
             log.debug(msg)
 
             # resort to good ol' eval()
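A short sketch of the ``modules`` argument whose docstring is reflowed above (column name made up):

    import numpy as np
    from lgdo import Array, Table

    tbl = Table(col_dict={"energy": Array(np.array([1.0, 2.0, 3.0]))})
    # with modules given, evaluation falls through to eval() with these globals
    out = tbl.eval("np.log(energy)", modules={"np": np})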
lgdo/types/vectorofvectors.py CHANGED
@@ -284,6 +284,9 @@ class VectorOfVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __getitem__(self, i: int) -> NDArray:
         """Return a view of the vector at index `i` along the first axis."""
         if self.ndim == 2: