legend-pydataobj 1.12.0a4__py3-none-any.whl → 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
legend_pydataobj-1.12.0a4.dist-info/METADATA → legend_pydataobj-1.14.0.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a4
+Version: 1.14.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
legend_pydataobj-1.12.0a4.dist-info/RECORD → legend_pydataobj-1.14.0.dist-info/RECORD RENAMED
@@ -1,6 +1,6 @@
-legend_pydataobj-1.12.0a4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+legend_pydataobj-1.14.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
 lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
-lgdo/_version.py,sha256=lVyzAOse2pIwNX9sD_s_ucUhU5oPCN_lFpntrC7eKG8,521
+lgdo/_version.py,sha256=zEosD-3Sqrti57GKf-4yC-NurX2Smyv5d6IDkQisUBo,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -9,47 +9,48 @@ lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
 lgdo/compression/__init__.py,sha256=xHt_8Th0LxxNwj9iYHf5uGNTm3A_4qyW7zEVdAX3NwI,1127
 lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
 lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
-lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
+lgdo/compression/radware.py,sha256=-W7LgvkSVzdVJ6qNn7Ts3O9EcRcl8mUiApTLqR4dtIo,23836
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
-lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
+lgdo/compression/varlen.py,sha256=bjyxhHzfpi6PIPy-Uc47W8_LrRbFoJLJ2kVeD5nhyqo,15125
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
 lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
-lgdo/lh5/core.py,sha256=nULH5UoRjUCH0E3Z0-OH_DbFz2PRAQP73Qaf1kfnyPE,13481
-lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
+lgdo/lh5/core.py,sha256=tbvitu3Pr-FCF4nOopVxGVOobDhGaVWo4o0HS58TGtY,13806
+lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
-lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
-lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
-lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
+lgdo/lh5/iterator.py,sha256=vuN98pa-xHDWXM2GMxvMxFEJGfHatMX6ajqnaP55PuY,20680
+lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
+lgdo/lh5/store.py,sha256=HJuDjWQ8ztrKDoyWW3cplhtWDnz3J4a-Fe2WF4fzOY4,8676
+lgdo/lh5/tools.py,sha256=EZTCj3TMMp4Rnocq1F0QeO1yYHzx4yMR7l_Em4G7sC4,6503
+lgdo/lh5/utils.py,sha256=hxPoaG25MOhuu7emrw2xzx3zerl-GzeMWdlfoQmLiYo,6667
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
 lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
-lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
+lgdo/lh5/_serializers/read/ndarray.py,sha256=cxzZ7esT5BzxyoXfITBG_EDTtCVxSeSu6dVZrohOdOY,3685
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
 lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=JYoLT9intT_Y4xPeL_l7CSd22O0ZKyEmd0flKkWWPFA,9268
+lgdo/lh5/_serializers/write/array.py,sha256=qzRNPQ4mtvc7HYPE3vUcM6bi7lWYnolNStdJVcDfzPU,3174
+lgdo/lh5/_serializers/write/composite.py,sha256=sZfV8aGZCH0mvMZ2dGDKt-MoepgL4PlR9ZWbT_JNIjQ,12171
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
+lgdo/types/array.py,sha256=TpZINHgGIptslwr5mwKYWU_PrYAk8bH1ECJ4XfLkWxg,9338
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
+lgdo/types/encoded.py,sha256=8DJHb3kxz6RrmjkeLWS6iyjvIJqx86mDInWqqjpMON0,15752
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
 lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
 lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
-lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
-lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
-lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/scalar.py,sha256=nBPiqX4g3GrPavEbG6nCt2Jel7Mj0IchXqwxB6ei_rg,1989
+lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
+lgdo/types/table.py,sha256=huhgpzdAUx0bRaEaitwnb-Ve7oAu5B6zxPK5EXPUfg0,20233
+lgdo/types/vectorofvectors.py,sha256=k1LwNnX3TcRAhOujj85kNkfZN0MXZYL9aaMUbr82JlE,26910
 lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
 lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
-legend_pydataobj-1.12.0a4.dist-info/METADATA,sha256=eD1QW8NEKGSWEqxSes1-TFnq1VHoxtdLmLfafsB53nI,44445
-legend_pydataobj-1.12.0a4.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
-legend_pydataobj-1.12.0a4.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
-legend_pydataobj-1.12.0a4.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.12.0a4.dist-info/RECORD,,
+legend_pydataobj-1.14.0.dist-info/METADATA,sha256=JaH2muAaB5Otjd9XhqiFfrgqtf9mR6F4XbIBPlZmB0g,44443
+legend_pydataobj-1.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.14.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.14.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.14.0.dist-info/RECORD,,
legend_pydataobj-1.12.0a4.dist-info/WHEEL → legend_pydataobj-1.14.0.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.0.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.12.0a4'
-__version_tuple__ = version_tuple = (1, 12, 0, 'a4')
+__version__ = version = '1.14.0'
+__version_tuple__ = version_tuple = (1, 14, 0)
lgdo/compression/radware.py CHANGED
@@ -95,13 +95,13 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # -> twice as long as a uint16
             # pre-allocate ubyte (uint8) array, expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * 2,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
@@ -226,7 +226,7 @@ def decode(
         # allocate output array with last dim as large as the longest
         # uncompressed wf
         maxs = np.max(_get_hton_u16(sig_in[0], 0))
-        sig_out = np.empty(s[:-1] + (maxs,), dtype=int32)
+        sig_out = np.empty((*s[:-1], maxs), dtype=int32)
 
         # siglen has one dimension less (the last)
         siglen = np.empty(s[:-1], dtype=uint32)
lgdo/compression/varlen.py CHANGED
@@ -74,14 +74,14 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # pre-allocate ubyte (uint8) array with a generous (but safe) size
             max_b = int(np.ceil(np.iinfo(sig_in.dtype).bits / 16) * 5)
             # expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * max_b,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * max_b), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
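Note: the shape-tuple changes in these two files (and in several files below) are pure style fixes, replacing tuple concatenation with PEP 448 iterable unpacking. A minimal sketch of the equivalence:

    import numpy as np

    s = (4, 5, 6)  # an example array shape

    # tuple concatenation (old) and starred unpacking (new) build the same tuple
    assert s[:-1] + (s[-1] * 2,) == (*s[:-1], s[-1] * 2) == (4, 5, 12)

    buf = np.empty((*s[:-1], 0), dtype=np.ubyte)
    assert buf.shape == (4, 5, 0)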
lgdo/lh5/__init__.py CHANGED
@@ -7,7 +7,6 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 
 from __future__ import annotations
 
-from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
@@ -15,14 +14,15 @@ from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
-    "DEFAULT_HDF5_SETTINGS",
     "LH5Iterator",
     "LH5Store",
     "concat",
+    "default_hdf5_settings",
     "ls",
     "read",
     "read_as",
     "read_n_rows",
+    "reset_default_hdf5_settings",
    "show",
    "write",
 ]
lgdo/lh5/_serializers/read/ndarray.py CHANGED
@@ -57,7 +57,7 @@ def _h5_read_ndarray(
             (start_row,) + (0,) * (h5d.rank - 1),
             (1,) * h5d.rank,
             None,
-            (n_rows_to_read,) + fspace.shape[1:],
+            (n_rows_to_read, *fspace.shape[1:]),
         )
     elif use_h5idx:
         # Note that h5s will automatically merge adjacent elements into a range
@@ -67,7 +67,7 @@ def _h5_read_ndarray(
                 (i,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (1,) + fspace.shape[1:],
+                (1, *fspace.shape[1:]),
                 h5py.h5s.SELECT_OR,
             )
 
@@ -84,7 +84,7 @@ def _h5_read_ndarray(
                 (obj_buf_start,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (n_rows_to_read,) + fspace.shape[1:],
+                (n_rows_to_read, *fspace.shape[1:]),
             )
             h5d.read(mspace, fspace, obj_buf.nda)
         else:
@@ -93,10 +93,10 @@ def _h5_read_ndarray(
             obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
-        tmp_shape = (0,) + h5d.shape[1:]
+        tmp_shape = (0, *h5d.shape[1:])
         nda = np.empty(tmp_shape, h5d.dtype)
     else:
-        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
         nda = np.empty(mspace.shape, h5d.dtype)
         if idx is None or use_h5idx:
             h5d.read(mspace, fspace, nda)
lgdo/lh5/_serializers/write/array.py CHANGED
@@ -6,12 +6,11 @@ import h5py
 import numpy as np
 
 from .... import types
+from ... import settings
 from ...exceptions import LH5EncodeError
 
 log = logging.getLogger(__name__)
 
-DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
-
 
 def _h5_write_array(
     obj,
@@ -41,7 +40,7 @@ def _h5_write_array(
         # this is needed in order to have a resizable (in the first
         # axis) data set, i.e. rows can be appended later
         # NOTE: this automatically turns chunking on!
-        maxshape = (None,) + nda.shape[1:]
+        maxshape = (None, *nda.shape[1:])
         h5py_kwargs.setdefault("maxshape", maxshape)
 
     if wo_mode == "o" and name in group:
@@ -49,7 +48,7 @@ def _h5_write_array(
         del group[name]
 
     # set default compression options
-    for k, v in DEFAULT_HDF5_SETTINGS.items():
+    for k, v in settings.DEFAULT_HDF5_SETTINGS.items():
         h5py_kwargs.setdefault(k, v)
 
     # compress using the 'compression' LGDO attribute, if available
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -64,6 +64,59 @@ def _h5_write_lgdo(
 
     group = utils.get_h5_group(group, lh5_file)
 
+    # name already in file
+    if name in group or (
+        ("datatype" in group.attrs or group == "/")
+        and (len(name) <= 2 or "/" not in name[1:-1])
+    ):
+        pass
+    # group is in file but not struct or need to create nesting
+    else:
+        # check if name is nested
+        # if name is nested, iterate up from parent
+        # otherwise we just need to iterate the group
+        if len(name) > 2 and "/" in name[1:-1]:
+            group = utils.get_h5_group(
+                name[:-1].rsplit("/", 1)[0],
+                group,
+            )
+            curr_name = (
+                name.rsplit("/", 1)[1]
+                if name[-1] != "/"
+                else name[:-1].rsplit("/", 1)[1]
+            )
+        else:
+            curr_name = name
+        # initialize the object to be written
+        obj = types.Struct({curr_name.replace("/", ""): obj})
+
+        # if base group already has a child we just append
+        if len(group) >= 1:
+            wo_mode = "ac"
+        else:
+            # iterate up the group hierarchy until we reach the root or a group with more than one child
+            while group.name != "/":
+                if len(group) > 1:
+                    break
+                curr_name = group.name
+                group = group.parent
+                if group.name != "/":
+                    obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
+                else:
+                    obj = types.Struct({curr_name[1:]: obj})
+            # if the group has more than one child, we need to append else we can overwrite
+            wo_mode = "ac" if len(group) > 1 else "o"
+
+        # set the new name
+        if group.name == "/":
+            name = "/"
+        elif group.parent.name == "/":
+            name = group.name[1:]
+        else:
+            name = group.name[len(group.parent.name) + 1 :]
+        # get the new group
+        group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
+
     if wo_mode == "w" and name in group:
         msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
         raise LH5EncodeError(msg, lh5_file, group, name)
@@ -87,7 +140,7 @@ def _h5_write_lgdo(
         lh5_file,
         group=group,
         start_row=start_row,
-        n_rows=n_rows,
+        n_rows=n_rows,  # if isinstance(obj, types.Table | types.Histogram) else None,
         wo_mode=wo_mode,
         write_start=write_start,
         **h5py_kwargs,
@@ -186,19 +239,31 @@ def _h5_write_struct(
     write_start=0,
     **h5py_kwargs,
 ):
+    # this works for structs and derived (tables)
     assert isinstance(obj, types.Struct)
 
     # In order to append a column, we need to update the
-    # `table{old_fields}` value in `group.attrs['datatype"]` to include
-    # the new fields. One way to do this is to override
-    # `obj.attrs["datatype"]` to include old and new fields. Then we
-    # can write the fields to the table as normal.
+    # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
+    # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+    # to include old and new fields. Then we can write the fields to the
+    # struct/table as normal.
     if wo_mode == "ac":
+        if name not in group:
+            msg = "Cannot append column to non-existing struct on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         old_group = utils.get_h5_group(name, group)
+        if "datatype" not in old_group.attrs:
+            msg = "Cannot append column to an existing non-LGDO object on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
-        if not issubclass(lgdotype, types.Struct):
-            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
+        if lgdotype is not type(obj):
+            msg = (
+                "Trying to append columns to an object of different "
+                f"type {lgdotype.__name__}!={type(obj)}"
+            )
             raise LH5EncodeError(msg, lh5_file, group, name)
 
         # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +276,14 @@ def _h5_write_struct(
                 "column(s) to a table with the same field(s)"
             )
             raise LH5EncodeError(msg, lh5_file, group, name)
+
         # It doesn't matter what key we access, as all fields in the old table have the same size
-        if old_group[next(iter(old_group.keys()))].size != obj.size:
+        if (
+            isinstance(obj, types.Table)
+            and old_group.attrs["datatype"][:6]
+            != "struct"  # structs dont care about size
+            and old_group[next(iter(old_group.keys()))].size != obj.size
+        ):
             msg = (
                 f"Table sizes don't match. Trying to append column of size {obj.size} "
                 f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,16 +293,27 @@ def _h5_write_struct(
         # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
         fields.extend(list(obj.keys()))
         obj.attrs.pop("datatype")
-        obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}"
+
+        obj.attrs["datatype"] = (
+            obj.datatype_name() + "{" + ",".join(sorted(fields)) + "}"
+        )
+
+        # propagating wo_mode="ac" to nested LGDOs does not make any sense
+        wo_mode = "append"
+
+        # overwrite attributes of the existing struct
+        attrs_overwrite = True
+    else:
+        attrs_overwrite = wo_mode == "o"
 
     group = utils.get_h5_group(
         name,
         group,
         grp_attrs=obj.attrs,
-        overwrite=(wo_mode in ["o", "ac"]),
+        overwrite=attrs_overwrite,
     )
     # If the mode is overwrite, then we need to peek into the file's
-    # table's existing fields. If we are writing a new table to the
+    # table's existing fields. If we are writing a new table to the
     # group that does not contain an old field, we should delete that
     # old field from the file
     if wo_mode == "o":
@@ -260,11 +342,9 @@ def _h5_write_struct(
         else:
             obj_fld = obj[field]
 
-        # Convert keys to string for dataset names
-        f = str(field)
         _h5_write_lgdo(
             obj_fld,
-            f,
+            str(field),
             lh5_file,
             group=group,
             start_row=start_row,
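Note: the new block in `_h5_write_lgdo` auto-creates intermediate levels when writing to a nested path whose parent groups do not yet exist, wrapping `obj` in `Struct`s and switching to an append or overwrite mode as needed. A hypothetical sketch of what this enables (file and group names made up, not from the package docs):

    import numpy as np
    from lgdo import Array, lh5

    # writing to a deep path creates the enclosing struct levels on disk
    lh5.write(Array(np.arange(100)), "det1/raw/energy", "out.lh5", wo_mode="of")

    # a later write to a sibling path is merged into the existing structure
    lh5.write(Array(np.zeros(100)), "det1/raw/baseline", "out.lh5", wo_mode="append")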
lgdo/lh5/core.py CHANGED
@@ -5,6 +5,7 @@ import inspect
 import sys
 from collections.abc import Mapping, Sequence
 from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -18,7 +19,7 @@ from .utils import read_n_rows
 
 def read(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     start_row: int = 0,
     n_rows: int = sys.maxsize,
     idx: ArrayLike = None,
@@ -111,8 +112,8 @@ def read(
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
-    elif isinstance(lh5_file, str):
-        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
+    elif isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(str(Path(lh5_file)), mode="r", locking=locking)
         try:
             lh5_obj = lh5_file[name]
         except KeyError as ke:
@@ -194,7 +195,7 @@ def read(
 def write(
     obj: types.LGDO,
     name: str,
-    lh5_file: str | h5py.File,
+    lh5_file: str | Path | h5py.File,
     group: str | h5py.Group = "/",
     start_row: int = 0,
     n_rows: int | None = None,
@@ -268,11 +269,13 @@ def write(
           end of array is the same as ``append``.
         - ``overwrite_file`` or ``of``: delete file if present prior to
           writing to it. `write_start` should be 0 (it's ignored).
-        - ``append_column`` or ``ac``: append columns from an
-          :class:`~.lgdo.table.Table` `obj` only if there is an existing
-          :class:`~.lgdo.table.Table` in the `lh5_file` with the same
-          `name` and :class:`~.lgdo.table.Table.size`. If the sizes don't
-          match, or if there are matching fields, it errors out.
+        - ``append_column`` or ``ac``: append fields/columns from an
+          :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+          :class:`~.lgdo.table.Table`) only if there is an existing
+          :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+          If there are matching fields, it errors out. If appending to a
+          ``Table`` and the size of the new column is different from the size
+          of the existing table, it errors out.
     write_start
         row in the output file (if already existing) to start overwriting
         from.
@@ -288,7 +291,12 @@ def write(
        datasets. **Note: `compression` Ignored if compression is specified
        as an `obj` attribute.**
    """
-    if wo_mode in ("w", "write", "of", "overwrite_file"):
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
        h5py_kwargs.update(
            {
                "fs_strategy": "page",
@@ -310,7 +318,7 @@ def write(
 
 def read_as(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     library: str,
     **kwargs,
 ) -> Any:
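A minimal sketch of the broadened ``append_column`` semantics described in the docstring above (file and field names are made up):

    import numpy as np
    from lgdo import Array, Table, lh5

    tbl = Table(col_dict={"a": Array(np.zeros(100))})
    lh5.write(tbl, "tbl", "data.lh5", wo_mode="overwrite_file")

    # append a new column on disk; sizes must match when appending to a Table
    new_cols = Table(col_dict={"b": Array(np.ones(100))})
    lh5.write(new_cols, "tbl", "data.lh5", wo_mode="append_column")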
lgdo/lh5/datatype.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import re
 from collections import OrderedDict
+from itertools import permutations as perm
 
 from .. import types as lgdo
 
@@ -14,7 +15,10 @@ _lgdo_datatype_map: dict[str, lgdo.LGDO] = OrderedDict(
         (
             lgdo.ArrayOfEncodedEqualSizedArrays,
             r"^array_of_encoded_equalsized_arrays<1,1>\{.+\}$",
         ),
-        (lgdo.Histogram, r"^struct\{binning,weights,isdensity\}$"),
+        (
+            lgdo.Histogram,
+            rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(['binning', 'weights', 'isdensity'])])})\}}$",
+        ),
         (lgdo.Struct, r"^struct\{.*\}$"),
         (lgdo.Table, r"^table\{.*\}$"),
         (lgdo.FixedSizeArray, r"^fixedsize_array<\d+>\{.+\}$"),
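With struct fields now written in sorted order (see the `lgdo/types/struct.py` changes below), a Histogram datatype string may still arrive from files with its three fields in any order; the rewritten pattern simply enumerates all six permutations. A sketch of what it expands to:

    import re
    from itertools import permutations as perm

    fields = ["binning", "weights", "isdensity"]
    pattern = rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(fields)])})\}}$"
    # -> ^struct\{(?:binning,weights,isdensity|binning,isdensity,weights|...)\}$

    assert re.match(pattern, "struct{isdensity,binning,weights}")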
lgdo/lh5/iterator.py CHANGED
@@ -17,54 +17,50 @@ LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors]
 
 
 class LH5Iterator(typing.Iterator):
-    """
-    A class for iterating through one or more LH5 files, one block of entries
-    at a time. This also accepts an entry list/mask to enable event selection,
-    and a field mask.
-
-    This can be used as an iterator:
-
-
-    >>> for lh5_obj in LH5Iterator(...):
-    >>>     # do the thing!
-
-    This is intended for if you are reading a large quantity of data. This
-    will ensure that you traverse files efficiently to minimize caching time
-    and will limit your memory usage (particularly when reading in waveforms!).
-    The ``lh5_obj`` that is read by this class is reused in order to avoid
-    reallocation of memory; this means that if you want to hold on to data
-    between reads, you will have to copy it somewhere!
-
-    When defining an LH5Iterator, you must give it a list of files and the
-    hdf5 groups containing the data tables you are reading. You may also
-    provide a field mask, and an entry list or mask, specifying which entries
-    to read from the files. You may also pair it with a friend iterator, which
-    contains a parallel group of files which will be simultaneously read.
-    In addition to accessing requested data via ``lh5_obj``, several
-    properties exist to tell you where that data came from:
-
-    - lh5_it.current_i_entry: get the index within the entry list of the
-      first entry that is currently read
-    - lh5_it.current_local_entries: get the entry numbers relative to the
-      file the data came from
-    - lh5_it.current_global_entries: get the entry number relative to the
-      full dataset
-    - lh5_it.current_files: get the file name corresponding to each entry
-    - lh5_it.current_groups: get the group name corresponding to each entry
-
-    This class can also be used for random access:
-
-    >>> lh5_obj = lh5_it.read(i_entry)
-
-    to read the block of entries starting at i_entry. In case of multiple files
-    or the use of an event selection, i_entry refers to a global event index
-    across files and does not count events that are excluded by the selection.
+    """Iterate over chunks of entries from LH5 files.
+
+    The iterator reads ``buffer_len`` entries at a time from one or more
+    files. The LGDO instance returned at each iteration is reused to avoid
+    reallocations, so copy the data if it should be preserved.
+
+    Examples
+    --------
+    Iterate through a table one chunk at a time::
+
+        from lgdo.lh5 import LH5Iterator
+
+        for table in LH5Iterator("data.lh5", "geds/raw/energy", buffer_len=100):
+            process(table)
+
+    ``LH5Iterator`` can also be used for random access::
+
+        it = LH5Iterator(files, groups)
+        table = it.read(i_entry)
+
+    In case of multiple files or an entry selection, ``i_entry`` refers to the
+    global event index across all files.
+
+    When instantiating an iterator you must provide a list of files and the
+    HDF5 groups to read. Optional parameters allow field masking, event
+    selection and pairing the iterator with a "friend" iterator that is read
+    in parallel. Several properties are available to obtain the provenance of
+    the data currently loaded:
+
+    - ``current_i_entry`` -- index within the entry list of the first entry
+      in the buffer
+    - ``current_local_entries`` -- entry numbers relative to the file the
+      data came from
+    - ``current_global_entries`` -- entry number relative to the full dataset
+    - ``current_files`` -- file name corresponding to each entry in the buffer
+    - ``current_groups`` -- group name corresponding to each entry in the
+      buffer
     """
 
     def __init__(
         self,
         lh5_files: str | list[str],
         groups: str | list[str] | list[list[str]],
+        *,
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
@@ -75,6 +71,7 @@ class LH5Iterator(typing.Iterator):
         file_cache: int = 10,
         file_map: NDArray[int] = None,
         friend: typing.Iterator | None = None,
+        h5py_open_mode: str = "r",
     ) -> None:
         """
         Parameters
@@ -115,9 +112,21 @@ class LH5Iterator(typing.Iterator):
             The friend should have the same length and entry list. A single
             LH5 table containing columns from both iterators will be returned.
             Note that buffer_len will be set to the minimum of the two.
+        h5py_open_mode
+            file open mode used when acquiring file handles. ``r`` (default)
+            opens files read-only while ``a`` allows opening files for
+            write-appending as well.
         """
         self.lh5_st = LH5Store(base_path=base_path, keep_open=file_cache)
 
+        if h5py_open_mode == "read":
+            h5py_open_mode = "r"
+        if h5py_open_mode == "append":
+            h5py_open_mode = "a"
+        if h5py_open_mode not in ["r", "a"]:
+            msg = f"unknown h5py_open_mode '{h5py_open_mode}'"
+            raise ValueError(msg)
+
         # List of files, with wildcards and env vars expanded
         if isinstance(lh5_files, str):
             lh5_files = [lh5_files]
@@ -152,6 +161,10 @@ class LH5Iterator(typing.Iterator):
                 self.lh5_files += [f_exp] * len(g)
                 self.groups += list(g)
 
+        # open files in the requested mode so they are writable if needed
+        for f in set(self.lh5_files):
+            self.lh5_st.gimme_file(f, mode=h5py_open_mode)
+
         if entry_list is not None and entry_mask is not None:
             msg = "entry_list and entry_mask arguments are mutually exclusive"
             raise ValueError(msg)
@@ -505,7 +518,7 @@ class LH5Iterator(typing.Iterator):
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        """Read the next chunk of entries and return the buffer."""
         n_entries = self.n_entries
         if n_entries is not None:
             n_entries = min(
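A short sketch of the new ``h5py_open_mode`` keyword (file and group names are made up; note that the arguments after ``groups`` are now keyword-only):

    from lgdo.lh5 import LH5Iterator

    # open the underlying files writable, e.g. to write results back later;
    # "append" is normalized to "a" and "read" to "r" by the constructor
    it = LH5Iterator("data.lh5", "geds/raw", buffer_len=1000, h5py_open_mode="a")
    for chunk in it:
        ...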
lgdo/lh5/settings.py ADDED
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def default_hdf5_settings() -> dict[str, Any]:
+    """Return the pydataobj default HDF5 settings for writing data to disk.
+
+    Examples
+    --------
+    >>> from lgdo import lh5
+    >>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+    >>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+    >>> lh5.DEFAULT_HDF5_SETTINGS = lh5.default_hdf5_settings()
+    >>> lh5.write(data, "data", "file.lh5", "of")  # compressed with default settings (GZIP)
+    """
+
+    return {
+        "shuffle": True,
+        "compression": "gzip",
+    }
+
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = default_hdf5_settings()
+"""Global dictionary storing the default HDF5 settings for writing data to disk.
+
+Modify this global variable before writing data to disk with this package.
+
+Examples
+--------
+>>> from lgdo import lh5
+>>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+>>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+"""
lgdo/lh5/store.py CHANGED
@@ -38,7 +38,10 @@ class LH5Store:
     """
 
     def __init__(
-        self, base_path: str = "", keep_open: bool = False, locking: bool = False
+        self,
+        base_path: str | Path = "",
+        keep_open: bool = False,
+        locking: bool = False,
     ) -> None:
         """
         Parameters
@@ -52,6 +55,7 @@ class LH5Store:
         locking
             whether to lock files when reading
         """
+        base_path = str(Path(base_path)) if base_path != "" else ""
         self.base_path = "" if base_path == "" else utils.expand_path(base_path)
         self.keep_open = keep_open
         self.locking = locking
@@ -59,7 +63,7 @@ class LH5Store:
 
     def gimme_file(
         self,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         mode: str = "r",
         page_buffer: int = 0,
         **file_kwargs,
@@ -83,6 +87,8 @@ class LH5Store:
         if isinstance(lh5_file, h5py.File):
             return lh5_file
 
+        lh5_file = str(Path(lh5_file))
+
         if mode == "r":
             lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
             file_kwargs["locking"] = self.locking
@@ -147,7 +153,7 @@ class LH5Store:
     def get_buffer(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         size: int | None = None,
         field_mask: Mapping[str, bool] | Sequence[str] | None = None,
     ) -> types.LGDO:
@@ -162,7 +168,7 @@ class LH5Store:
     def read(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         start_row: int = 0,
         n_rows: int = sys.maxsize,
         idx: ArrayLike = None,
@@ -180,7 +186,7 @@ class LH5Store:
         .lh5.core.read
         """
         # grab files from store
-        if isinstance(lh5_file, (str, h5py.File)):
+        if isinstance(lh5_file, (str, Path, h5py.File)):
             h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
         else:
             h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
@@ -201,7 +207,7 @@ class LH5Store:
         self,
         obj: types.LGDO,
         name: str,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         group: str | h5py.Group = "/",
         start_row: int = 0,
         n_rows: int | None = None,
@@ -256,14 +262,14 @@ class LH5Store:
             **h5py_kwargs,
         )
 
-    def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None:
+    def read_n_rows(self, name: str, lh5_file: str | Path | h5py.File) -> int | None:
         """Look up the number of rows in an Array-like object called `name` in `lh5_file`.
 
         Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.
         """
         return utils.read_n_rows(name, self.gimme_file(lh5_file, "r"))
 
-    def read_size_in_bytes(self, name: str, lh5_file: str | h5py.File) -> int:
+    def read_size_in_bytes(self, name: str, lh5_file: str | Path | h5py.File) -> int:
         """Look up the size (in B) of the object in memory. Will recursively
         crawl through all objects in a Struct or Table
         """
lgdo/lh5/tools.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import fnmatch
 import logging
 from copy import copy
+from pathlib import Path
 
 import h5py
 
@@ -13,7 +14,7 @@ log = logging.getLogger(__name__)
 
 
 def ls(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "",
     recursive: bool = False,
 ) -> list[str]:
@@ -39,8 +40,8 @@ def ls(
 
     lh5_st = LH5Store()
     # To use recursively, make lh5_file a h5group instead of a string
-    if isinstance(lh5_file, str):
-        lh5_file = lh5_st.gimme_file(lh5_file, "r")
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = lh5_st.gimme_file(str(Path(lh5_file)), "r")
     if lh5_group.startswith("/"):
         lh5_group = lh5_group[1:]
 
@@ -75,7 +76,7 @@ def ls(
 
 
 def show(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "/",
     attrs: bool = False,
     indent: str = "",
@@ -121,8 +122,8 @@ def show(
         return
 
     # open file
-    if isinstance(lh5_file, str):
-        lh5_file = h5py.File(utils.expand_path(lh5_file), "r", locking=False)
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(utils.expand_path(Path(lh5_file)), "r", locking=False)
 
     # go to group
     if lh5_group != "/":
lgdo/lh5/utils.py CHANGED
@@ -21,7 +21,7 @@ log = logging.getLogger(__name__)
 
 def get_buffer(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     size: int | None = None,
     field_mask: Mapping[str, bool] | Sequence[str] | None = None,
 ) -> types.LGDO:
@@ -39,7 +39,7 @@ def get_buffer(
     return obj
 
 
-def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
+def read_n_rows(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the number of rows in an Array-like LGDO object on disk.
 
     Return ``None`` if `name` is a :class:`.Scalar` or a :class:`.Struct`.
@@ -56,7 +56,7 @@ def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
     return _serializers.read.utils.read_n_rows(h5o, h5f.name, name)
 
 
-def read_size_in_bytes(name: str, h5f: str | h5py.File) -> int | None:
+def read_size_in_bytes(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the size (in B) in an LGDO object in memory. Will crawl
     recursively through members of a Struct or Table
     """
@@ -111,7 +111,7 @@ def get_h5_group(
         grp_attrs is not None
         and len(set(grp_attrs.items()) ^ set(group.attrs.items())) > 0
     ):
-        if not overwrite:
+        if not overwrite and len(group.attrs) != 0:
             msg = (
                 f"Provided {grp_attrs=} are different from "
                 f"existing ones {dict(group.attrs)=} but overwrite flag is not set"
@@ -158,10 +158,10 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
 
 
 def expand_path(
-    path: str,
+    path: str | Path,
     substitute: dict[str, str] | None = None,
     list: bool = False,
-    base_path: str | None = None,
+    base_path: str | Path | None = None,
 ) -> str | list:
     """Expand (environment) variables and wildcards to return absolute paths.
 
@@ -184,18 +184,26 @@ def expand_path(
        Unique absolute path, or list of all absolute paths
    """
    if base_path is not None and base_path != "":
-        base_path = Path(os.path.expandvars(base_path)).expanduser()
-        path = base_path / path
+        base_path = Path(expand_vars(str(base_path))).expanduser()
+        if not Path(path).expanduser().is_absolute():
+            path = base_path / path
 
     # first expand variables
-    _path = expand_vars(path, substitute)
+    _path = expand_vars(str(path), substitute)
 
     # then expand wildcards
     # pathlib glob works differently so use glob for now
     paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
 
     if base_path is not None and base_path != "":
-        paths = [os.path.relpath(p, base_path) for p in paths]
+        rel_paths = []
+        for p in paths:
+            p_path = Path(p)
+            try:
+                rel_paths.append(str(p_path.relative_to(base_path)))
+            except ValueError:
+                rel_paths.append(str(p_path))
+        paths = rel_paths
 
     if not list:
         if len(paths) == 0:
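Note the behavioral nuance of the ``relative_to`` fallback above: matched paths living outside ``base_path`` are now returned as-is, whereas ``os.path.relpath`` would have produced ``..`` segments. A small sketch with made-up paths:

    from pathlib import Path

    base = Path("/data/legend")
    for p in ("/data/legend/run0.lh5", "/other/run1.lh5"):
        try:
            print(Path(p).relative_to(base))  # -> run0.lh5
        except ValueError:
            print(p)  # -> /other/run1.lh5, kept absolute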
lgdo/types/array.py CHANGED
@@ -109,14 +109,14 @@ class Array(LGDOCollection):
 
     @property
     def shape(self):
-        return (len(self),) + self._nda.shape[1:]
+        return (len(self), *self._nda.shape[1:])
 
     def reserve_capacity(self, capacity: int) -> None:
         "Set size (number of rows) of internal memory buffer"
         if capacity < len(self):
             msg = "Cannot reduce capacity below Array length"
             raise ValueError(msg)
-        self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
+        self._nda.resize((capacity, *self._nda.shape[1:]), refcheck=False)
 
     def get_capacity(self) -> int:
         "Get capacity (i.e. max size before memory must be re-allocated)"
@@ -190,6 +190,9 @@ class Array(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __iter__(self) -> Iterator:
         yield from self.nda
 
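The ``__hash__`` methods added here and in the other LGDO types below restore hashability: Python sets ``__hash__`` to ``None`` on any class that defines ``__eq__`` without also defining ``__hash__``, so instances could not be used as ``dict`` keys or ``set`` members. A minimal sketch of the mechanism:

    class EqOnly:
        def __eq__(self, other):
            return NotImplemented

    assert EqOnly.__hash__ is None  # instances are unhashable

    class EqAndHash(EqOnly):
        name = "x"
        def __hash__(self):
            return hash(self.name)

    {EqAndHash(): 1}  # hashable again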
lgdo/types/encoded.py CHANGED
@@ -92,6 +92,9 @@ class VectorOfEncodedVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(*capacity)
         self.decoded_size.reserve_capacity(capacity[0])
@@ -345,6 +348,9 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(capacity)
 
lgdo/types/scalar.py CHANGED
@@ -63,6 +63,9 @@ class Scalar(LGDO):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __str__(self) -> str:
         attrs = self.getattrs()
         return f"{self.value!s} with attrs={attrs!r}"
lgdo/types/struct.py CHANGED
@@ -5,7 +5,9 @@ utilities.
 
 from __future__ import annotations
 
+import copy
 import logging
+import re
 from collections.abc import Mapping
 from typing import Any
 
@@ -56,7 +58,21 @@ class Struct(LGDO, dict):
             # assign
             super().update({k: v})
 
-        # call LGDO constructor to setup attributes
+        # check the datatype attribute passed by the user and sort the fields
+        # to ensure consistent behavior
+        if attrs is not None and "datatype" in attrs:
+            _attrs = copy.copy(dict(attrs))
+
+            if not _is_struct_datatype(self.datatype_name(), _attrs["datatype"]):
+                msg = (
+                    f"datatype attribute ({self.attrs['datatype']}) is not "
+                    f"compatible with class datatype!"
+                )
+                raise ValueError(msg)
+
+            _attrs["datatype"] = _sort_datatype_fields(_attrs["datatype"])
+            attrs = _attrs
+
         super().__init__(attrs)
 
     def datatype_name(self) -> str:
@@ -64,7 +80,10 @@ class Struct(LGDO, dict):
 
     def form_datatype(self) -> str:
         return (
-            self.datatype_name() + "{" + ",".join([str(k) for k in self.keys()]) + "}"
+            self.datatype_name()
+            + "{"
+            + ",".join(sorted([str(k) for k in self.keys()]))
+            + "}"
         )
 
     def update_datatype(self) -> None:
@@ -157,3 +176,34 @@ class Struct(LGDO, dict):
             "not possible. Call view_as() on the fields instead."
         )
         raise NotImplementedError(msg)
+
+
+def _is_struct_datatype(dt_name, expr):
+    return re.search("^" + dt_name + r"\{(.*)\}$", expr) is not None
+
+
+def _get_struct_fields(expr: str) -> list[str]:
+    assert _is_struct_datatype(".*", expr)
+
+    arr = re.search(r"\{(.*)\}$", expr).group(1).split(",")
+    if arr == [""]:
+        arr = []
+
+    return sorted(arr)
+
+
+def _struct_datatype_equal(dt_name, dt1, dt2):
+    if any(not _is_struct_datatype(dt_name, dt) for dt in (dt1, dt2)):
+        return False
+
+    return _get_struct_fields(dt1) == _get_struct_fields(dt2)
+
+
+def _sort_datatype_fields(expr):
+    assert _is_struct_datatype(".*", expr)
+
+    match = re.search(r"^(.*)\{.*\}$", expr)
+    struct_type = match.group(1)
+    fields = _get_struct_fields(expr)
+
+    return struct_type + "{" + ",".join(sorted([str(k) for k in fields])) + "}"
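Because fields are now sorted, the on-disk datatype string no longer depends on the order in which a struct was built. A quick sketch:

    import numpy as np
    from lgdo import Array, Struct

    s = Struct({"z": Array(np.zeros(1)), "a": Array(np.zeros(1))})
    assert s.form_datatype() == "struct{a,z}"  # sorted, not insertion order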
lgdo/types/table.py CHANGED
@@ -81,8 +81,9 @@ class Table(Struct, LGDOCollection):
         col_dict = _ak_to_lgdo_or_col_dict(col_dict)
 
         # call Struct constructor
-        Struct.__init__(self, obj_dict=col_dict)
-        LGDOCollection.__init__(self, attrs=attrs)
+        Struct.__init__(self, obj_dict=col_dict, attrs=attrs)
+        # no need to call the LGDOCollection constructor, as we are calling the
+        # Struct constructor already
 
         # if col_dict is not empty, set size according to it
         # if size is also supplied, resize all fields to match it
@@ -329,9 +330,10 @@ class Table(Struct, LGDOCollection):
             :func:`numexpr.evaluate`` as `local_dict` argument or to
             :func:`eval` as `locals` argument.
         modules
-            a dictionary of additional modules used by the expression. If this is not `None`
-            then :func:`eval`is used and the expression can depend on any modules from this dictionary in
-            addition to awkward and numpy. These are passed to :func:`eval` as `globals` argument.
+            a dictionary of additional modules used by the expression. If this
+            is not `None` then :func:`eval` is used and the expression can
+            depend on any modules from this dictionary in addition to awkward
+            and numpy. These are passed to :func:`eval` as `globals` argument.
 
         Examples
         --------
@@ -402,7 +404,10 @@ class Table(Struct, LGDOCollection):
             return _make_lgdo(out_data)
 
         except Exception:
-            msg = f"Warning {expr} could not be evaluated with numexpr probably due to some not allowed characters, trying with eval()."
+            msg = (
+                f"Warning {expr} could not be evaluated with numexpr probably "
+                "due to some not allowed characters, trying with eval()."
+            )
             log.debug(msg)
 
             # resort to good ol' eval()
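A short sketch of the ``modules`` argument whose docstring is reflowed above (column name made up):

    import numpy as np
    from lgdo import Array, Table

    tbl = Table(col_dict={"energy": Array(np.array([1.0, 2.0, 3.0]))})
    # with modules given, evaluation falls through to eval() with these globals
    out = tbl.eval("np.log(energy)", modules={"np": np})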
lgdo/types/vectorofvectors.py CHANGED
@@ -284,6 +284,9 @@ class VectorOfVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __getitem__(self, i: int) -> NDArray:
         """Return a view of the vector at index `i` along the first axis."""
         if self.ndim == 2: