legend-pydataobj 1.13.0__py3-none-any.whl → 1.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
legend_pydataobj-1.13.0.dist-info/METADATA → legend_pydataobj-1.14.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: legend_pydataobj
- Version: 1.13.0
+ Version: 1.14.1
  Summary: LEGEND Python Data Objects
  Author: The LEGEND Collaboration
  Maintainer: The LEGEND Collaboration
legend_pydataobj-1.13.0.dist-info/RECORD → legend_pydataobj-1.14.1.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
- legend_pydataobj-1.13.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ legend_pydataobj-1.14.1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
  lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
- lgdo/_version.py,sha256=tH8KJgTzCjiwedQXB_0jJIzqyRjqKXz40ci9OEj6wNA,513
+ lgdo/_version.py,sha256=dJRV9pQgpOiwiRbOfIEKw29vLhc6CuMVTXwqoHaRZvM,513
  lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
  lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
  lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -9,48 +9,48 @@ lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
  lgdo/compression/__init__.py,sha256=xHt_8Th0LxxNwj9iYHf5uGNTm3A_4qyW7zEVdAX3NwI,1127
  lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
  lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
- lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
+ lgdo/compression/radware.py,sha256=-W7LgvkSVzdVJ6qNn7Ts3O9EcRcl8mUiApTLqR4dtIo,23836
  lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
- lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
+ lgdo/compression/varlen.py,sha256=bjyxhHzfpi6PIPy-Uc47W8_LrRbFoJLJ2kVeD5nhyqo,15125
  lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
  lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
- lgdo/lh5/core.py,sha256=U0ZZk6EmojRRYFBEo_bMy7jZ3SKBU41MIsSulyFxZIU,13752
+ lgdo/lh5/core.py,sha256=ER5tAEevD5--xEm_kNHpeHC9hcSaSBFHW3qohyvuvmM,14120
  lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
- lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
- lgdo/lh5/iterator.py,sha256=DOx6tDaJE1NajA7klTVWeblL_k6M0JWjmBiHYhpSri0,19970
+ lgdo/lh5/exceptions.py,sha256=Q374YeqajpptVCYfxJYrThiPZSnfpdbGV3qVwoUuEFo,1697
+ lgdo/lh5/iterator.py,sha256=vuN98pa-xHDWXM2GMxvMxFEJGfHatMX6ajqnaP55PuY,20680
  lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
- lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
- lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
- lgdo/lh5/utils.py,sha256=D3Cn1U7NLEkUVGpKdUzTjoNIhaMBFZPbjGsjnxpeu6A,6362
+ lgdo/lh5/store.py,sha256=HJuDjWQ8ztrKDoyWW3cplhtWDnz3J4a-Fe2WF4fzOY4,8676
+ lgdo/lh5/tools.py,sha256=EZTCj3TMMp4Rnocq1F0QeO1yYHzx4yMR7l_Em4G7sC4,6503
+ lgdo/lh5/utils.py,sha256=hxPoaG25MOhuu7emrw2xzx3zerl-GzeMWdlfoQmLiYo,6667
  lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
  lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
  lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
  lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
- lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
+ lgdo/lh5/_serializers/read/ndarray.py,sha256=cxzZ7esT5BzxyoXfITBG_EDTtCVxSeSu6dVZrohOdOY,3685
  lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
  lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
  lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
  lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- lgdo/lh5/_serializers/write/array.py,sha256=gAB1EjPa9YojPqohVwY_VPeuY7_bLN-lttCmfgty-qk,3175
- lgdo/lh5/_serializers/write/composite.py,sha256=sZfV8aGZCH0mvMZ2dGDKt-MoepgL4PlR9ZWbT_JNIjQ,12171
+ lgdo/lh5/_serializers/write/array.py,sha256=qzRNPQ4mtvc7HYPE3vUcM6bi7lWYnolNStdJVcDfzPU,3174
+ lgdo/lh5/_serializers/write/composite.py,sha256=Cm9q0xVk27fmmrAeAp1q18E7VP7UwHako01115XdFHg,12964
  lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
  lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
  lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
- lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
+ lgdo/types/array.py,sha256=TpZINHgGIptslwr5mwKYWU_PrYAk8bH1ECJ4XfLkWxg,9338
  lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
- lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
+ lgdo/types/encoded.py,sha256=8DJHb3kxz6RrmjkeLWS6iyjvIJqx86mDInWqqjpMON0,15752
  lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
  lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
  lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
- lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
+ lgdo/types/scalar.py,sha256=nBPiqX4g3GrPavEbG6nCt2Jel7Mj0IchXqwxB6ei_rg,1989
  lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
- lgdo/types/table.py,sha256=hvOwhFkm-_CkNhGmD8SJoeepZcwFY6ItYOS76LztKtA,20158
- lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+ lgdo/types/table.py,sha256=huhgpzdAUx0bRaEaitwnb-Ve7oAu5B6zxPK5EXPUfg0,20233
+ lgdo/types/vectorofvectors.py,sha256=k1LwNnX3TcRAhOujj85kNkfZN0MXZYL9aaMUbr82JlE,26910
  lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
  lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
- legend_pydataobj-1.13.0.dist-info/METADATA,sha256=j_3ElXkBp__BND0nYsfGPEEwVgB_-_P4LNcffIFiv70,44443
- legend_pydataobj-1.13.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- legend_pydataobj-1.13.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
- legend_pydataobj-1.13.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
- legend_pydataobj-1.13.0.dist-info/RECORD,,
+ legend_pydataobj-1.14.1.dist-info/METADATA,sha256=zkJj_GxpFmtHsxmgZOxN2hjUxEZduPKpswZoIS_Kq54,44443
+ legend_pydataobj-1.14.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ legend_pydataobj-1.14.1.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+ legend_pydataobj-1.14.1.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+ legend_pydataobj-1.14.1.dist-info/RECORD,,
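
Each RECORD entry has the form `path,sha256=<digest>,<size>`: per the wheel spec, the digest is the urlsafe-base64-encoded (padding stripped) SHA-256 of the file's bytes, which is why every content change below flips both the hash and the size fields above. A sketch of how one such entry can be recomputed (the path is illustrative):

    import base64
    import hashlib
    from pathlib import Path

    def record_entry(path: str) -> str:
        data = Path(path).read_bytes()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
        return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"

    # e.g. record_entry("lgdo/cli.py") should reproduce the RECORD line above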
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '1.13.0'
- __version_tuple__ = version_tuple = (1, 13, 0)
+ __version__ = version = '1.14.1'
+ __version_tuple__ = version_tuple = (1, 14, 1)
lgdo/compression/radware.py CHANGED
@@ -95,13 +95,13 @@ def encode(
  if isinstance(sig_in, np.ndarray):
  s = sig_in.shape
  if len(sig_in) == 0:
- return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+ return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
  if sig_out is None:
  # the encoded signal is an array of bytes
  # -> twice as long as a uint16
  # pre-allocate ubyte (uint8) array, expand last dimension
- sig_out = np.empty(s[:-1] + (s[-1] * 2,), dtype=ubyte)
+ sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=ubyte)
 
  if sig_out.dtype != ubyte:
  msg = "sig_out must be of type ubyte"
@@ -226,7 +226,7 @@ def decode(
  # allocate output array with lasd dim as large as the longest
  # uncompressed wf
  maxs = np.max(_get_hton_u16(sig_in[0], 0))
- sig_out = np.empty(s[:-1] + (maxs,), dtype=int32)
+ sig_out = np.empty((*s[:-1], maxs), dtype=int32)
 
  # siglen has one dimension less (the last)
  siglen = np.empty(s[:-1], dtype=uint32)
lgdo/compression/varlen.py CHANGED
@@ -74,14 +74,14 @@ def encode(
  if isinstance(sig_in, np.ndarray):
  s = sig_in.shape
  if len(sig_in) == 0:
- return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+ return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
  if sig_out is None:
  # the encoded signal is an array of bytes
  # pre-allocate ubyte (uint8) array with a generous (but safe) size
  max_b = int(np.ceil(np.iinfo(sig_in.dtype).bits / 16) * 5)
  # expand last dimension
- sig_out = np.empty(s[:-1] + (s[-1] * max_b,), dtype=ubyte)
+ sig_out = np.empty((*s[:-1], s[-1] * max_b), dtype=ubyte)
 
  if sig_out.dtype != ubyte:
  msg = "sig_out must be of type ubyte"
lgdo/lh5/_serializers/read/ndarray.py CHANGED
@@ -57,7 +57,7 @@ def _h5_read_ndarray(
  (start_row,) + (0,) * (h5d.rank - 1),
  (1,) * h5d.rank,
  None,
- (n_rows_to_read,) + fspace.shape[1:],
+ (n_rows_to_read, *fspace.shape[1:]),
  )
  elif use_h5idx:
  # Note that h5s will automatically merge adjacent elements into a range
@@ -67,7 +67,7 @@ def _h5_read_ndarray(
  (i,) + (0,) * (h5d.rank - 1),
  (1,) * h5d.rank,
  None,
- (1,) + fspace.shape[1:],
+ (1, *fspace.shape[1:]),
  h5py.h5s.SELECT_OR,
  )
 
@@ -84,7 +84,7 @@ def _h5_read_ndarray(
  (obj_buf_start,) + (0,) * (h5d.rank - 1),
  (1,) * h5d.rank,
  None,
- (n_rows_to_read,) + fspace.shape[1:],
+ (n_rows_to_read, *fspace.shape[1:]),
  )
  h5d.read(mspace, fspace, obj_buf.nda)
  else:
@@ -93,10 +93,10 @@ def _h5_read_ndarray(
  obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
  nda = obj_buf.nda
  elif n_rows == 0:
- tmp_shape = (0,) + h5d.shape[1:]
+ tmp_shape = (0, *h5d.shape[1:])
  nda = np.empty(tmp_shape, h5d.dtype)
  else:
- mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+ mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
  nda = np.empty(mspace.shape, h5d.dtype)
  if idx is None or use_h5idx:
  h5d.read(mspace, fspace, nda)
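
These read hunks feed the unpacked shape into h5py's low-level hyperslab selection. A minimal sketch of the same read pattern, assuming a hypothetical file `example.lh5` with a dataset `data` (the calls mirror the hunks above):

    import h5py
    import numpy as np

    with h5py.File("example.lh5", "r") as f:   # hypothetical file
        h5d = f["data"].id                     # low-level DatasetID, as in the hunks
        fspace = h5d.get_space()
        start_row, n_rows_to_read = 2, 5
        # select one contiguous block of rows in the file dataspace
        fspace.select_hyperslab(
            (start_row,) + (0,) * (h5d.rank - 1),  # start
            (1,) * h5d.rank,                       # count: a single block
            None,                                  # stride
            (n_rows_to_read, *fspace.shape[1:]),   # block shape
        )
        # memory dataspace matching the selection, then read into it
        mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
        nda = np.empty(mspace.shape, h5d.dtype)
        h5d.read(mspace, fspace, nda)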
lgdo/lh5/_serializers/write/array.py CHANGED
@@ -40,7 +40,7 @@ def _h5_write_array(
  # this is needed in order to have a resizable (in the first
  # axis) data set, i.e. rows can be appended later
  # NOTE: this automatically turns chunking on!
- maxshape = (None,) + nda.shape[1:]
+ maxshape = (None, *nda.shape[1:])
  h5py_kwargs.setdefault("maxshape", maxshape)
 
  if wo_mode == "o" and name in group:
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -52,140 +52,166 @@ def _h5_write_lgdo(
  # In hdf5, 'a' is really "modify" -- in addition to appending, you can
  # change any object in the file. So we use file:append for
  # write_object:overwrite.
+ opened_here = False
  if not isinstance(lh5_file, h5py.File):
  mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
- lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
- log.debug(
- f"writing {obj!r}[{start_row}:{n_rows}] as "
- f"{lh5_file.filename}:{group}/{name}[{write_start}:], "
- f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
- )
-
- group = utils.get_h5_group(group, lh5_file)
+ try:
+ fh = h5py.File(lh5_file, mode=mode, **file_kwargs)
+ except OSError as oe:
+ raise LH5EncodeError(str(oe), lh5_file, None) from oe
 
- # name already in file
- if name in group or (
- ("datatype" in group.attrs or group == "/")
- and (len(name) <= 2 or "/" not in name[1:-1])
- ):
- pass
- # group is in file but not struct or need to create nesting
+ opened_here = True
  else:
- # check if name is nested
- # if name is nested, iterate up from parent
- # otherwise we just need to iterate the group
- if len(name) > 2 and "/" in name[1:-1]:
- group = utils.get_h5_group(
- name[:-1].rsplit("/", 1)[0],
- group,
- )
- curr_name = (
- name.rsplit("/", 1)[1]
- if name[-1] != "/"
- else name[:-1].rsplit("/", 1)[1]
- )
- else:
- curr_name = name
- # initialize the object to be written
- obj = types.Struct({curr_name.replace("/", ""): obj})
+ fh = lh5_file
 
- # if base group already has a child we just append
- if len(group) >= 1:
- wo_mode = "ac"
- else:
- # iterate up the group hierarchy until we reach the root or a group with more than one child
- while group.name != "/":
- if len(group) > 1:
- break
- curr_name = group.name
- group = group.parent
- if group.name != "/":
- obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
- else:
- obj = types.Struct({curr_name[1:]: obj})
- # if the group has more than one child, we need to append else we can overwrite
- wo_mode = "ac" if len(group) > 1 else "o"
-
- # set the new name
- if group.name == "/":
- name = "/"
- elif group.parent.name == "/":
- name = group.name[1:]
- else:
- name = group.name[len(group.parent.name) + 1 :]
- # get the new group
- group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
+ try:
+ log.debug(
+ f"writing {obj!r}[{start_row}:{n_rows}] as "
+ f"{fh.filename}:{group}/{name}[{write_start}:], "
+ f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
+ )
 
- if wo_mode == "w" and name in group:
- msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
- raise LH5EncodeError(msg, lh5_file, group, name)
+ group = utils.get_h5_group(group, fh)
 
- # struct, table, waveform table or histogram.
- if isinstance(obj, types.Struct):
- if (
- isinstance(obj, types.Histogram)
- and wo_mode not in ["w", "o", "of"]
- and name in group
+ # name already in file
+ if name in group or (
+ ("datatype" in group.attrs or group == "/")
+ and (len(name) <= 2 or "/" not in name[1:-1])
  ):
- msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
- raise LH5EncodeError(msg, lh5_file, group, name)
- if isinstance(obj, types.Histogram) and write_start != 0:
- msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
- raise LH5EncodeError(msg, lh5_file, group, name)
-
- return _h5_write_struct(
- obj,
- name,
- lh5_file,
- group=group,
- start_row=start_row,
- n_rows=n_rows, # if isinstance(obj, types.Table | types.Histogram) else None,
- wo_mode=wo_mode,
- write_start=write_start,
- **h5py_kwargs,
- )
-
- # scalars
- if isinstance(obj, types.Scalar):
- return _h5_write_scalar(obj, name, lh5_file, group, wo_mode)
+ pass
+ # group is in file but not struct or need to create nesting
+ else:
+ # check if name is nested
+ # if name is nested, iterate up from parent
+ # otherwise we just need to iterate the group
+ if len(name) > 2 and "/" in name[1:-1]:
+ group = utils.get_h5_group(
+ name[:-1].rsplit("/", 1)[0],
+ group,
+ )
+ curr_name = (
+ name.rsplit("/", 1)[1]
+ if name[-1] != "/"
+ else name[:-1].rsplit("/", 1)[1]
+ )
+ else:
+ curr_name = name
+ # initialize the object to be written
+ obj = types.Struct({curr_name.replace("/", ""): obj})
+
+ # if base group already has a child we just append
+ if len(group) >= 1:
+ wo_mode = "ac"
+ else:
+ # iterate up the group hierarchy until we reach the root or a group with more than one child
+ while group.name != "/":
+ if len(group) > 1:
+ break
+ curr_name = group.name
+ group = group.parent
+ if group.name != "/":
+ obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
+ else:
+ obj = types.Struct({curr_name[1:]: obj})
+ # if the group has more than one child, we need to append else we can overwrite
+ wo_mode = "ac" if len(group) > 1 else "o"
+
+ # set the new name
+ if group.name == "/":
+ name = "/"
+ elif group.parent.name == "/":
+ name = group.name[1:]
+ else:
+ name = group.name[len(group.parent.name) + 1 :]
+ # get the new group
+ group = utils.get_h5_group(group.parent if group.name != "/" else "/", fh)
+
+ if wo_mode == "w" and name in group:
+ msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
+ raise LH5EncodeError(msg, fh, group, name)
+
+ # struct, table, waveform table or histogram.
+ if isinstance(obj, types.Struct):
+ if (
+ isinstance(obj, types.Histogram)
+ and wo_mode not in ["w", "o", "of"]
+ and name in group
+ ):
+ msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
+ raise LH5EncodeError(msg, fh, group, name)
+ if isinstance(obj, types.Histogram) and write_start != 0:
+ msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
+ raise LH5EncodeError(msg, fh, group, name)
+
+ return _h5_write_struct(
+ obj,
+ name,
+ fh,
+ group=group,
+ start_row=start_row,
+ n_rows=n_rows, # if isinstance(obj, types.Table | types.Histogram) else None,
+ wo_mode=wo_mode,
+ write_start=write_start,
+ **h5py_kwargs,
+ )
 
- # vector of encoded vectors
- if isinstance(
- obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
- ):
- group = utils.get_h5_group(
- name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
- )
+ # scalars
+ if isinstance(obj, types.Scalar):
+ return _h5_write_scalar(obj, name, fh, group, wo_mode)
 
- # ask not to further compress flattened_data, it is already compressed!
- obj.encoded_data.flattened_data.attrs["compression"] = None
+ # vector of encoded vectors
+ if isinstance(
+ obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
+ ):
+ group = utils.get_h5_group(
+ name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
+ )
 
- _h5_write_vector_of_vectors(
- obj.encoded_data,
- "encoded_data",
- lh5_file,
- group=group,
- start_row=start_row,
- n_rows=n_rows,
- wo_mode=wo_mode,
- write_start=write_start,
- **h5py_kwargs,
- )
+ # ask not to further compress flattened_data, it is already compressed!
+ obj.encoded_data.flattened_data.attrs["compression"] = None
 
- if isinstance(obj.decoded_size, types.Scalar):
- _h5_write_scalar(
- obj.decoded_size,
- "decoded_size",
- lh5_file,
+ _h5_write_vector_of_vectors(
+ obj.encoded_data,
+ "encoded_data",
+ fh,
  group=group,
+ start_row=start_row,
+ n_rows=n_rows,
  wo_mode=wo_mode,
+ write_start=write_start,
+ **h5py_kwargs,
  )
- else:
- _h5_write_array(
- obj.decoded_size,
- "decoded_size",
- lh5_file,
+
+ if isinstance(obj.decoded_size, types.Scalar):
+ _h5_write_scalar(
+ obj.decoded_size,
+ "decoded_size",
+ fh,
+ group=group,
+ wo_mode=wo_mode,
+ )
+ else:
+ _h5_write_array(
+ obj.decoded_size,
+ "decoded_size",
+ fh,
+ group=group,
+ start_row=start_row,
+ n_rows=n_rows,
+ wo_mode=wo_mode,
+ write_start=write_start,
+ **h5py_kwargs,
+ )
+
+ return None
+
+ # vector of vectors
+ if isinstance(obj, types.VectorOfVectors):
+ return _h5_write_vector_of_vectors(
+ obj,
+ name,
+ fh,
  group=group,
  start_row=start_row,
  n_rows=n_rows,
@@ -194,38 +220,25 @@ def _h5_write_lgdo(
  **h5py_kwargs,
  )
 
- return None
-
- # vector of vectors
- if isinstance(obj, types.VectorOfVectors):
- return _h5_write_vector_of_vectors(
- obj,
- name,
- lh5_file,
- group=group,
- start_row=start_row,
- n_rows=n_rows,
- wo_mode=wo_mode,
- write_start=write_start,
- **h5py_kwargs,
- )
-
- # if we get this far, must be one of the Array types
- if isinstance(obj, types.Array):
- return _h5_write_array(
- obj,
- name,
- lh5_file,
- group=group,
- start_row=start_row,
- n_rows=n_rows,
- wo_mode=wo_mode,
- write_start=write_start,
- **h5py_kwargs,
- )
+ # if we get this far, must be one of the Array types
+ if isinstance(obj, types.Array):
+ return _h5_write_array(
+ obj,
+ name,
+ fh,
+ group=group,
+ start_row=start_row,
+ n_rows=n_rows,
+ wo_mode=wo_mode,
+ write_start=write_start,
+ **h5py_kwargs,
+ )
 
- msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
- raise LH5EncodeError(msg, lh5_file, group, name)
+ msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
+ raise LH5EncodeError(msg, fh, group, name)
+ finally:
+ if opened_here:
+ fh.close()
 
 
  def _h5_write_struct(
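
The restructuring above is largely an indentation change: the body of `_h5_write_lgdo` now runs inside a `try`/`finally` so that a file handle opened by the function itself (tracked via `opened_here`) is always closed, and open failures surface as `LH5EncodeError`. A reduced sketch of that discipline (`write_with_cleanup` and `do_write` are hypothetical stand-ins for the real dispatch):

    import h5py
    from lgdo.lh5.exceptions import LH5EncodeError

    def write_with_cleanup(lh5_file, do_write, **file_kwargs):
        """Sketch of the new open/close discipline in _h5_write_lgdo."""
        opened_here = False
        if not isinstance(lh5_file, h5py.File):
            try:
                fh = h5py.File(lh5_file, mode="a", **file_kwargs)
            except OSError as oe:
                # group/name left as None -> "while opening file ..." message
                raise LH5EncodeError(str(oe), lh5_file, None) from oe
            opened_here = True
        else:
            fh = lh5_file
        try:
            return do_write(fh)  # stands in for the per-type _h5_write_* dispatch
        finally:
            if opened_here:  # never close a handle the caller passed in
                fh.close()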
lgdo/lh5/core.py CHANGED
@@ -14,12 +14,13 @@ from numpy.typing import ArrayLike
 
  from .. import types
  from . import _serializers
+ from .exceptions import LH5DecodeError
  from .utils import read_n_rows
 
 
  def read(
  name: str,
- lh5_file: str | h5py.File | Sequence[str | h5py.File],
+ lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
  start_row: int = 0,
  n_rows: int = sys.maxsize,
  idx: ArrayLike = None,
@@ -110,15 +111,20 @@ def read(
  object
  the read-out object
  """
+ close_after = False
  if isinstance(lh5_file, h5py.File):
  lh5_obj = lh5_file[name]
- elif isinstance(lh5_file, str):
- lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
+ elif isinstance(lh5_file, (str, Path)):
+ try:
+ lh5_file = h5py.File(str(Path(lh5_file)), mode="r", locking=locking)
+ except (OSError, FileExistsError) as oe:
+ raise LH5DecodeError(str(oe), lh5_file, None) from oe
+
+ close_after = True
  try:
  lh5_obj = lh5_file[name]
  except KeyError as ke:
- err = f"Object {name} not found in file {lh5_file.filename}"
- raise KeyError(err) from ke
+ raise LH5DecodeError(str(ke), lh5_file, name) from ke
  else:
  if obj_buf is not None:
  obj_buf.resize(obj_buf_start)
@@ -173,29 +179,32 @@ def read(
  if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
  idx = np.where(idx)[0]
 
- obj, n_rows_read = _serializers._h5_read_lgdo(
- lh5_obj.id,
- lh5_obj.file.filename,
- lh5_obj.name,
- start_row=start_row,
- n_rows=n_rows,
- idx=idx,
- use_h5idx=use_h5idx,
- field_mask=field_mask,
- obj_buf=obj_buf,
- obj_buf_start=obj_buf_start,
- decompress=decompress,
- )
- with suppress(AttributeError):
- obj.resize(obj_buf_start + n_rows_read)
-
- return obj
+ try:
+ obj, n_rows_read = _serializers._h5_read_lgdo(
+ lh5_obj.id,
+ lh5_obj.file.filename,
+ lh5_obj.name,
+ start_row=start_row,
+ n_rows=n_rows,
+ idx=idx,
+ use_h5idx=use_h5idx,
+ field_mask=field_mask,
+ obj_buf=obj_buf,
+ obj_buf_start=obj_buf_start,
+ decompress=decompress,
+ )
+ with suppress(AttributeError):
+ obj.resize(obj_buf_start + n_rows_read)
+ return obj
+ finally:
+ if close_after:
+ lh5_file.close()
 
 
  def write(
  obj: types.LGDO,
  name: str,
- lh5_file: str | h5py.File,
+ lh5_file: str | Path | h5py.File,
  group: str | h5py.Group = "/",
  start_row: int = 0,
  n_rows: int | None = None,
@@ -318,7 +327,7 @@ def write(
 
  def read_as(
  name: str,
- lh5_file: str | h5py.File | Sequence[str | h5py.File],
+ lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
  library: str,
  **kwargs,
  ) -> Any:
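
With these hunks, `read` accepts `pathlib.Path` objects, raises `LH5DecodeError` instead of a bare `KeyError`, and closes any handle it opened itself in the `finally` block. A hedged usage sketch (file and object names invented):

    from pathlib import Path

    from lgdo import lh5
    from lgdo.lh5.exceptions import LH5DecodeError

    try:
        tbl = lh5.read("geds/raw", Path("run0001.lh5"))  # Path is now accepted
    except LH5DecodeError as e:
        # covers both "file cannot be opened" and "object not found"
        print(e)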
lgdo/lh5/exceptions.py CHANGED
@@ -4,17 +4,21 @@ import h5py
 
 
  class LH5DecodeError(Exception):
- def __init__(self, message: str, fname: str, oname: str) -> None:
+ def __init__(
+ self, message: str, file: str | h5py.File, oname: str | None = None
+ ) -> None:
  super().__init__(message)
 
- self.file = fname
+ self.file = file.filename if isinstance(file, h5py.File) else file
  self.obj = oname
 
  def __str__(self) -> str:
- return (
- f"while reading object '{self.obj}' in file {self.file}: "
- + super().__str__()
- )
+ if self.obj is None:
+ msg = f"while opening file {self.file} for decoding: "
+ else:
+ msg = f"while decoding object '{self.obj}' in file {self.file}: "
+
+ return msg + super().__str__()
 
  def __reduce__(self) -> tuple: # for pickling.
  return self.__class__, (*self.args, self.file, self.obj)
@@ -22,19 +26,30 @@ class LH5DecodeError(Exception):
 
  class LH5EncodeError(Exception):
  def __init__(
- self, message: str, file: str | h5py.File, group: str | h5py.Group, name: str
+ self,
+ message: str,
+ file: str | h5py.File,
+ group: str | h5py.Group | None = None,
+ name: str | None = None,
  ) -> None:
  super().__init__(message)
 
  self.file = file.filename if isinstance(file, h5py.File) else file
- self.group = (group.name if isinstance(file, h5py.File) else group).rstrip("/")
- self.name = name.lstrip("/")
+ self.group = (
+ (group.name if isinstance(file, h5py.File) else group).rstrip("/")
+ if group is not None
+ else None
+ )
+ self.name = name.lstrip("/") if name is not None else None
 
  def __str__(self) -> str:
- return (
- f"while writing object {self.group}/{self.name} to file {self.file}: "
- + super().__str__()
- )
+ if self.name is None:
+ msg = f"while opening file {self.file} for encoding: "
+ else:
+ msg = (
+ f"while encoding object {self.group}/{self.name} to file {self.file}: "
+ )
+ return msg + super().__str__()
 
  def __reduce__(self) -> tuple: # for pickling.
  return self.__class__, (*self.args, self.file, self.group, self.name)
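
Both exception types now render a different message when no object name is given, distinguishing open failures from per-object failures. For instance (hypothetical file names):

    from lgdo.lh5.exceptions import LH5DecodeError

    print(LH5DecodeError("unable to open file", "missing.lh5"))
    # while opening file missing.lh5 for decoding: unable to open file

    print(LH5DecodeError("not found", "run0001.lh5", "geds/raw"))
    # while decoding object 'geds/raw' in file run0001.lh5: not found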
lgdo/lh5/iterator.py CHANGED
@@ -60,6 +60,7 @@ class LH5Iterator(typing.Iterator):
  self,
  lh5_files: str | list[str],
  groups: str | list[str] | list[list[str]],
+ *,
  base_path: str = "",
  entry_list: list[int] | list[list[int]] | None = None,
  entry_mask: list[bool] | list[list[bool]] | None = None,
@@ -70,6 +71,7 @@ class LH5Iterator(typing.Iterator):
  file_cache: int = 10,
  file_map: NDArray[int] = None,
  friend: typing.Iterator | None = None,
+ h5py_open_mode: str = "r",
  ) -> None:
  """
  Parameters
@@ -110,9 +112,21 @@ class LH5Iterator(typing.Iterator):
  The friend should have the same length and entry list. A single
  LH5 table containing columns from both iterators will be returned.
  Note that buffer_len will be set to the minimum of the two.
+ h5py_open_mode
+ file open mode used when acquiring file handles. ``r`` (default)
+ opens files read-only while ``a`` allow opening files for
+ write-appending as well.
  """
  self.lh5_st = LH5Store(base_path=base_path, keep_open=file_cache)
 
+ if h5py_open_mode == "read":
+ h5py_open_mode = "r"
+ if h5py_open_mode == "append":
+ h5py_open_mode = "a"
+ if h5py_open_mode not in ["r", "a"]:
+ msg = f"unknown h5py_open_mode '{h5py_open_mode}'"
+ raise ValueError(msg)
+
  # List of files, with wildcards and env vars expanded
  if isinstance(lh5_files, str):
  lh5_files = [lh5_files]
@@ -147,6 +161,10 @@ class LH5Iterator(typing.Iterator):
  self.lh5_files += [f_exp] * len(g)
  self.groups += list(g)
 
+ # open files in the requested mode so they are writable if needed
+ for f in set(self.lh5_files):
+ self.lh5_st.gimme_file(f, mode=h5py_open_mode)
+
  if entry_list is not None and entry_mask is not None:
  msg = "entry_list and entry_mask arguments are mutually exclusive"
  raise ValueError(msg)
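
Note the bare `*` inserted after `groups`: every later argument is now keyword-only. The new `h5py_open_mode` accepts `r`/`read` or `a`/`append` and pre-opens each file through the store. A hedged construction example (file and group names invented):

    from lgdo.lh5 import LH5Iterator

    it = LH5Iterator(
        "run0001.lh5",            # hypothetical file
        "geds/raw",               # hypothetical group
        file_cache=10,            # keyword-only now, like all options after `groups`
        h5py_open_mode="append",  # normalized to "a"; anything else raises ValueError
    )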
lgdo/lh5/store.py CHANGED
@@ -38,7 +38,10 @@ class LH5Store:
  """
 
  def __init__(
- self, base_path: str = "", keep_open: bool = False, locking: bool = False
+ self,
+ base_path: str | Path = "",
+ keep_open: bool = False,
+ locking: bool = False,
  ) -> None:
  """
  Parameters
@@ -52,6 +55,7 @@ class LH5Store:
  locking
  whether to lock files when reading
  """
+ base_path = str(Path(base_path)) if base_path != "" else ""
  self.base_path = "" if base_path == "" else utils.expand_path(base_path)
  self.keep_open = keep_open
  self.locking = locking
@@ -59,7 +63,7 @@ class LH5Store:
 
  def gimme_file(
  self,
- lh5_file: str | h5py.File,
+ lh5_file: str | Path | h5py.File,
  mode: str = "r",
  page_buffer: int = 0,
  **file_kwargs,
@@ -83,6 +87,8 @@ class LH5Store:
  if isinstance(lh5_file, h5py.File):
  return lh5_file
 
+ lh5_file = str(Path(lh5_file))
+
  if mode == "r":
  lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
  file_kwargs["locking"] = self.locking
@@ -147,7 +153,7 @@ class LH5Store:
  def get_buffer(
  self,
  name: str,
- lh5_file: str | h5py.File | Sequence[str | h5py.File],
+ lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
  size: int | None = None,
  field_mask: Mapping[str, bool] | Sequence[str] | None = None,
  ) -> types.LGDO:
@@ -162,7 +168,7 @@ class LH5Store:
  def read(
  self,
  name: str,
- lh5_file: str | h5py.File | Sequence[str | h5py.File],
+ lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
  start_row: int = 0,
  n_rows: int = sys.maxsize,
  idx: ArrayLike = None,
@@ -180,7 +186,7 @@ class LH5Store:
  .lh5.core.read
  """
  # grab files from store
- if isinstance(lh5_file, (str, h5py.File)):
+ if isinstance(lh5_file, (str, Path, h5py.File)):
  h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
  else:
  h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
@@ -201,7 +207,7 @@ class LH5Store:
  self,
  obj: types.LGDO,
  name: str,
- lh5_file: str | h5py.File,
+ lh5_file: str | Path | h5py.File,
  group: str | h5py.Group = "/",
  start_row: int = 0,
  n_rows: int | None = None,
@@ -256,14 +262,14 @@ class LH5Store:
  **h5py_kwargs,
  )
 
- def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None:
+ def read_n_rows(self, name: str, lh5_file: str | Path | h5py.File) -> int | None:
  """Look up the number of rows in an Array-like object called `name` in `lh5_file`.
 
  Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.
  """
  return utils.read_n_rows(name, self.gimme_file(lh5_file, "r"))
 
- def read_size_in_bytes(self, name: str, lh5_file: str | h5py.File) -> int:
+ def read_size_in_bytes(self, name: str, lh5_file: str | Path | h5py.File) -> int:
  """Look up the size (in B) of the object in memory. Will recursively
  crawl through all objects in a Struct or Table
  """
lgdo/lh5/tools.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
  import fnmatch
  import logging
  from copy import copy
+ from pathlib import Path
 
  import h5py
 
@@ -13,7 +14,7 @@ log = logging.getLogger(__name__)
 
 
  def ls(
- lh5_file: str | h5py.Group,
+ lh5_file: str | Path | h5py.Group,
  lh5_group: str = "",
  recursive: bool = False,
  ) -> list[str]:
@@ -39,8 +40,8 @@ def ls(
 
  lh5_st = LH5Store()
  # To use recursively, make lh5_file a h5group instead of a string
- if isinstance(lh5_file, str):
- lh5_file = lh5_st.gimme_file(lh5_file, "r")
+ if isinstance(lh5_file, (str, Path)):
+ lh5_file = lh5_st.gimme_file(str(Path(lh5_file)), "r")
  if lh5_group.startswith("/"):
  lh5_group = lh5_group[1:]
 
@@ -75,7 +76,7 @@ def ls(
 
 
  def show(
- lh5_file: str | h5py.Group,
+ lh5_file: str | Path | h5py.Group,
  lh5_group: str = "/",
  attrs: bool = False,
  indent: str = "",
@@ -121,8 +122,8 @@ def show(
  return
 
  # open file
- if isinstance(lh5_file, str):
- lh5_file = h5py.File(utils.expand_path(lh5_file), "r", locking=False)
+ if isinstance(lh5_file, (str, Path)):
+ lh5_file = h5py.File(utils.expand_path(Path(lh5_file)), "r", locking=False)
 
  # go to group
  if lh5_group != "/":
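
`ls` and `show` gain the same `Path` support, e.g. (hypothetical file):

    from pathlib import Path

    from lgdo import lh5

    print(lh5.ls(Path("run0001.lh5"), recursive=True))
    lh5.show(Path("run0001.lh5"), "geds/raw")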
lgdo/lh5/utils.py CHANGED
@@ -21,7 +21,7 @@ log = logging.getLogger(__name__)
 
  def get_buffer(
  name: str,
- lh5_file: str | h5py.File | Sequence[str | h5py.File],
+ lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
  size: int | None = None,
  field_mask: Mapping[str, bool] | Sequence[str] | None = None,
  ) -> types.LGDO:
@@ -39,7 +39,7 @@ def get_buffer(
  return obj
 
 
- def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
+ def read_n_rows(name: str, h5f: str | Path | h5py.File) -> int | None:
  """Look up the number of rows in an Array-like LGDO object on disk.
 
  Return ``None`` if `name` is a :class:`.Scalar` or a :class:`.Struct`.
@@ -56,7 +56,7 @@ def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
  return _serializers.read.utils.read_n_rows(h5o, h5f.name, name)
 
 
- def read_size_in_bytes(name: str, h5f: str | h5py.File) -> int | None:
+ def read_size_in_bytes(name: str, h5f: str | Path | h5py.File) -> int | None:
  """Look up the size (in B) in an LGDO object in memory. Will crawl
  recursively through members of a Struct or Table
  """
@@ -158,10 +158,10 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
 
 
  def expand_path(
- path: str,
+ path: str | Path,
  substitute: dict[str, str] | None = None,
  list: bool = False,
- base_path: str | None = None,
+ base_path: str | Path | None = None,
  ) -> str | list:
  """Expand (environment) variables and wildcards to return absolute paths.
 
@@ -184,18 +184,26 @@ def expand_path(
  Unique absolute path, or list of all absolute paths
  """
  if base_path is not None and base_path != "":
- base_path = Path(os.path.expandvars(base_path)).expanduser()
- path = base_path / path
+ base_path = Path(expand_vars(str(base_path))).expanduser()
+ if not Path(path).expanduser().is_absolute():
+ path = base_path / path
 
  # first expand variables
- _path = expand_vars(path, substitute)
+ _path = expand_vars(str(path), substitute)
 
  # then expand wildcards
  # pathlib glob works differently so use glob for now
  paths = sorted(glob.glob(str(Path(_path).expanduser()))) # noqa: PTH207
 
  if base_path is not None and base_path != "":
- paths = [os.path.relpath(p, base_path) for p in paths]
+ rel_paths = []
+ for p in paths:
+ p_path = Path(p)
+ try:
+ rel_paths.append(str(p_path.relative_to(base_path)))
+ except ValueError:
+ rel_paths.append(str(p_path))
+ paths = rel_paths
 
  if not list:
  if len(paths) == 0:
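
Two behavioral refinements hide in this last hunk: an absolute input path is no longer prefixed with `base_path`, and a matched path outside `base_path` now stays absolute, because `Path.relative_to` raises `ValueError` where `os.path.relpath` would have produced a `../`-style result. The pathlib behavior this relies on (paths hypothetical):

    from pathlib import Path

    base_path = Path("/data/runs")                      # hypothetical base
    for p in ["/data/runs/run0001.lh5", "/other/run0002.lh5"]:
        try:
            print(Path(p).relative_to(base_path))       # -> run0001.lh5
        except ValueError:
            print(p)                                    # stays absolute, no "../"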
lgdo/types/array.py CHANGED
@@ -109,14 +109,14 @@ class Array(LGDOCollection):
 
  @property
  def shape(self):
- return (len(self),) + self._nda.shape[1:]
+ return (len(self), *self._nda.shape[1:])
 
  def reserve_capacity(self, capacity: int) -> None:
  "Set size (number of rows) of internal memory buffer"
  if capacity < len(self):
  msg = "Cannot reduce capacity below Array length"
  raise ValueError(msg)
- self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
+ self._nda.resize((capacity, *self._nda.shape[1:]), refcheck=False)
 
  def get_capacity(self) -> int:
  "Get capacity (i.e. max size before memory must be re-allocated)"
@@ -190,6 +190,9 @@ class Array(LGDOCollection):
 
  return False
 
+ def __hash__(self):
+ return hash(self.name)
+
  def __iter__(self) -> Iterator:
  yield from self.nda
 
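Background on the `__hash__` additions (here and in the encoded, scalar, and vector-of-vectors types below): a class that defines `__eq__` has its `__hash__` implicitly set to `None`, so instances cannot go in sets or be used as dict keys until `__hash__` is defined explicitly. A minimal illustration of the Python rule, not of lgdo itself:

    class OnlyEq:
        def __eq__(self, other):
            return True  # defining __eq__ implicitly sets __hash__ = None

    class EqAndHash(OnlyEq):
        def __hash__(self):
            return hash("fixed")  # restores hashability

    try:
        {OnlyEq()}
    except TypeError as e:
        print(e)  # unhashable type: 'OnlyEq'
    print(len({EqAndHash(), EqAndHash()}))  # 1: equal objects, equal hashes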
lgdo/types/encoded.py CHANGED
@@ -92,6 +92,9 @@ class VectorOfEncodedVectors(LGDOCollection):
 
  return False
 
+ def __hash__(self):
+ return hash(self.name)
+
  def reserve_capacity(self, *capacity: int) -> None:
  self.encoded_data.reserve_capacity(*capacity)
  self.decoded_size.reserve_capacity(capacity[0])
@@ -345,6 +348,9 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
 
  return False
 
+ def __hash__(self):
+ return hash(self.name)
+
  def reserve_capacity(self, *capacity: int) -> None:
  self.encoded_data.reserve_capacity(capacity)
 
lgdo/types/scalar.py CHANGED
@@ -63,6 +63,9 @@ class Scalar(LGDO):
 
  return False
 
+ def __hash__(self):
+ return hash(self.name)
+
  def __str__(self) -> str:
  attrs = self.getattrs()
  return f"{self.value!s} with attrs={attrs!r}"
lgdo/types/table.py CHANGED
@@ -330,9 +330,10 @@ class Table(Struct, LGDOCollection):
  :func:`numexpr.evaluate`` as `local_dict` argument or to
  :func:`eval` as `locals` argument.
  modules
- a dictionary of additional modules used by the expression. If this is not `None`
- then :func:`eval`is used and the expression can depend on any modules from this dictionary in
- addition to awkward and numpy. These are passed to :func:`eval` as `globals` argument.
+ a dictionary of additional modules used by the expression. If this
+ is not `None` then :func:`eval`is used and the expression can
+ depend on any modules from this dictionary in addition to awkward
+ and numpy. These are passed to :func:`eval` as `globals` argument.
 
  Examples
  --------
@@ -403,7 +404,10 @@ class Table(Struct, LGDOCollection):
  return _make_lgdo(out_data)
 
  except Exception:
- msg = f"Warning {expr} could not be evaluated with numexpr probably due to some not allowed characters, trying with eval()."
+ msg = (
+ f"Warning {expr} could not be evaluated with numexpr probably "
+ "due to some not allowed characters, trying with eval()."
+ )
  log.debug(msg)
 
  # resort to good ol' eval()
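
For context, `Table.eval` first tries `numexpr` and falls back to `eval()` when the expression cannot be parsed (the reflowed message above); passing `modules` forces the `eval()` path. A hedged usage sketch (column names invented, and the `np.sin` call assumes the eval path handles ufuncs as the docstring suggests):

    import numpy as np

    from lgdo import Array, Table

    tbl = Table(col_dict={"a": Array(np.arange(5)), "b": Array(np.ones(5))})
    print(tbl.eval("a + 2 * b"))                      # arithmetic -> numexpr path
    print(tbl.eval("np.sin(a)", modules={"np": np}))  # modules -> eval() path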
lgdo/types/vectorofvectors.py CHANGED
@@ -284,6 +284,9 @@ class VectorOfVectors(LGDOCollection):
 
  return False
 
+ def __hash__(self):
+ return hash(self.name)
+
  def __getitem__(self, i: int) -> NDArray:
  """Return a view of the vector at index `i` along the first axis."""
  if self.ndim == 2: