legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,17 +4,21 @@ import logging
 import sys
 from bisect import bisect_left
 
+import h5py
 import numpy as np
 
 from ....types import Array
 from ... import datatype
 from ...exceptions import LH5DecodeError
+from .utils import read_attrs
 
 log = logging.getLogger(__name__)
 
 
 def _h5_read_ndarray(
     h5d,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -24,48 +28,48 @@ def _h5_read_ndarray(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Array):
         msg = "object buffer is not an Array"
-        raise LH5DecodeError(msg, h5d)
+        raise LH5DecodeError(msg, fname, oname)
 
     # compute the number of rows to read
     # we culled idx above for start_row and n_rows, now we have to apply
     # the constraint of the length of the dataset
     try:
-        ds_n_rows = h5d.shape[0]
+        fspace = h5d.get_space()
+        ds_n_rows = fspace.shape[0]
     except AttributeError as e:
         msg = "does not seem to be an HDF5 dataset"
-        raise LH5DecodeError(msg, h5d) from e
+        raise LH5DecodeError(msg, fname, oname) from e
 
     if idx is not None:
-        if len(idx[0]) > 0 and idx[0][-1] >= ds_n_rows:
+        if len(idx) > 0 and idx[-1] >= ds_n_rows:
            log.warning("idx indexed past the end of the array in the file. Culling...")
-            n_rows_to_read = bisect_left(idx[0], ds_n_rows)
-            idx = (idx[0][:n_rows_to_read],)
-            if len(idx[0]) == 0:
+            n_rows_to_read = bisect_left(idx, ds_n_rows)
+            idx = idx[:n_rows_to_read]
+            if len(idx) == 0:
                log.warning("idx empty after culling.")
-        n_rows_to_read = len(idx[0])
+        n_rows_to_read = len(idx)
     else:
         n_rows_to_read = ds_n_rows - start_row
-    if n_rows_to_read > n_rows:
-        n_rows_to_read = n_rows
-
-    # if idx is passed, check if we can make it a slice instead (faster)
-    change_idx_to_slice = False
-
-    # prepare the selection for the read. Use idx if available
-    if idx is not None:
-        # check if idx is empty and convert to slice instead
-        if len(idx[0]) == 0:
-            source_sel = np.s_[0:0]
-            change_idx_to_slice = True
-        # check if idx is contiguous and increasing
-        # if so, convert it to a slice instead (faster)
-        elif np.all(np.diff(idx[0]) == 1):
-            source_sel = np.s_[idx[0][0] : idx[0][-1] + 1]
-            change_idx_to_slice = True
-        else:
-            source_sel = idx
-    else:
-        source_sel = np.s_[start_row : start_row + n_rows_to_read]
+    n_rows_to_read = min(n_rows_to_read, n_rows)
+
+    if idx is None:
+        fspace.select_hyperslab(
+            (start_row,) + (0,) * (h5d.rank - 1),
+            (1,) * h5d.rank,
+            None,
+            (n_rows_to_read,) + fspace.shape[1:],
+        )
+    elif use_h5idx:
+        # Note that h5s will automatically merge adjacent elements into a range
+        fspace.select_none()
+        for i in idx:
+            fspace.select_hyperslab(
+                (i,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (1,) + fspace.shape[1:],
+                h5py.h5s.SELECT_OR,
+            )
 
     # Now read the array
     if obj_buf is not None and n_rows_to_read > 0:
@@ -74,30 +78,39 @@ def _h5_read_ndarray(
         obj_buf.resize(buf_size)
         dest_sel = np.s_[obj_buf_start:buf_size]
 
-        # this is required to make the read of multiple files faster
-        # until a better solution found.
-        if change_idx_to_slice or idx is None or use_h5idx:
-            h5d.read_direct(obj_buf.nda, source_sel, dest_sel)
+        if idx is None or use_h5idx:
+            mspace = h5py.h5s.create_simple(obj_buf.nda.shape)
+            mspace.select_hyperslab(
+                (obj_buf_start,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (n_rows_to_read,) + fspace.shape[1:],
+            )
+            h5d.read(mspace, fspace, obj_buf.nda)
         else:
-            # it is faster to read the whole object and then do fancy indexing
-            obj_buf.nda[dest_sel] = h5d[...][source_sel]
-
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
         tmp_shape = (0,) + h5d.shape[1:]
         nda = np.empty(tmp_shape, h5d.dtype)
-    elif change_idx_to_slice or idx is None or use_h5idx:
-        nda = h5d[source_sel]
     else:
-        # it is faster to read the whole object and then do fancy indexing
-        nda = h5d[...][source_sel]
+        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        nda = np.empty(mspace.shape, h5d.dtype)
+        if idx is None or use_h5idx:
+            h5d.read(mspace, fspace, nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            nda[:, ...] = tmp[idx, ...]
 
     # Finally, set attributes and return objects
-    attrs = dict(h5d.attrs)
+    attrs = read_attrs(h5d, fname, oname)
 
     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
     if datatype.get_nested_datatype_string(attrs["datatype"]) == "bool":
-        nda = nda.astype(np.bool_)
+        nda = nda.astype(np.bool_, copy=False)
 
     return (nda, attrs, n_rows_to_read)
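The n-dimensional reader now drives HDF5 through h5py's low-level interface: a hyperslab selection on the file dataspace replaces `read_direct` and whole-dataset fancy indexing. A minimal sketch of the pattern, with placeholder file and dataset names (not from this package):

```python
import h5py
import numpy as np

# Sketch: read rows [start, start + n) of a 1-D dataset via the low-level API.
# "data.h5" and "dset" are hypothetical names.
with h5py.File("data.h5", "r") as f:
    h5d = f["dset"].id                      # low-level DatasetID
    fspace = h5d.get_space()                # file-side dataspace
    start, n = 2, 3
    # select one block of n contiguous rows starting at `start`
    fspace.select_hyperslab((start,), (1,), None, (n,))
    out = np.empty((n,), dtype=h5d.dtype)
    mspace = h5py.h5s.create_simple(out.shape)  # memory-side dataspace
    h5d.read(mspace, fspace, out)           # single copy into `out`
```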
@@ -2,20 +2,27 @@ from __future__ import annotations
 
 import logging
 
+import h5py
 import numpy as np
 
 from ....types import Scalar
 from ...exceptions import LH5DecodeError
+from . import utils
 
 log = logging.getLogger(__name__)
 
 
 def _h5_read_scalar(
     h5d,
+    fname,
+    oname,
     obj_buf=None,
 ):
-    value = h5d[()]
-    attrs = dict(h5d.attrs)
+    value = np.empty((), h5d.dtype)
+    sp = h5py.h5s.create(h5py.h5s.SCALAR)
+    h5d.read(sp, sp, value)
+    value = value[()]
+    attrs = utils.read_attrs(h5d, fname, oname)
 
     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
@@ -25,7 +32,7 @@ def _h5_read_scalar(
     if obj_buf is not None:
         if not isinstance(obj_buf, Scalar):
             msg = "object buffer a Scalar"
-            raise LH5DecodeError(msg, h5d)
+            raise LH5DecodeError(msg, fname, oname)
 
         obj_buf.value = value
         obj_buf.attrs.update(attrs)
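Scalar (zero-dimensional) datasets get the same low-level treatment: a SCALAR dataspace is used for both the file and memory side, and the value is unwrapped from a 0-d NumPy array. A sketch with placeholder names:

```python
import h5py
import numpy as np

# Sketch: low-level read of a 0-d (scalar) dataset; names are placeholders.
with h5py.File("data.h5", "r") as f:
    h5d = f["my_scalar"].id
    value = np.empty((), h5d.dtype)        # 0-d receiving buffer
    sp = h5py.h5s.create(h5py.h5s.SCALAR)  # scalar dataspace, reused for both sides
    h5d.read(sp, sp, value)
    value = value[()]                      # unwrap the 0-d array to a Python scalar
```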
@@ -1,12 +1,174 @@
 from __future__ import annotations
 
+import logging
+
+import h5py
+import numpy as np
+
+from .... import types
+from ... import datatype
 from ...exceptions import LH5DecodeError
+from . import scalar
 
+log = logging.getLogger(__name__)
 
-def check_obj_buf_attrs(attrs, new_attrs, obj):
+
+def check_obj_buf_attrs(attrs, new_attrs, fname, oname):
     if set(attrs.keys()) != set(new_attrs.keys()):
         msg = (
             f"existing buffer and new data chunk have different attributes: "
-            f"obj_buf.attrs={attrs} != {obj.file.filename}[{obj.name}].attrs={new_attrs}"
+            f"obj_buf.attrs={attrs} != {fname}[{oname}].attrs={new_attrs}"
         )
-        raise LH5DecodeError(msg, obj)
+        raise LH5DecodeError(msg, fname, oname)
+
+
+def read_attrs(h5o, fname, oname):
+    """Read all attributes for an hdf5 dataset or group using low level API
+    and return them as a dict. Assume all are strings or scalar types."""
+    attrs = {}
+    for i_attr in range(h5py.h5a.get_num_attrs(h5o)):
+        h5a = h5py.h5a.open(h5o, index=i_attr)
+        name = h5a.get_name().decode()
+        if h5a.shape != ():
+            msg = f"attribute {oname} is not a string or scalar"
+            raise LH5DecodeError(msg, fname, oname)
+        val = np.empty((), h5a.dtype)
+        h5a.read(val)
+        if h5a.get_type().get_class() == h5py.h5t.STRING:
+            attrs[name] = val.item().decode()
+        else:
+            attrs[name] = val.item()
+        h5a.close()
+    return attrs
+
+
+def read_n_rows(h5o, fname, oname):
+    """Read number of rows in LH5 object"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype is types.Scalar:
+        return None
+
+    # structs don't have rows
+    if lgdotype is types.Struct:
+        return None
+
+    # tables should have elements with all the same length
+    if lgdotype is types.Table:
+        # read out each of the fields
+        rows_read = None
+        for field in datatype.get_struct_fields(type_attr):
+            obj = h5py.h5o.open(h5o, field.encode())
+            n_rows_read = read_n_rows(obj, fname, field)
+            obj.close()
+            if not rows_read:
+                rows_read = n_rows_read
+            elif rows_read != n_rows_read:
+                log.warning(
+                    f"'{field}' field in table '{oname}' has {rows_read} rows, "
+                    f"{n_rows_read} was expected"
+                )
+
+        return rows_read
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        n_rows = read_n_rows(obj, fname, "cumulative_length")
+        obj.close()
+        return n_rows
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype in (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays):
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        n_rows = read_n_rows(obj, fname, "encoded_data")
+        obj.close()
+        return n_rows
+
+    # return array length (without reading the array!)
+    if issubclass(lgdotype, types.Array):
+        # compute the number of rows to read
+        return h5o.get_space().shape[0]
+
+    msg = f"don't know how to read rows of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
+
+
+def read_size_in_bytes(h5o, fname, oname, field_mask=None):
+    """Read number size in LH5 object in memory (in B)"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype in (
+        types.Scalar,
+        types.Array,
+        types.ArrayOfEqualSizedArrays,
+        types.FixedSizeArray,
+    ):
+        return int(np.prod(h5o.shape) * h5o.dtype.itemsize)
+
+    # structs don't have rows
+    if lgdotype in (types.Struct, types.Histogram, types.Histogram.Axis):
+        size = 0
+        for key in h5o:
+            obj = h5py.h5o.open(h5o, key)
+            size += read_size_in_bytes(obj, fname, oname, field_mask)
+            obj.close()
+        return size
+
+    # tables should have elements with all the same length
+    if lgdotype in (types.Table, types.WaveformTable):
+        # read out each of the fields
+        size = 0
+        if not field_mask:
+            field_mask = datatype.get_struct_fields(type_attr)
+        for field in field_mask:
+            obj = h5py.h5o.open(h5o, field.encode())
+            size += read_size_in_bytes(obj, fname, field)
+            obj.close()
+        return size
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        size = 0
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        size += read_size_in_bytes(obj, fname, "cumulative_length")
+        obj.close()
+        obj = h5py.h5o.open(h5o, b"flattened_data")
+        size += read_size_in_bytes(obj, fname, "flattened_data")
+        obj.close()
+        return size
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype is types.ArrayOfEncodedEqualSizedArrays:
+        obj = h5py.h5o.open(h5o, b"decoded_size")
+        size = scalar._h5_read_scalar(obj, fname, "decoded_size")[0].value
+        obj.close()
+
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        cl = h5py.h5o.open(obj, b"cumulative_length")
+        size *= cl.shape[0]
+        size *= 4  # TODO: UPDATE WHEN CODECS SUPPORT MORE DTYPES
+        obj.close()
+
+        return size
+
+    msg = f"don't know how to read size of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
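The new `read_attrs` helper replaces `dict(h5d.attrs)` with explicit `h5py.h5a` calls, avoiding the high-level `AttributeManager`. The iteration pattern, as a standalone sketch with placeholder names:

```python
import h5py
import numpy as np

# Sketch of the h5py.h5a iteration used by read_attrs above.
with h5py.File("data.h5", "r") as f:
    h5o = f["dset"].id
    attrs = {}
    for i in range(h5py.h5a.get_num_attrs(h5o)):
        h5a = h5py.h5a.open(h5o, index=i)
        val = np.empty((), h5a.dtype)      # scalar attribute buffer
        h5a.read(val)
        if h5a.get_type().get_class() == h5py.h5t.STRING:
            attrs[h5a.get_name().decode()] = val.item().decode()
        else:
            attrs[h5a.get_name().decode()] = val.item()
        h5a.close()
```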
@@ -3,6 +3,7 @@ from __future__ import annotations
 import logging
 import sys
 
+import h5py
 import numba
 import numpy as np
 
@@ -15,12 +16,15 @@ from ...exceptions import LH5DecodeError
 from .array import (
     _h5_read_array,
 )
+from .utils import read_attrs
 
 log = logging.getLogger(__name__)
 
 
 def _h5_read_vector_of_vectors(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -30,12 +34,15 @@ def _h5_read_vector_of_vectors(
 ):
     if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors):
         msg = "object buffer is not a VectorOfVectors"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     # read out cumulative_length
     cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length
+    h5d_cl = h5py.h5d.open(h5g, b"cumulative_length")
     cumulative_length, n_rows_read = _h5_read_array(
-        h5g["cumulative_length"],
+        h5d_cl,
+        fname,
+        f"{oname}/cumulative_length",
         start_row=start_row,
         n_rows=n_rows,
         idx=idx,
@@ -51,17 +58,19 @@ def _h5_read_vector_of_vectors(
     if idx is not None and n_rows_read > 0:
         # get the starting indices for each array in flattened data:
         # the starting index for array[i] is cumulative_length[i-1]
-        idx2 = (np.asarray(idx[0]).copy() - 1,)
+        idx2 = np.asarray(idx).copy() - 1
 
         # re-read cumulative_length with these indices
         # note this will allocate memory for fd_starts!
         fd_start = None
-        if idx2[0][0] == -1:
-            idx2 = (idx2[0][1:],)
+        if idx2[0] == -1:
+            idx2 = idx2[1:]
             fd_start = 0  # this variable avoids an ndarray append
 
         fd_starts, fds_n_rows_read = _h5_read_array(
-            h5g["cumulative_length"],
+            h5d_cl,
+            fname,
+            f"{oname}/cumulative_length",
            start_row=start_row,
            n_rows=n_rows,
            idx=idx2,
@@ -98,7 +107,11 @@ def _h5_read_vector_of_vectors(
         # need to read out the cumulen sample -before- the first sample
         # read above in order to get the starting row of the first
         # vector to read out in flattened_data
-        fd_start = h5g["cumulative_length"][start_row - 1]
+        fspace = h5d_cl.get_space()
+        fspace.select_elements([[start_row - 1]])
+        mspace = h5py.h5s.create(h5py.h5s.SCALAR)
+        fd_start = np.empty((), h5d_cl.dtype)
+        h5d_cl.read(mspace, fspace, fd_start)
 
     # check limits for values that will be used subsequently
     if this_cumulen_nda[-1] < fd_start:
@@ -112,7 +125,7 @@ def _h5_read_vector_of_vectors(
             f"cumulative_length non-increasing between entries "
             f"{start_row} and {start_row+n_rows_read}"
         )
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     # determine the number of rows for the flattened_data readout
     fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0
@@ -126,6 +139,8 @@ def _h5_read_vector_of_vectors(
     # read for flattened_data
     this_cumulen_nda -= fd_start
 
+    h5d_cl.close()
+
     # If we started with a partially-filled buffer, add the
     # appropriate offset for the start of the in-memory flattened
     # data for this read.
@@ -141,20 +156,26 @@ def _h5_read_vector_of_vectors(
         # grow fd_buf if necessary to hold the data
         fdb_size = fd_buf_start + fd_n_rows
         if len(fd_buf) < fdb_size:
-            fd_buf.resize(fdb_size)
+            fd_buf.nda.resize(fdb_size, refcheck=False)
 
     # now read
-    lgdotype = dtypeutils.datatype(h5g["flattened_data"].attrs["datatype"])
+    h5o = h5py.h5o.open(h5g, b"flattened_data")
+    h5a_dtype = h5py.h5a.open(h5o, b"datatype")
+    val = np.empty((), "O")
+    h5a_dtype.read(val)
+    lgdotype = dtypeutils.datatype(val.item().decode())
    if lgdotype is Array:
        _func = _h5_read_array
    elif lgdotype is VectorOfVectors:
        _func = _h5_read_vector_of_vectors
    else:
        msg = "type {lgdotype.__name__} is not supported"
-        raise LH5DecodeError(msg, h5g, "flattened_data")
+        raise LH5DecodeError(msg, fname, f"{oname}/flattened_data")
 
     flattened_data, _ = _func(
-        h5g["flattened_data"],
+        h5o,
+        fname,
+        f"{oname}/flattened_data",
        start_row=fd_start,
        n_rows=fd_n_rows,
        idx=fd_idx,
@@ -162,6 +183,7 @@ def _h5_read_vector_of_vectors(
         obj_buf=fd_buf,
         obj_buf_start=fd_buf_start,
     )
+    h5o.close()
 
     if obj_buf is not None:
         # if the buffer is partially filled, cumulative_length will be invalid
@@ -176,7 +198,7 @@ def _h5_read_vector_of_vectors(
         VectorOfVectors(
             flattened_data=flattened_data,
             cumulative_length=cumulative_length,
-            attrs=dict(h5g.attrs),
+            attrs=read_attrs(h5g, fname, oname),
         ),
         n_rows_read,
     )
@@ -194,4 +216,4 @@ def _make_fd_idx(starts, stops, idx):
         for i in range(starts[j], stops[j]):
             idx[k] = i
             k += 1
-    return (idx,)
+    return idx
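Reading the single `cumulative_length[start_row - 1]` entry now goes through an HDF5 point selection instead of a high-level slice. The pattern in isolation, with placeholder names:

```python
import h5py
import numpy as np

# Sketch: read one element by point selection, as done above for
# cumulative_length[start_row - 1]; names are placeholders.
with h5py.File("data.h5", "r") as f:
    h5d = f["cumulative_length"].id
    fspace = h5d.get_space()
    fspace.select_elements([[4]])              # one (row,) coordinate
    mspace = h5py.h5s.create(h5py.h5s.SCALAR)  # receive as a 0-d array
    fd_start = np.empty((), h5d.dtype)
    h5d.read(mspace, fspace, fd_start)
```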
@@ -71,7 +71,12 @@ def _h5_write_array(
     _attrs = obj.getattrs(datatype=True)
     _attrs.pop("compression", None)
     _attrs.pop("hdf5_settings", None)
-    ds.attrs.update(_attrs)
+    ds.attrs.update(
+        {
+            k: v.encode("utf-8") if isinstance(v, str) else v
+            for k, v in _attrs.items()
+        }
+    )
 
     return
 
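On the write side, string attribute values are now pre-encoded to UTF-8 bytes before `ds.attrs.update`; h5py stores `bytes` values as fixed-length byte strings rather than variable-length strings, which presumably keeps them readable by the new low-level attribute reader. The transformation by itself, with placeholder names:

```python
import h5py

# Sketch: encoding str values to bytes makes h5py store the attributes as
# fixed-length byte strings instead of variable-length strings (assumption
# based on documented h5py behavior; file/dataset names are placeholders).
with h5py.File("demo.h5", "w") as f:
    ds = f.create_dataset("dset", data=[1.0, 2.0, 3.0])
    _attrs = {"datatype": "array<1>{real}", "units": "ns"}
    ds.attrs.update(
        {k: v.encode("utf-8") if isinstance(v, str) else v for k, v in _attrs.items()}
    )
```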
@@ -1,6 +1,8 @@
 from __future__ import annotations
 
 import logging
+import os
+from inspect import signature
 
 import h5py
 
@@ -27,6 +29,10 @@ def _h5_write_lgdo(
 ):
     assert isinstance(obj, types.LGDO)
 
+    file_kwargs = {
+        k: h5py_kwargs[k] for k in h5py_kwargs & signature(h5py.File).parameters.keys()
+    }
+    h5py_kwargs = {k: h5py_kwargs[k] for k in h5py_kwargs - file_kwargs.keys()}
     if wo_mode == "write_safe":
         wo_mode = "w"
     if wo_mode == "append":
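The kwargs split works because `signature(h5py.File).parameters.keys()` is a set-like view, so set operations against the `h5py_kwargs` dict partition it into arguments for the `h5py.File` constructor and arguments passed on to dataset creation. Stand-alone, with illustrative values:

```python
from inspect import signature

import h5py

# Sketch: partition keyword arguments into those accepted by h5py.File
# (e.g. locking) and the rest (e.g. create_dataset options).
h5py_kwargs = {"locking": False, "compression": "gzip", "shuffle": True}
file_keys = h5py_kwargs.keys() & signature(h5py.File).parameters.keys()
file_kwargs = {k: h5py_kwargs[k] for k in file_keys}
dset_kwargs = {k: h5py_kwargs[k] for k in h5py_kwargs.keys() - file_keys}
```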
@@ -46,10 +52,9 @@ def _h5_write_lgdo(
     # In hdf5, 'a' is really "modify" -- in addition to appending, you can
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
-    mode = "w" if wo_mode == "of" else "a"
-
     if not isinstance(lh5_file, h5py.File):
-        lh5_file = h5py.File(lh5_file, mode=mode)
+        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
         f"writing {obj!r}[{start_row}:{n_rows}] as "
@@ -65,8 +70,12 @@ def _h5_write_lgdo(
 
     # struct, table, waveform table or histogram.
     if isinstance(obj, types.Struct):
-        if isinstance(obj, types.Histogram) and wo_mode not in ["w", "o", "of"]:
-            msg = f"can't append-write histogram in wo_mode '{wo_mode}'"
+        if (
+            isinstance(obj, types.Histogram)
+            and wo_mode not in ["w", "o", "of"]
+            and name in group
+        ):
+            msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
             raise LH5EncodeError(msg, lh5_file, group, name)
         if isinstance(obj, types.Histogram) and write_start != 0:
             msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
@@ -20,4 +20,9 @@ def _h5_write_scalar(obj, name, lh5_file, group="/", wo_mode="append"):
         raise LH5EncodeError(msg, lh5_file, group, name)
 
     ds = group.create_dataset(name, shape=(), data=obj.value)
-    ds.attrs.update(obj.attrs)
+    ds.attrs.update(
+        {
+            k: v.encode("utf-8") if isinstance(v, str) else v
+            for k, v in obj.attrs.items()
+        }
+    )