legend-pydataobj 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (46)
  1. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/METADATA +1 -1
  2. legend_pydataobj-1.6.0.dist-info/RECORD +54 -0
  3. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/WHEEL +1 -1
  4. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/entry_points.txt +1 -0
  5. lgdo/__init__.py +7 -4
  6. lgdo/_version.py +2 -2
  7. lgdo/cli.py +237 -12
  8. lgdo/compression/__init__.py +1 -0
  9. lgdo/lh5/__init__.py +9 -1
  10. lgdo/lh5/_serializers/__init__.py +43 -0
  11. lgdo/lh5/_serializers/read/__init__.py +0 -0
  12. lgdo/lh5/_serializers/read/array.py +34 -0
  13. lgdo/lh5/_serializers/read/composite.py +405 -0
  14. lgdo/lh5/_serializers/read/encoded.py +129 -0
  15. lgdo/lh5/_serializers/read/ndarray.py +104 -0
  16. lgdo/lh5/_serializers/read/scalar.py +34 -0
  17. lgdo/lh5/_serializers/read/utils.py +12 -0
  18. lgdo/lh5/_serializers/read/vector_of_vectors.py +195 -0
  19. lgdo/lh5/_serializers/write/__init__.py +0 -0
  20. lgdo/lh5/_serializers/write/array.py +92 -0
  21. lgdo/lh5/_serializers/write/composite.py +259 -0
  22. lgdo/lh5/_serializers/write/scalar.py +23 -0
  23. lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
  24. lgdo/lh5/core.py +272 -0
  25. lgdo/lh5/datatype.py +46 -0
  26. lgdo/lh5/exceptions.py +34 -0
  27. lgdo/lh5/iterator.py +1 -1
  28. lgdo/lh5/store.py +69 -1160
  29. lgdo/lh5/tools.py +27 -53
  30. lgdo/lh5/utils.py +130 -27
  31. lgdo/lh5_store.py +11 -2
  32. lgdo/logging.py +1 -0
  33. lgdo/types/__init__.py +1 -0
  34. lgdo/types/array.py +1 -0
  35. lgdo/types/arrayofequalsizedarrays.py +1 -0
  36. lgdo/types/encoded.py +3 -8
  37. lgdo/types/fixedsizearray.py +1 -0
  38. lgdo/types/struct.py +1 -0
  39. lgdo/types/table.py +37 -5
  40. lgdo/types/vectorofvectors.py +314 -458
  41. lgdo/types/vovutils.py +320 -0
  42. lgdo/types/waveformtable.py +1 -0
  43. lgdo/utils.py +1 -32
  44. legend_pydataobj-1.5.1.dist-info/RECORD +0 -36
  45. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/LICENSE +0 -0
  46. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/top_level.txt +0 -0
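The headline change in this release is a refactor of the LH5 I/O layer: the monolithic reader/writer in `lgdo/lh5/store.py` (-1160 lines) is split into per-type serializer modules under `lgdo/lh5/_serializers/`, with a new `lgdo/lh5/core.py` on top. The hunks below show the new read and write serializers for the ragged `VectorOfVectors` type and its companions. As a reminder of the data model they operate on (an illustrative sketch, not code from this diff): a ragged list is stored as a flat data array plus a cumulative-length array.

```python
import numpy as np

# LGDO's VectorOfVectors stores the ragged list [[1, 2], [3], [4, 5, 6]] as:
flattened_data = np.array([1, 2, 3, 4, 5, 6])
cumulative_length = np.array([2, 3, 6])  # cumulative_length[i] = end of vector i

def vector(i):
    # vector i spans flattened_data[cumulative_length[i-1]:cumulative_length[i]]
    start = 0 if i == 0 else cumulative_length[i - 1]
    return flattened_data[start : cumulative_length[i]]

assert vector(0).tolist() == [1, 2]
assert vector(2).tolist() == [4, 5, 6]
```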
lgdo/lh5/_serializers/read/vector_of_vectors.py
@@ -0,0 +1,195 @@
+from __future__ import annotations
+
+import logging
+import sys
+
+import numba
+import numpy as np
+
+from ....types import (
+    Array,
+    VectorOfVectors,
+)
+from ... import datatype as dtypeutils
+from ...exceptions import LH5DecodeError
+from .array import (
+    _h5_read_array,
+)
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_vector_of_vectors(
+    name,
+    h5f,
+    start_row=0,
+    n_rows=sys.maxsize,
+    idx=None,
+    use_h5idx=False,
+    obj_buf=None,
+    obj_buf_start=0,
+):
+    if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors):
+        msg = "object buffer is not a VectorOfVectors"
+        raise LH5DecodeError(msg, h5f, name)
+
+    # read out cumulative_length
+    cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length
+    cumulative_length, n_rows_read = _h5_read_array(
+        f"{name}/cumulative_length",
+        h5f,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        obj_buf=cumulen_buf,
+        obj_buf_start=obj_buf_start,
+    )
+    # get a view of just what was read out for cleaner code below
+    this_cumulen_nda = cumulative_length.nda[
+        obj_buf_start : obj_buf_start + n_rows_read
+    ]
+
+    if idx is not None and n_rows_read > 0:
+        # get the starting indices for each array in flattened data:
+        # the starting index for array[i] is cumulative_length[i-1]
+        idx2 = (np.asarray(idx[0]).copy() - 1,)
+
+        # re-read cumulative_length with these indices
+        # note this will allocate memory for fd_starts!
+        fd_start = None
+        if idx2[0][0] == -1:
+            idx2 = (idx2[0][1:],)
+            fd_start = 0  # this variable avoids an ndarray append
+
+        fd_starts, fds_n_rows_read = _h5_read_array(
+            f"{name}/cumulative_length",
+            h5f,
+            start_row=start_row,
+            n_rows=n_rows,
+            idx=idx2,
+            use_h5idx=use_h5idx,
+            obj_buf=None,
+        )
+        fd_starts = fd_starts.nda  # we just need the nda
+        if fd_start is None:
+            fd_start = fd_starts[0]
+
+        # compute the length that flattened_data will have after the
+        # fancy-indexed read
+        fd_n_rows = np.sum(this_cumulen_nda[-len(fd_starts) :] - fd_starts)
+        if fd_start == 0:
+            fd_n_rows += this_cumulen_nda[0]
+
+        # now make fd_idx
+        fd_idx = np.empty(fd_n_rows, dtype="int32")
+        fd_idx = _make_fd_idx(fd_starts, this_cumulen_nda, fd_idx)
+
+        # Now clean up this_cumulen_nda, to be ready
+        # to match the in-memory version of flattened_data. Note: these
+        # operations on the view change the original array because they are
+        # numpy arrays, not lists.
+        this_cumulen_nda[-len(fd_starts) :] -= fd_starts
+        np.cumsum(this_cumulen_nda, out=this_cumulen_nda)
+
+    else:
+        fd_idx = None
+
+        # determine the start_row and n_rows for the flattened_data readout
+        fd_start = 0
+        if start_row > 0 and n_rows_read > 0:
+            # need to read out the cumulen sample -before- the first sample
+            # read above in order to get the starting row of the first
+            # vector to read out in flattened_data
+            fd_start = h5f[f"{name}/cumulative_length"][start_row - 1]

+            # check limits for values that will be used subsequently
+            if this_cumulen_nda[-1] < fd_start:
+                log.debug(
+                    f"this_cumulen_nda[-1] = {this_cumulen_nda[-1]}, "
+                    f"fd_start = {fd_start}, "
+                    f"start_row = {start_row}, "
+                    f"n_rows_read = {n_rows_read}"
+                )
+                msg = (
+                    f"cumulative_length non-increasing between entries "
+                    f"{start_row} and {start_row+n_rows_read}"
+                )
+                raise LH5DecodeError(msg, h5f, name)
+
+        # determine the number of rows for the flattened_data readout
+        fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0
+
+        # Now done with this_cumulen_nda, so we can clean it up to be ready
+        # to match the in-memory version of flattened_data. Note: these
+        # operations on the view change the original array because they are
+        # numpy arrays, not lists.
+        #
+        # First we need to subtract off the in-file offset for the start of
+        # read for flattened_data
+        this_cumulen_nda -= fd_start
+
+    # If we started with a partially-filled buffer, add the
+    # appropriate offset for the start of the in-memory flattened
+    # data for this read.
+    fd_buf_start = np.uint32(0)
+    if obj_buf_start > 0:
+        fd_buf_start = cumulative_length.nda[obj_buf_start - 1]
+        this_cumulen_nda += fd_buf_start
+
+    # Now prepare the object buffer if necessary
+    fd_buf = None
+    if obj_buf is not None:
+        fd_buf = obj_buf.flattened_data
+        # grow fd_buf if necessary to hold the data
+        fdb_size = fd_buf_start + fd_n_rows
+        if len(fd_buf) < fdb_size:
+            fd_buf.resize(fdb_size)
+
+    # now read
+    lgdotype = dtypeutils.datatype(h5f[f"{name}/flattened_data"].attrs["datatype"])
+    if lgdotype is Array:
+        _func = _h5_read_array
+    elif lgdotype is VectorOfVectors:
+        _func = _h5_read_vector_of_vectors
+    else:
+        msg = f"type {lgdotype.__name__} is not supported"
+        raise LH5DecodeError(msg, h5f, f"{name}/flattened_data")
+
+    flattened_data, _ = _func(
+        f"{name}/flattened_data",
+        h5f,
+        start_row=fd_start,
+        n_rows=fd_n_rows,
+        idx=fd_idx,
+        use_h5idx=use_h5idx,
+        obj_buf=fd_buf,
+        obj_buf_start=fd_buf_start,
+    )
+
+    if obj_buf is not None:
+        return obj_buf, n_rows_read
+
+    return (
+        VectorOfVectors(
+            flattened_data=flattened_data,
+            cumulative_length=cumulative_length,
+            attrs=h5f[name].attrs,
+        ),
+        n_rows_read,
+    )
+
+
+@numba.njit(parallel=False, fastmath=True)
+def _make_fd_idx(starts, stops, idx):
+    k = 0
+    if len(starts) < len(stops):
+        for i in range(stops[0]):
+            idx[k] = i
+            k += 1
+        stops = stops[1:]
+    for j in range(len(starts)):
+        for i in range(starts[j], stops[j]):
+            idx[k] = i
+            k += 1
+    return (idx,)
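The numba-compiled `_make_fd_idx` above expands per-row (start, stop) offset pairs into a flat fancy index into `flattened_data`. A plain-numpy sketch of the same expansion (hypothetical helper name, illustration only):

```python
import numpy as np

def make_fd_idx(starts, stops):
    # starts[j]: where selected vector j begins in flattened_data (read from
    # the cumulative_length entry before each selected row); stops: the
    # corresponding cumulative_length values of the selected rows.
    out = []
    if len(starts) < len(stops):  # first selected vector starts at offset 0
        out.extend(range(stops[0]))
        stops = stops[1:]
    for start, stop in zip(starts, stops):
        out.extend(range(start, stop))
    return np.asarray(out, dtype="int32")

# vectors spanning flattened_data[2:3] and flattened_data[6:10]
print(make_fd_idx(np.array([2, 6]), np.array([3, 10])))  # [2 6 7 8 9]
```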
lgdo/lh5/_serializers/write/array.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import logging
+
+import h5py
+import numpy as np
+
+from .... import types
+from ...exceptions import LH5EncodeError
+
+log = logging.getLogger(__name__)
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
+
+
+def _h5_write_array(
+    obj,
+    name,
+    lh5_file,
+    group="/",
+    start_row=0,
+    n_rows=None,
+    wo_mode="append",
+    write_start=0,
+    **h5py_kwargs,
+):
+    assert isinstance(obj, types.Array)
+
+    if n_rows is None or n_rows > obj.nda.shape[0] - start_row:
+        n_rows = obj.nda.shape[0] - start_row
+
+    nda = obj.nda[start_row : start_row + n_rows]
+
+    # hack to store bools as uint8 for C / Julia compliance
+    if nda.dtype.name == "bool":
+        nda = nda.astype(np.uint8)
+
+    # need to create dataset from ndarray the first time for speed
+    # creating an empty dataset and appending to that is super slow!
+    if (wo_mode != "a" and write_start == 0) or name not in group:
+        # this is needed in order to have a resizable (in the first
+        # axis) data set, i.e. rows can be appended later
+        # NOTE: this automatically turns chunking on!
+        maxshape = (None,) + nda.shape[1:]
+        h5py_kwargs.setdefault("maxshape", maxshape)
+
+        if wo_mode == "o" and name in group:
+            log.debug(f"overwriting {name} in {group}")
+            del group[name]
+
+        # set default compression options
+        for k, v in DEFAULT_HDF5_SETTINGS.items():
+            h5py_kwargs.setdefault(k, v)
+
+        # compress using the 'compression' LGDO attribute, if available
+        if "compression" in obj.attrs:
+            comp_algo = obj.attrs["compression"]
+            if isinstance(comp_algo, dict):
+                h5py_kwargs |= obj.attrs["compression"]
+            else:
+                h5py_kwargs["compression"] = obj.attrs["compression"]
+
+        # and even the 'hdf5_settings' one, preferred
+        if "hdf5_settings" in obj.attrs:
+            h5py_kwargs |= obj.attrs["hdf5_settings"]
+
+        # create HDF5 dataset
+        ds = group.create_dataset(name, data=nda, **h5py_kwargs)
+
+        # attach HDF5 dataset attributes, but not "compression"!
+        _attrs = obj.getattrs(datatype=True)
+        _attrs.pop("compression", None)
+        _attrs.pop("hdf5_settings", None)
+        ds.attrs.update(_attrs)
+
+        return
+
+    # Now append or overwrite
+    ds = group[name]
+    if not isinstance(ds, h5py.Dataset):
+        msg = (
+            f"existing HDF5 object '{name}' in group '{group}'"
+            " is not a dataset! Cannot overwrite or append"
+        )
+        raise LH5EncodeError(msg, lh5_file, group, name)
+
+    old_len = ds.shape[0]
+    if wo_mode == "a":
+        write_start = old_len
+
+    ds.resize(write_start + nda.shape[0], axis=0)
+    ds[write_start:] = nda
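The create-then-resize pattern above is what makes appending cheap: the dataset is created once from a full ndarray with `maxshape=(None, ...)`, which makes axis 0 resizable and turns chunking on, and later appends only grow the dataset and write into the new tail. A minimal standalone sketch (file and dataset names are invented for the demo):

```python
import h5py
import numpy as np

with h5py.File("demo.lh5", "w") as f:
    # first write: create from data, resizable along axis 0
    f.create_dataset(
        "data", data=np.arange(10), maxshape=(None,), shuffle=True, compression="gzip"
    )

with h5py.File("demo.lh5", "a") as f:
    # append (wo_mode="a"): grow the dataset, then write the tail
    ds = f["data"]
    more = np.arange(5)
    write_start = ds.shape[0]
    ds.resize(write_start + more.shape[0], axis=0)
    ds[write_start:] = more
    assert ds.shape == (15,)
```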
lgdo/lh5/_serializers/write/composite.py
@@ -0,0 +1,259 @@
+from __future__ import annotations
+
+import logging
+
+import h5py
+
+from .... import compression, types
+from ... import datatype, utils
+from ...exceptions import LH5EncodeError
+from .array import _h5_write_array
+from .scalar import _h5_write_scalar
+from .vector_of_vectors import _h5_write_vector_of_vectors
+
+log = logging.getLogger(__name__)
+
+
+def _h5_write_lgdo(
+    obj,
+    name,
+    lh5_file,
+    group="/",
+    start_row=0,
+    n_rows=None,
+    wo_mode="append",
+    write_start=0,
+    **h5py_kwargs,
+):
+    assert isinstance(obj, types.LGDO)
+
+    if wo_mode == "write_safe":
+        wo_mode = "w"
+    if wo_mode == "append":
+        wo_mode = "a"
+    if wo_mode == "overwrite":
+        wo_mode = "o"
+    if wo_mode == "overwrite_file":
+        wo_mode = "of"
+        write_start = 0
+    if wo_mode == "append_column":
+        wo_mode = "ac"
+    if wo_mode not in ["w", "a", "o", "of", "ac"]:
+        msg = f"unknown wo_mode '{wo_mode}'"
+        raise LH5EncodeError(msg, lh5_file, group, name)
+
+    # "mode" is for the h5py.File and wo_mode is for this function
+    # In hdf5, 'a' is really "modify" -- in addition to appending, you can
+    # change any object in the file. So we use file:append for
+    # write_object:overwrite.
+    mode = "w" if wo_mode == "of" else "a"
+
+    if not isinstance(lh5_file, h5py.File):
+        lh5_file = h5py.File(lh5_file, mode=mode)
+
+    log.debug(
+        f"writing {obj!r}[{start_row}:{n_rows}] as "
+        f"{lh5_file.filename}:{group}/{name}[{write_start}:], "
+        f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
+    )
+
+    group = utils.get_h5_group(group, lh5_file)
+
+    if wo_mode == "w" and name in group:
+        msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
+        raise LH5EncodeError(msg, lh5_file, group, name)
+
+    # struct or table or waveform table
+    if isinstance(obj, types.Struct):
+        return _h5_write_struct(
+            obj,
+            name,
+            lh5_file,
+            group=group,
+            start_row=start_row,
+            n_rows=n_rows,
+            wo_mode=wo_mode,
+            write_start=write_start,
+            **h5py_kwargs,
+        )
+
+    # scalars
+    if isinstance(obj, types.Scalar):
+        return _h5_write_scalar(obj, name, lh5_file, group, wo_mode)
+
+    # vector of encoded vectors
+    if isinstance(
+        obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
+    ):
+        group = utils.get_h5_group(
+            name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
+        )
+
+        # ask not to further compress flattened_data, it is already compressed!
+        obj.encoded_data.flattened_data.attrs["compression"] = None
+
+        _h5_write_vector_of_vectors(
+            obj.encoded_data,
+            "encoded_data",
+            lh5_file,
+            group=group,
+            start_row=start_row,
+            n_rows=n_rows,
+            wo_mode=wo_mode,
+            write_start=write_start,
+            **h5py_kwargs,
+        )
+
+        if isinstance(obj.decoded_size, types.Scalar):
+            _h5_write_scalar(
+                obj.decoded_size,
+                "decoded_size",
+                lh5_file,
+                group=group,
+                wo_mode=wo_mode,
+            )
+        else:
+            _h5_write_array(
+                obj.decoded_size,
+                "decoded_size",
+                lh5_file,
+                group=group,
+                start_row=start_row,
+                n_rows=n_rows,
+                wo_mode=wo_mode,
+                write_start=write_start,
+                **h5py_kwargs,
+            )
+
+        return None
+
+    # vector of vectors
+    if isinstance(obj, types.VectorOfVectors):
+        return _h5_write_vector_of_vectors(
+            obj,
+            name,
+            lh5_file,
+            group=group,
+            start_row=start_row,
+            n_rows=n_rows,
+            wo_mode=wo_mode,
+            write_start=write_start,
+            **h5py_kwargs,
+        )
+
+    # if we get this far, must be one of the Array types
+    if isinstance(obj, types.Array):
+        return _h5_write_array(
+            obj,
+            name,
+            lh5_file,
+            group=group,
+            start_row=start_row,
+            n_rows=n_rows,
+            wo_mode=wo_mode,
+            write_start=write_start,
+            **h5py_kwargs,
+        )
+
+    msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
+    raise LH5EncodeError(msg, lh5_file, group, name)
+
+
+def _h5_write_struct(
+    obj,
+    name,
+    lh5_file,
+    group="/",
+    start_row=0,
+    n_rows=None,
+    wo_mode="append",
+    write_start=0,
+    **h5py_kwargs,
+):
+    assert isinstance(obj, types.Struct)
+
+    # In order to append a column, we need to update the
+    # `table{old_fields}` value in `group.attrs["datatype"]` to include
+    # the new fields. One way to do this is to override
+    # `obj.attrs["datatype"]` to include old and new fields. Then we
+    # can write the fields to the table as normal.
+    if wo_mode == "ac":
+        old_group = utils.get_h5_group(name, group)
+        lgdotype = datatype.datatype(old_group.attrs["datatype"])
+        fields = datatype.get_struct_fields(old_group.attrs["datatype"])
+        if not issubclass(lgdotype, types.Struct):
+            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
+        # If the mode is `append_column`, make sure we aren't appending
+        # a table that has a column of the same name as in the existing
+        # table. Also make sure that the field we are adding has the
+        # same size
+        if len(list(set(fields).intersection(set(obj.keys())))) != 0:
+            msg = (
+                f"Can't append {list(set(fields).intersection(set(obj.keys())))} "
+                "column(s) to a table with the same field(s)"
+            )
+            raise LH5EncodeError(msg, lh5_file, group, name)
+        # It doesn't matter what key we access, as all fields in the old table have the same size
+        if old_group[next(iter(old_group.keys()))].size != obj.size:
+            msg = (
+                f"Table sizes don't match. Trying to append column of size {obj.size} "
+                f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
+            )
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
+        # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
+        fields.extend(list(obj.keys()))
+        obj.attrs.pop("datatype")
+        obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}"
+
+    group = utils.get_h5_group(
+        name,
+        group,
+        grp_attrs=obj.attrs,
+        overwrite=(wo_mode in ["o", "ac"]),
+    )
+    # If the mode is overwrite, then we need to peek into the file's
+    # table's existing fields. If we are writing a new table to the
+    # group that does not contain an old field, we should delete that
+    # old field from the file
+    if wo_mode == "o":
+        # Find the old keys in the group that are not present in the
+        # new table's keys, then delete them
+        for key in list(set(group.keys()) - set(obj.keys())):
+            log.debug(f"{key} is not present in new table, deleting field")
+            del group[key]
+
+    for field in obj:
+        # eventually compress waveform table values with LGDO's
+        # custom codecs before writing
+        # if waveformtable.values.attrs["compression"] is NOT a
+        # WaveformCodec, just leave it there
+        obj_fld = None
+        if (
+            isinstance(obj, types.WaveformTable)
+            and field == "values"
+            and not isinstance(obj.values, types.VectorOfEncodedVectors)
+            and not isinstance(obj.values, types.ArrayOfEncodedEqualSizedArrays)
+            and "compression" in obj.values.attrs
+            and isinstance(obj.values.attrs["compression"], compression.WaveformCodec)
+        ):
+            codec = obj.values.attrs["compression"]
+            obj_fld = compression.encode(obj.values, codec=codec)
+        else:
+            obj_fld = obj[field]
+
+        # Convert keys to string for dataset names
+        f = str(field)
+        _h5_write_lgdo(
+            obj_fld,
+            f,
+            lh5_file,
+            group=group,
+            start_row=start_row,
+            n_rows=n_rows,
+            wo_mode=wo_mode,
+            write_start=write_start,
+            **h5py_kwargs,
+        )
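For `wo_mode="ac"` (append_column), the key step above is rewriting the group's `datatype` attribute so the on-disk `table{...}` field list includes the new columns. In miniature, with a hypothetical stand-in for `datatype.get_struct_fields`:

```python
def get_struct_fields(dt: str) -> list[str]:
    # hypothetical stand-in for lgdo.lh5.datatype.get_struct_fields
    return dt[dt.index("{") + 1 : dt.rindex("}")].split(",")

old_datatype = "table{energy,timestamp}"
fields = get_struct_fields(old_datatype)
fields.extend(["baseline"])  # the column(s) being appended

print("table" + "{" + ",".join(fields) + "}")
# table{energy,timestamp,baseline}
```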
lgdo/lh5/_serializers/write/scalar.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import logging
+
+from .... import types
+from ...exceptions import LH5EncodeError
+
+log = logging.getLogger(__name__)
+
+
+def _h5_write_scalar(obj, name, lh5_file, group="/", wo_mode="append"):
+    assert isinstance(obj, types.Scalar)
+
+    if name in group:
+        if wo_mode in ["o", "a"]:
+            log.debug(f"overwriting {name} in {group}")
+            del group[name]
+        else:
+            msg = f"tried to overwrite but wo_mode is {wo_mode!r}"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
+    ds = group.create_dataset(name, shape=(), data=obj.value)
+    ds.attrs.update(obj.attrs)
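Scalars end up as zero-dimensional HDF5 datasets (`shape=()`), which is plain h5py; a minimal sketch with an invented file name and attribute value:

```python
import h5py

with h5py.File("scalar-demo.lh5", "w") as f:
    ds = f.create_dataset("my_scalar", shape=(), data=3.14)
    ds.attrs["datatype"] = "real"  # illustrative LGDO-style attribute
    assert f["my_scalar"][()] == 3.14
```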
lgdo/lh5/_serializers/write/vector_of_vectors.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+import logging
+
+from .... import types
+from ... import utils
+from ...exceptions import LH5EncodeError
+from .array import _h5_write_array
+
+log = logging.getLogger(__name__)
+
+
+def _h5_write_vector_of_vectors(
+    obj,
+    name,
+    lh5_file,
+    group="/",
+    start_row=0,
+    n_rows=None,
+    wo_mode="append",
+    write_start=0,
+    **h5py_kwargs,
+):
+    assert isinstance(obj, types.VectorOfVectors)
+
+    group = utils.get_h5_group(
+        name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
+    )
+    if n_rows is None or n_rows > obj.cumulative_length.nda.shape[0] - start_row:
+        n_rows = obj.cumulative_length.nda.shape[0] - start_row
+
+    # if appending we need to add an appropriate offset to the
+    # cumulative lengths as appropriate for the in-file object
+    offset = 0  # declare here because we have to subtract it off at the end
+    if (wo_mode in ("a", "o")) and "cumulative_length" in group:
+        len_cl = len(group["cumulative_length"])
+        if wo_mode == "a":
+            write_start = len_cl
+        if len_cl > 0:
+            offset = group["cumulative_length"][write_start - 1]
+
+    # First write flattened_data array. Only write rows with data.
+    fd_start = 0 if start_row == 0 else obj.cumulative_length.nda[start_row - 1]
+    fd_n_rows = (
+        obj.cumulative_length.nda[start_row + n_rows - 1] - fd_start
+        if len(obj.cumulative_length) > 0
+        else 0
+    )
+
+    if isinstance(obj.flattened_data, types.Array):
+        _func = _h5_write_array
+    elif isinstance(obj.flattened_data, types.VectorOfVectors):
+        _func = _h5_write_vector_of_vectors
+    else:
+        msg = (
+            "don't know how to serialize to disk flattened_data "
+            f"of {type(obj.flattened_data).__name__} type"
+        )
+        raise LH5EncodeError(msg, lh5_file, group, f"{name}.flattened_data")
+
+    _func(
+        obj.flattened_data,
+        "flattened_data",
+        lh5_file,
+        group=group,
+        start_row=fd_start,
+        n_rows=fd_n_rows,
+        wo_mode=wo_mode,
+        write_start=offset,
+        **h5py_kwargs,
+    )
+
+    # now offset is used to give appropriate in-file values for
+    # cumulative_length. Need to adjust it for start_row
+    if start_row > 0:
+        offset -= obj.cumulative_length.nda[start_row - 1]
+
+    # Add offset to obj.cumulative_length itself to avoid memory allocation.
+    # Then subtract it off after writing! (otherwise it will be changed
+    # upon return)
+    cl_dtype = obj.cumulative_length.nda.dtype.type
+    obj.cumulative_length.nda += cl_dtype(offset)
+
+    _h5_write_array(
+        obj.cumulative_length,
+        "cumulative_length",
+        lh5_file,
+        group=group,
+        start_row=start_row,
+        n_rows=n_rows,
+        wo_mode=wo_mode,
+        write_start=write_start,
+        **h5py_kwargs,
+    )
+    obj.cumulative_length.nda -= cl_dtype(offset)
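The offset bookkeeping above exists because in-memory `cumulative_length` values are relative to this object's own `flattened_data`, while on disk they must be relative to the full `flattened_data` dataset, including rows written earlier. A numpy sketch of the arithmetic when appending `[[7], [8, 9]]` to a file that already holds six flattened rows:

```python
import numpy as np

on_disk_cl = np.array([2, 3, 6])  # cumulative_length already in the file
new_cl = np.array([1, 3])         # in-memory cumulative_length of [[7], [8, 9]]

offset = on_disk_cl[-1]  # 6: flattened rows already on disk
print(np.concatenate([on_disk_cl, new_cl + offset]))  # [2 3 6 7 9]
```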