legend-pydataobj 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/RECORD +8 -8
- lgdo/_version.py +2 -2
- lgdo/lh5_store.py +150 -46
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/WHEEL +0 -0
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/RECORD CHANGED
@@ -1,8 +1,8 @@
 lgdo/__init__.py,sha256=mY6pUNy2yJ2MYzq_ZdhVZZ7xItBW1KJ8h9qA29bOECU,2878
-lgdo/_version.py,sha256=
+lgdo/_version.py,sha256=R8-T9fmURjcuoxYpHTAjyNAhgJPDtI2jogCjqYYkfCU,411
 lgdo/cli.py,sha256=6o2vGwEq0Fq1y67RTxOHjkVNmN9XGhIBnb8DFFm8ANQ,1428
 lgdo/lgdo_utils.py,sha256=LvqE_eQZjKOuLrocbxc21rvWvh7NA4BSiaJh-jhlxVs,5598
-lgdo/lh5_store.py,sha256
+lgdo/lh5_store.py,sha256=-ceGURWdu0jKTsDTL9bqQLxQ0T8USeeSXAk2cqd-2UU,74246
 lgdo/logging.py,sha256=Nu3wgIoWN7cyUxuzPom5rMwFvTlBu8p8d9uONHDquRg,965
 lgdo/compression/__init__.py,sha256=oT9OXiDDxC7BZciWrQVfHZNkOxXfj4p8EpF2tF04w84,1091
 lgdo/compression/base.py,sha256=ujQY2kYF4z3ZdAy7gXaoDPXFbG2Av1IQ1Nnx6UGLjmk,896
@@ -21,9 +21,9 @@ lgdo/types/struct.py,sha256=UxV0wnCHoQM5rSmzEC9EIKWYV6drHVyK5Ab7UQztuj4,2984
 lgdo/types/table.py,sha256=kgJtI4Ea6jNhsQWS_R-9Ilt7Xm9n5B97sv4Cq6m5q7E,12667
 lgdo/types/vectorofvectors.py,sha256=1oxKJDX8VVWpmvUUDHHEzEYw0RRWJrMjOB-jHRY12N4,21859
 lgdo/types/waveform_table.py,sha256=52vqjGudX5_ZR1-b087jx3vuTxJ_yEPO-dO8Dpi0ceg,9407
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
+legend_pydataobj-1.4.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+legend_pydataobj-1.4.0.dist-info/METADATA,sha256=NIX9NkqNDbKopKc0EgUEzy7fBdALFOfiGRJjHh1b01w,3577
+legend_pydataobj-1.4.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
+legend_pydataobj-1.4.0.dist-info/entry_points.txt,sha256=j22HoS-1cVhTtKJkDnKB49uNH0nEVER2Tpw-lVh1aws,41
+legend_pydataobj-1.4.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.4.0.dist-info/RECORD,,
lgdo/_version.py CHANGED

lgdo/lh5_store.py CHANGED
@@ -38,7 +38,7 @@ LGDO = Union[Array, Scalar, Struct, VectorOfVectors]
 
 log = logging.getLogger(__name__)
 
-
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
 
 
 class LH5Store:
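The new module-level `DEFAULT_HDF5_SETTINGS` dictionary holds the baseline keyword arguments (byte shuffling plus gzip) applied to every dataset that `LH5Store` writes, unless the caller or the object's attributes override them. A minimal sketch of how such defaults combine with user options in a plain `h5py` call; the file name and array are invented for illustration:

```python
import h5py
import numpy as np

# baseline dataset options, mirroring the new module-level constant
DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

data = np.arange(1000, dtype="float32")  # stand-in for an LGDO payload

with h5py.File("example.lh5", "w") as f:  # hypothetical output file
    kwargs = {"compression_opts": 4}      # user-supplied options win
    for k, v in DEFAULT_HDF5_SETTINGS.items():
        kwargs.setdefault(k, v)           # defaults only fill the gaps
    f.create_dataset("data", data=data, maxshape=(None,), **kwargs)
```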
@@ -169,6 +169,7 @@ class LH5Store:
         start_row: int = 0,
         n_rows: int = sys.maxsize,
         idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None,
+        use_h5idx: bool = False,
         field_mask: dict[str, bool] | list[str] | tuple[str] = None,
         obj_buf: LGDO = None,
         obj_buf_start: int = 0,
@@ -176,6 +177,14 @@ class LH5Store:
     ) -> tuple[LGDO, int]:
         """Read LH5 object data from a file.
 
+        Use the ``idx`` parameter to read out particular rows of the data. The ``use_h5idx`` flag
+        controls whether *only* those rows are read from disk or if the rows are indexed after reading
+        the entire object. Reading individual rows can be orders of magnitude slower than reading
+        the whole object and then indexing the desired rows. The default behavior (``use_h5idx=False``)
+        is to use slightly more memory for a much faster read. See
+        `legend-pydataobj #29 <https://github.com/legend-exp/legend-pydataobj/issues/29>`_
+        for additional information.
+
         Parameters
         ----------
         name
@@ -192,16 +201,27 @@ class LH5Store:
             actual number of rows read will be returned as one of the return
             values (see below).
         idx
-            For NumPy-style "fancying indexing" for the read
-            rows
-            axis is supported, so tuple arguments
-            is not false, `idx` will be truncated to
-            with a list of files, can pass in a list of
-            file) or use a long contiguous list (e.g. built from a previous
+            For NumPy-style "fancying indexing" for the read to select only some
+            rows, e.g. after applying some cuts to particular columns.
+            Only selection along the first axis is supported, so tuple arguments
+            must be one-tuples. If `n_rows` is not false, `idx` will be truncated to
+            `n_rows` before reading. To use with a list of files, can pass in a list of
+            `idx`'s (one for each file) or use a long contiguous list (e.g. built from a previous
             identical read). If used in conjunction with `start_row` and `n_rows`,
             will be sliced to obey those constraints, where `n_rows` is
             interpreted as the (max) number of *selected* values (in `idx`) to be
-            read out.
+            read out. Note that the ``use_h5idx`` parameter controls some behaviour of the
+            read and that the default behavior (``use_h5idx=False``) prioritizes speed over
+            a small memory penalty.
+        use_h5idx
+            ``True`` will directly pass the ``idx`` parameter to the underlying
+            ``h5py`` call such that only the selected rows are read directly into memory,
+            which conserves memory at the cost of speed. There can be a significant penalty
+            to speed for larger files (1 - 2 orders of magnitude longer time).
+            ``False`` (default) will read the entire object into memory before
+            performing the indexing. The default is much faster but requires additional memory,
+            though a relatively small amount in the typical use case. It is recommended to
+            leave this parameter as its default.
         field_mask
             For tables and structs, determines which fields get written out.
             Only applies to immediate fields of the requested objects. If a dict
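A usage sketch of the new keyword, based on the signature above; the file and dataset names are invented and not taken from the diff:

```python
import numpy as np
from lgdo import LH5Store

store = LH5Store()
rows = np.array([0, 5, 42, 1000])  # rows surviving some hypothetical cut

# default (use_h5idx=False): read the whole object, then index it in memory (fast)
obj, n_read = store.read_object("geds/raw/energy", "run0.lh5", idx=rows)

# memory-lean alternative: let h5py resolve the selection directly on disk
obj, n_read = store.read_object(
    "geds/raw/energy", "run0.lh5", idx=rows, use_h5idx=True
)
```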
@@ -223,6 +243,7 @@ class LH5Store:
            after reading. The option has no effect on data encoded with HDF5
            built-in filters, which is always decompressed upstream by HDF5.
 
+
        Returns
        -------
        (object, n_rows_read)
@@ -236,6 +257,14 @@ class LH5Store:
        if not isinstance(lh5_file, (str, h5py.File)):
            lh5_file = list(lh5_file)
            n_rows_read = 0
+
+            # to know whether we are reading in a list of files.
+            # this is part of the fix for reading data by idx
+            # (see https://github.com/legend-exp/legend-pydataobj/issues/29)
+            # so that we only make a copy of the data if absolutely necessary
+            # or if we can read the data from file without having to make a copy
+            self.in_file_loop = True
+
            for i, h5f in enumerate(lh5_file):
                if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
                    # a list of lists: must be one per file
@@ -255,22 +284,32 @@ class LH5Store:
                else:
                    idx_i = None
                n_rows_i = n_rows - n_rows_read
+
+                # maybe someone passed in a list of len==1?
+                if i == (len(lh5_file) - 1):
+                    self.in_file_loop = False
+
                obj_buf, n_rows_read_i = self.read_object(
                    name,
                    lh5_file[i],
                    start_row=start_row,
                    n_rows=n_rows_i,
                    idx=idx_i,
+                    use_h5idx=use_h5idx,
                    field_mask=field_mask,
                    obj_buf=obj_buf,
                    obj_buf_start=obj_buf_start,
                    decompress=decompress,
                )
+
                n_rows_read += n_rows_read_i
                if n_rows_read >= n_rows or obj_buf is None:
                    return obj_buf, n_rows_read
                start_row = 0
                obj_buf_start += n_rows_read_i
+
+            self.in_file_loop = False
+
            return obj_buf, n_rows_read
 
        # get the file from the store
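When `lh5_file` is a list, the loop above reads each file in turn and keeps `self.in_file_loop` set so the low-level reader knows whether a copy of the data is required. A sketch of the corresponding call pattern, with invented file names and indices:

```python
from lgdo import LH5Store

store = LH5Store()
files = ["run0.lh5", "run1.lh5"]    # hypothetical file list
idx_per_file = [[0, 3, 7], [1, 2]]  # one index list per file

obj, n_read = store.read_object("geds/raw/energy", files, idx=idx_per_file)

# a single long contiguous index list spanning all files also works,
# e.g. one built from a previous identical read
```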
@@ -358,6 +397,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx,
+                use_h5idx=use_h5idx,
                decompress=decompress,
            )
            # modify datatype in attrs if a field_mask was used
@@ -404,6 +444,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx,
+                use_h5idx=use_h5idx,
                obj_buf=fld_buf,
                obj_buf_start=obj_buf_start,
                decompress=decompress,
@@ -497,6 +538,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx,
+                use_h5idx=use_h5idx,
                obj_buf=None if decompress else decoded_size_buf,
                obj_buf_start=0 if decompress else obj_buf_start,
            )
@@ -508,6 +550,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx,
+                use_h5idx=use_h5idx,
                obj_buf=None if decompress else encoded_data_buf,
                obj_buf_start=0 if decompress else obj_buf_start,
            )
@@ -573,6 +616,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx,
+                use_h5idx=use_h5idx,
                obj_buf=cumulen_buf,
                obj_buf_start=obj_buf_start,
            )
@@ -597,6 +641,7 @@ class LH5Store:
                start_row=start_row,
                n_rows=n_rows,
                idx=idx2,
+                use_h5idx=use_h5idx,
            )
            fd_starts = fd_starts.nda # we just need the nda
            if fd_start is None:
@@ -679,6 +724,7 @@ class LH5Store:
                start_row=fd_start,
                n_rows=fd_n_rows,
                idx=fd_idx,
+                use_h5idx=use_h5idx,
                obj_buf=fd_buf,
                obj_buf_start=fd_buf_start,
            )
@@ -722,9 +768,22 @@ class LH5Store:
        if n_rows_to_read > n_rows:
            n_rows_to_read = n_rows
 
+        # if idx is passed, check if we can make it a slice instead (faster)
+        change_idx_to_slice = False
+
        # prepare the selection for the read. Use idx if available
        if idx is not None:
-
+            # check if idx is empty and convert to slice instead
+            if len(idx[0]) == 0:
+                source_sel = np.s_[0:0]
+                change_idx_to_slice = True
+            # check if idx is contiguous and increasing
+            # if so, convert it to a slice instead (faster)
+            elif np.all(np.diff(idx[0]) == 1):
+                source_sel = np.s_[idx[0][0] : idx[0][-1] + 1]
+                change_idx_to_slice = True
+            else:
+                source_sel = idx
        else:
            source_sel = np.s_[start_row : start_row + n_rows_to_read]
 
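The new block recognizes an empty or contiguous, increasing index array and replaces it with a slice, since slice selections are far cheaper for `h5py` than point selections. The same test in isolation (a standalone sketch, not code from the package):

```python
import numpy as np

def index_to_selection(idx: np.ndarray):
    """Return a slice equivalent to `idx` when possible, else `idx` itself."""
    if len(idx) == 0:
        return np.s_[0:0]                   # empty selection
    if np.all(np.diff(idx) == 1):           # contiguous and increasing?
        return np.s_[idx[0] : idx[-1] + 1]  # cheap slice selection
    return idx                              # fall back to fancy indexing

index_to_selection(np.array([3, 4, 5]))  # returns a slice covering rows 3..5
index_to_selection(np.array([1, 4, 9]))  # returns the original fancy index
```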
@@ -734,14 +793,34 @@ class LH5Store:
            if len(obj_buf) < buf_size:
                obj_buf.resize(buf_size)
            dest_sel = np.s_[obj_buf_start:buf_size]
-
+
+            # this is required to make the read of multiple files faster
+            # until a better solution found.
+            if change_idx_to_slice or idx is None or use_h5idx:
+                h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel)
+            else:
+                # it is faster to read the whole object and then do fancy indexing
+                obj_buf.nda[dest_sel] = h5f[name][...][source_sel]
+
            nda = obj_buf.nda
        else:
            if n_rows == 0:
                tmp_shape = (0,) + h5f[name].shape[1:]
                nda = np.empty(tmp_shape, h5f[name].dtype)
            else:
-
+                if change_idx_to_slice or idx is None or use_h5idx:
+                    nda = h5f[name][source_sel]
+                else:
+                    # it is faster to read the whole object and then do fancy indexing
+                    nda = h5f[name][...][source_sel]
+
+                # if reading a list of files recursively, this is given to obj_buf on
+                # the first file read. obj_buf needs to be resized and therefore
+                # it needs to hold the data itself (not a view of the data).
+                # a view is returned by the source_sel indexing, which cannot be resized
+                # by ndarray.resize().
+                if hasattr(self, "in_file_loop") and self.in_file_loop:
+                    nda = np.copy(nda)
 
        # special handling for bools
        # (c and Julia store as uint8 so cast to bool)
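These two branches implement the speed/memory trade-off documented above: passing the selection to `h5py` avoids loading the full dataset, while reading everything with `[...]` and indexing in NumPy is usually much faster. The same two paths in plain `h5py` (file and dataset names are illustrative):

```python
import h5py
import numpy as np

idx = np.array([10, 500, 12345])  # must be increasing for h5py fancy indexing

with h5py.File("run0.lh5", "r") as f:  # hypothetical file
    dset = f["geds/raw/energy"]

    # use_h5idx=True style: only the selected rows travel from disk (memory-lean, slow)
    lean = dset[idx]

    # use_h5idx=False style: read the whole dataset once, then index in memory (fast)
    fast = dset[...][idx]
```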
@@ -781,7 +860,7 @@ class LH5Store:
        n_rows: int = None,
        wo_mode: str = "append",
        write_start: int = 0,
-
+        **h5py_kwargs,
    ) -> None:
        """Write an LGDO into an LH5 file.
 
@@ -796,20 +875,30 @@ class LH5Store:
            passed directly to :meth:`h5py.Group.create_dataset`.
 
        :class:`.WaveformCodec` object
-            If `obj` is a :class:`.WaveformTable
-            this algorithm. More
-            compression algorithms at
+            If `obj` is a :class:`.WaveformTable` and ``obj.values`` holds the
+            attribute, compress ``values`` using this algorithm. More
+            documentation about the supported waveform compression algorithms at
+            :mod:`.lgdo.compression`.
+
+        If the `obj` :class:`.LGDO` has a `hdf5_settings` attribute holding a
+        dictionary, it is interpreted as a list of keyword arguments to be
+        forwarded directly to :meth:`h5py.Group.create_dataset` (exactly like
+        the first format of `compression` above). This is the preferred way to
+        specify HDF5 dataset options such as chunking etc. If compression
+        options are specified, they take precedence over those set with the
+        `compression` attribute.
 
        Note
        ----
-        The `compression` attribute takes precedence over the
-
+        The `compression` LGDO attribute takes precedence over the default HDF5
+        compression settings. The `hdf5_settings` attribute takes precedence
+        over `compression`. These attributes are not written to disk.
 
        Note
        ----
-        HDF5 compression is skipped for the `encoded_data`
-        :class:`.VectorOfEncodedVectors` and
-        :class
+        HDF5 compression is skipped for the `encoded_data.flattened_data`
+        dataset of :class:`.VectorOfEncodedVectors` and
+        :class:`.ArrayOfEncodedEqualSizedArrays`.
 
        Parameters
        ----------
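Per-object HDF5 options can therefore be attached to the LGDO itself before writing. A hedged sketch using the attribute names documented above; the array content and output file are invented:

```python
import numpy as np
from lgdo import Array, LH5Store

arr = Array(np.arange(100_000, dtype="uint16"))

# preferred: a dict of h5py.Group.create_dataset() keyword arguments
arr.attrs["hdf5_settings"] = {"compression": "gzip", "compression_opts": 9, "shuffle": True}

# simpler alternative: just name a compression filter
# arr.attrs["compression"] = "lzf"

store = LH5Store()
store.write_object(arr, "energies", "out.lh5")
```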
@@ -845,15 +934,17 @@ class LH5Store:
        write_start
            row in the output file (if already existing) to start overwriting
            from.
-
-
-
-
+        **h5py_kwargs
+            additional keyword arguments forwarded to
+            :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5
+            compression filter to be applied before writing non-scalar
+            datasets. **Note: `compression` Ignored if compression is specified
+            as an `obj` attribute.**
        """
        log.debug(
            f"writing {repr(obj)}[{start_row}:{n_rows}] as "
            f"{lh5_file}:{group}/{name}[{write_start}:], "
-            f"mode = {wo_mode},
+            f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
        )
 
        if wo_mode == "write_safe":
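The same options can instead be given per call; anything `write_object` does not consume itself is forwarded to `h5py.Group.create_dataset`. A sketch with invented names (object-level `compression`/`hdf5_settings` attributes would override these, per the note above):

```python
import numpy as np
from lgdo import Array, LH5Store

store = LH5Store()
wf = Array(np.zeros((1000, 2048), dtype="float32"))

# per-call HDF5 settings: LZF compression and explicit chunking
store.write_object(
    wf,
    "waveforms",
    "out.lh5",
    compression="lzf",
    chunks=(100, 2048),
)
```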
@@ -926,8 +1017,8 @@ class LH5Store:
        for field in obj.keys():
            # eventually compress waveform table values with LGDO's
            # custom codecs before writing
-            # if waveformtable.values.attrs["compression"] is a
-            #
+            # if waveformtable.values.attrs["compression"] is NOT a
+            # WaveformCodec, just leave it there
            obj_fld = None
            if (
                isinstance(obj, WaveformTable)
@@ -953,7 +1044,7 @@ class LH5Store:
                n_rows=n_rows,
                wo_mode=wo_mode,
                write_start=write_start,
-
+                **h5py_kwargs,
            )
            return
 
@@ -977,6 +1068,9 @@ class LH5Store:
            name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
        )
 
+        # ask not to further compress flattened_data, it is already compressed!
+        obj.encoded_data.flattened_data.attrs["compression"] = None
+
        self.write_object(
            obj.encoded_data,
            "encoded_data",
@@ -986,7 +1080,7 @@ class LH5Store:
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
-
+            **h5py_kwargs,
        )
 
        self.write_object(
@@ -998,7 +1092,7 @@ class LH5Store:
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
-
+            **h5py_kwargs,
        )
 
        # vector of vectors
@@ -1034,7 +1128,7 @@ class LH5Store:
                n_rows=fd_n_rows,
                wo_mode=wo_mode,
                write_start=offset,
-
+                **h5py_kwargs,
            )
 
            # now offset is used to give appropriate in-file values for
@@ -1057,7 +1151,7 @@ class LH5Store:
                n_rows=n_rows,
                wo_mode=wo_mode,
                write_start=write_start,
-
+                **h5py_kwargs,
            )
            obj.cumulative_length.nda -= cl_dtype(offset)
 
@@ -1077,29 +1171,39 @@ class LH5Store:
        # need to create dataset from ndarray the first time for speed
        # creating an empty dataset and appending to that is super slow!
        if (wo_mode != "a" and write_start == 0) or name not in group:
+            # this is needed in order to have a resizable (in the first
+            # axis) data set, i.e. rows can be appended later
+            # NOTE: this automatically turns chunking on!
            maxshape = (None,) + nda.shape[1:]
+            h5py_kwargs.setdefault("maxshape", maxshape)
+
            if wo_mode == "o" and name in group:
                log.debug(f"overwriting {name} in {group}")
                del group[name]
 
+            # set default compression options
+            for k, v in DEFAULT_HDF5_SETTINGS.items():
+                h5py_kwargs.setdefault(k, v)
+
+            # compress using the 'compression' LGDO attribute, if available
+            if "compression" in obj.attrs:
+                comp_algo = obj.attrs["compression"]
+                if isinstance(comp_algo, dict):
+                    h5py_kwargs |= obj.attrs["compression"]
+                else:
+                    h5py_kwargs["compression"] = obj.attrs["compression"]
+
+            # and even the 'hdf5_settings' one, preferred
+            if "hdf5_settings" in obj.attrs:
+                h5py_kwargs |= obj.attrs["hdf5_settings"]
+
            # create HDF5 dataset
-
-            # available
-            # - otherwise use "hdf5_compression"
-            # - attach HDF5 dataset attributes, but not "compression"!
-            comp_algo = obj.attrs.get("compression", hdf5_compression)
-            comp_kwargs = {}
-            if isinstance(comp_algo, str):
-                comp_kwargs = {"compression": comp_algo}
-            elif comp_algo is not None:
-                comp_kwargs = comp_algo
-
-            ds = group.create_dataset(
-                name, data=nda, maxshape=maxshape, **comp_kwargs
-            )
+            ds = group.create_dataset(name, data=nda, **h5py_kwargs)
 
+            # attach HDF5 dataset attributes, but not "compression"!
            _attrs = obj.getattrs(datatype=True)
            _attrs.pop("compression", None)
+            _attrs.pop("hdf5_settings", None)
            ds.attrs.update(_attrs)
            return
 
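Taken together, the block above establishes a clear precedence for dataset options: `DEFAULT_HDF5_SETTINGS` fills gaps left by the caller's keyword arguments, the object's `compression` attribute overrides both, and `hdf5_settings` wins over everything. A condensed, standalone sketch of that merging order (the helper function and example dicts are invented):

```python
DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

def merge_settings(call_kwargs: dict, obj_attrs: dict) -> dict:
    kwargs = dict(call_kwargs)
    # 1. defaults only fill gaps left by the caller
    for k, v in DEFAULT_HDF5_SETTINGS.items():
        kwargs.setdefault(k, v)
    # 2. the 'compression' LGDO attribute overrides the above
    if "compression" in obj_attrs:
        comp = obj_attrs["compression"]
        kwargs |= comp if isinstance(comp, dict) else {"compression": comp}
    # 3. the 'hdf5_settings' attribute wins over everything else
    if "hdf5_settings" in obj_attrs:
        kwargs |= obj_attrs["hdf5_settings"]
    return kwargs

print(merge_settings({"compression": "lzf"}, {"hdf5_settings": {"compression": None}}))
# {'compression': None, 'shuffle': True}
```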
{legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/LICENSE: File without changes
{legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/WHEEL: File without changes
{legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/entry_points.txt: File without changes
{legend_pydataobj-1.3.0.dist-info → legend_pydataobj-1.4.0.dist-info}/top_level.txt: File without changes