legend-pydataobj 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: legend-pydataobj
- Version: 1.3.0
+ Version: 1.4.0
  Summary: LEGEND Python Data Objects
  Home-page: https://github.com/legend-exp/legend-pydataobj
  Author: The LEGEND Collaboration
@@ -1,8 +1,8 @@
  lgdo/__init__.py,sha256=mY6pUNy2yJ2MYzq_ZdhVZZ7xItBW1KJ8h9qA29bOECU,2878
- lgdo/_version.py,sha256=HGwtpza1HCPtlyqElUvIyH97K44TO13CYiYVZNezQ1M,411
+ lgdo/_version.py,sha256=R8-T9fmURjcuoxYpHTAjyNAhgJPDtI2jogCjqYYkfCU,411
  lgdo/cli.py,sha256=6o2vGwEq0Fq1y67RTxOHjkVNmN9XGhIBnb8DFFm8ANQ,1428
  lgdo/lgdo_utils.py,sha256=LvqE_eQZjKOuLrocbxc21rvWvh7NA4BSiaJh-jhlxVs,5598
- lgdo/lh5_store.py,sha256=KKJUF3HaTbYleuMHqhHIeUp17nOYkga0pYSmEo1j444,68660
+ lgdo/lh5_store.py,sha256=-ceGURWdu0jKTsDTL9bqQLxQ0T8USeeSXAk2cqd-2UU,74246
  lgdo/logging.py,sha256=Nu3wgIoWN7cyUxuzPom5rMwFvTlBu8p8d9uONHDquRg,965
  lgdo/compression/__init__.py,sha256=oT9OXiDDxC7BZciWrQVfHZNkOxXfj4p8EpF2tF04w84,1091
  lgdo/compression/base.py,sha256=ujQY2kYF4z3ZdAy7gXaoDPXFbG2Av1IQ1Nnx6UGLjmk,896
@@ -21,9 +21,9 @@ lgdo/types/struct.py,sha256=UxV0wnCHoQM5rSmzEC9EIKWYV6drHVyK5Ab7UQztuj4,2984
  lgdo/types/table.py,sha256=kgJtI4Ea6jNhsQWS_R-9Ilt7Xm9n5B97sv4Cq6m5q7E,12667
  lgdo/types/vectorofvectors.py,sha256=1oxKJDX8VVWpmvUUDHHEzEYw0RRWJrMjOB-jHRY12N4,21859
  lgdo/types/waveform_table.py,sha256=52vqjGudX5_ZR1-b087jx3vuTxJ_yEPO-dO8Dpi0ceg,9407
- legend_pydataobj-1.3.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- legend_pydataobj-1.3.0.dist-info/METADATA,sha256=s-gX6dzI_9itWMKYKsgF6D0xgB562eMVnNacLINgj-o,3577
- legend_pydataobj-1.3.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
- legend_pydataobj-1.3.0.dist-info/entry_points.txt,sha256=j22HoS-1cVhTtKJkDnKB49uNH0nEVER2Tpw-lVh1aws,41
- legend_pydataobj-1.3.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
- legend_pydataobj-1.3.0.dist-info/RECORD,,
+ legend_pydataobj-1.4.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ legend_pydataobj-1.4.0.dist-info/METADATA,sha256=NIX9NkqNDbKopKc0EgUEzy7fBdALFOfiGRJjHh1b01w,3577
+ legend_pydataobj-1.4.0.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
+ legend_pydataobj-1.4.0.dist-info/entry_points.txt,sha256=j22HoS-1cVhTtKJkDnKB49uNH0nEVER2Tpw-lVh1aws,41
+ legend_pydataobj-1.4.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+ legend_pydataobj-1.4.0.dist-info/RECORD,,
lgdo/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.3.0'
- __version_tuple__ = version_tuple = (1, 3, 0)
+ __version__ = version = '1.4.0'
+ __version_tuple__ = version_tuple = (1, 4, 0)
lgdo/lh5_store.py CHANGED
@@ -38,7 +38,7 @@ LGDO = Union[Array, Scalar, Struct, VectorOfVectors]

  log = logging.getLogger(__name__)

- DEFAULT_HDF5_COMPRESSION = None
+ DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}


  class LH5Store:
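
The new module-level default switches non-scalar datasets from uncompressed storage to byte-shuffling plus gzip. A minimal sketch of what these defaults amount to at the h5py level (file and dataset names are made up for illustration):

    import h5py
    import numpy as np

    DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

    # roughly what write_object now does for a plain array, with a resizable first axis
    with h5py.File("example.lh5", "w") as f:
        data = np.arange(10_000, dtype="float32")
        f.create_dataset("dset", data=data, maxshape=(None,), **DEFAULT_HDF5_SETTINGS)
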
@@ -169,6 +169,7 @@ class LH5Store:
  start_row: int = 0,
  n_rows: int = sys.maxsize,
  idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None,
+ use_h5idx: bool = False,
  field_mask: dict[str, bool] | list[str] | tuple[str] = None,
  obj_buf: LGDO = None,
  obj_buf_start: int = 0,
@@ -176,6 +177,14 @@ class LH5Store:
  ) -> tuple[LGDO, int]:
  """Read LH5 object data from a file.

+ Use the ``idx`` parameter to read out particular rows of the data. The ``use_h5idx`` flag
+ controls whether *only* those rows are read from disk or if the rows are indexed after reading
+ the entire object. Reading individual rows can be orders of magnitude slower than reading
+ the whole object and then indexing the desired rows. The default behavior (``use_h5idx=False``)
+ is to use slightly more memory for a much faster read. See
+ `legend-pydataobj #29 <https://github.com/legend-exp/legend-pydataobj/issues/29>`_
+ for additional information.
+
  Parameters
  ----------
  name
@@ -192,16 +201,27 @@ class LH5Store:
  actual number of rows read will be returned as one of the return
  values (see below).
  idx
- For NumPy-style "fancying indexing" for the read. Used to read out
- rows that pass some selection criteria. Only selection along the first
- axis is supported, so tuple arguments must be one-tuples. If `n_rows`
- is not false, `idx` will be truncated to `n_rows` before reading. To use
- with a list of files, can pass in a list of `idx`'s (one for each
- file) or use a long contiguous list (e.g. built from a previous
+ For NumPy-style "fancying indexing" for the read to select only some
+ rows, e.g. after applying some cuts to particular columns.
+ Only selection along the first axis is supported, so tuple arguments
+ must be one-tuples. If `n_rows` is not false, `idx` will be truncated to
+ `n_rows` before reading. To use with a list of files, can pass in a list of
+ `idx`'s (one for each file) or use a long contiguous list (e.g. built from a previous
  identical read). If used in conjunction with `start_row` and `n_rows`,
  will be sliced to obey those constraints, where `n_rows` is
  interpreted as the (max) number of *selected* values (in `idx`) to be
- read out.
+ read out. Note that the ``use_h5idx`` parameter controls some behaviour of the
+ read and that the default behavior (``use_h5idx=False``) prioritizes speed over
+ a small memory penalty.
+ use_h5idx
+ ``True`` will directly pass the ``idx`` parameter to the underlying
+ ``h5py`` call such that only the selected rows are read directly into memory,
+ which conserves memory at the cost of speed. There can be a significant penalty
+ to speed for larger files (1 - 2 orders of magnitude longer time).
+ ``False`` (default) will read the entire object into memory before
+ performing the indexing. The default is much faster but requires additional memory,
+ though a relatively small amount in the typical use case. It is recommended to
+ leave this parameter as its default.
  field_mask
  For tables and structs, determines which fields get written out.
  Only applies to immediate fields of the requested objects. If a dict
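
A sketch of the two read modes described in the docstring above; the dataset path and file name are hypothetical and the row selection would normally come from a previous cut:

    import numpy as np
    from lgdo.lh5_store import LH5Store

    store = LH5Store()
    idx = np.array([0, 5, 7, 42])  # rows surviving some hypothetical cut

    # default: read the whole object once, then fancy-index it in memory (fast)
    obj, n_read = store.read_object("geds/raw/energy", "run0.lh5", idx=idx)

    # pass idx straight through to h5py instead (lower memory use, can be much slower)
    obj, n_read = store.read_object("geds/raw/energy", "run0.lh5", idx=idx, use_h5idx=True)
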
@@ -223,6 +243,7 @@ class LH5Store:
  after reading. The option has no effect on data encoded with HDF5
  built-in filters, which is always decompressed upstream by HDF5.

+
  Returns
  -------
  (object, n_rows_read)
@@ -236,6 +257,14 @@ class LH5Store:
  if not isinstance(lh5_file, (str, h5py.File)):
  lh5_file = list(lh5_file)
  n_rows_read = 0
+
+ # to know whether we are reading in a list of files.
+ # this is part of the fix for reading data by idx
+ # (see https://github.com/legend-exp/legend-pydataobj/issues/29)
+ # so that we only make a copy of the data if absolutely necessary
+ # or if we can read the data from file without having to make a copy
+ self.in_file_loop = True
+
  for i, h5f in enumerate(lh5_file):
  if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
  # a list of lists: must be one per file
@@ -255,22 +284,32 @@ class LH5Store:
  else:
  idx_i = None
  n_rows_i = n_rows - n_rows_read
+
+ # maybe someone passed in a list of len==1?
+ if i == (len(lh5_file) - 1):
+ self.in_file_loop = False
+
  obj_buf, n_rows_read_i = self.read_object(
  name,
  lh5_file[i],
  start_row=start_row,
  n_rows=n_rows_i,
  idx=idx_i,
+ use_h5idx=use_h5idx,
  field_mask=field_mask,
  obj_buf=obj_buf,
  obj_buf_start=obj_buf_start,
  decompress=decompress,
  )
+
  n_rows_read += n_rows_read_i
  if n_rows_read >= n_rows or obj_buf is None:
  return obj_buf, n_rows_read
  start_row = 0
  obj_buf_start += n_rows_read_i
+
+ self.in_file_loop = False
+
  return obj_buf, n_rows_read

  # get the file from the store
@@ -358,6 +397,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx,
+ use_h5idx=use_h5idx,
  decompress=decompress,
  )
  # modify datatype in attrs if a field_mask was used
@@ -404,6 +444,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx,
+ use_h5idx=use_h5idx,
  obj_buf=fld_buf,
  obj_buf_start=obj_buf_start,
  decompress=decompress,
@@ -497,6 +538,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx,
+ use_h5idx=use_h5idx,
  obj_buf=None if decompress else decoded_size_buf,
  obj_buf_start=0 if decompress else obj_buf_start,
  )
@@ -508,6 +550,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx,
+ use_h5idx=use_h5idx,
  obj_buf=None if decompress else encoded_data_buf,
  obj_buf_start=0 if decompress else obj_buf_start,
  )
@@ -573,6 +616,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx,
+ use_h5idx=use_h5idx,
  obj_buf=cumulen_buf,
  obj_buf_start=obj_buf_start,
  )
@@ -597,6 +641,7 @@ class LH5Store:
  start_row=start_row,
  n_rows=n_rows,
  idx=idx2,
+ use_h5idx=use_h5idx,
  )
  fd_starts = fd_starts.nda # we just need the nda
  if fd_start is None:
@@ -679,6 +724,7 @@ class LH5Store:
  start_row=fd_start,
  n_rows=fd_n_rows,
  idx=fd_idx,
+ use_h5idx=use_h5idx,
  obj_buf=fd_buf,
  obj_buf_start=fd_buf_start,
  )
@@ -722,9 +768,22 @@ class LH5Store:
  if n_rows_to_read > n_rows:
  n_rows_to_read = n_rows

+ # if idx is passed, check if we can make it a slice instead (faster)
+ change_idx_to_slice = False
+
  # prepare the selection for the read. Use idx if available
  if idx is not None:
- source_sel = idx
+ # check if idx is empty and convert to slice instead
+ if len(idx[0]) == 0:
+ source_sel = np.s_[0:0]
+ change_idx_to_slice = True
+ # check if idx is contiguous and increasing
+ # if so, convert it to a slice instead (faster)
+ elif np.all(np.diff(idx[0]) == 1):
+ source_sel = np.s_[idx[0][0] : idx[0][-1] + 1]
+ change_idx_to_slice = True
+ else:
+ source_sel = idx
  else:
  source_sel = np.s_[start_row : start_row + n_rows_to_read]

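The slice conversion added here can be illustrated in isolation; this is only the NumPy logic from the hunk above, not a public API:

    import numpy as np

    idx = np.array([4, 5, 6, 7])  # example selection

    if idx.size == 0:
        source_sel = np.s_[0:0]                   # empty selection becomes an empty slice
    elif np.all(np.diff(idx) == 1):
        source_sel = np.s_[idx[0] : idx[-1] + 1]  # contiguous indices become a slice (fast h5py read)
    else:
        source_sel = idx                          # genuine fancy indexing
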
@@ -734,14 +793,34 @@ class LH5Store:
  if len(obj_buf) < buf_size:
  obj_buf.resize(buf_size)
  dest_sel = np.s_[obj_buf_start:buf_size]
- h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel)
+
+ # this is required to make the read of multiple files faster
+ # until a better solution found.
+ if change_idx_to_slice or idx is None or use_h5idx:
+ h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel)
+ else:
+ # it is faster to read the whole object and then do fancy indexing
+ obj_buf.nda[dest_sel] = h5f[name][...][source_sel]
+
  nda = obj_buf.nda
  else:
  if n_rows == 0:
  tmp_shape = (0,) + h5f[name].shape[1:]
  nda = np.empty(tmp_shape, h5f[name].dtype)
  else:
- nda = h5f[name][source_sel]
+ if change_idx_to_slice or idx is None or use_h5idx:
+ nda = h5f[name][source_sel]
+ else:
+ # it is faster to read the whole object and then do fancy indexing
+ nda = h5f[name][...][source_sel]
+
+ # if reading a list of files recursively, this is given to obj_buf on
+ # the first file read. obj_buf needs to be resized and therefore
+ # it needs to hold the data itself (not a view of the data).
+ # a view is returned by the source_sel indexing, which cannot be resized
+ # by ndarray.resize().
+ if hasattr(self, "in_file_loop") and self.in_file_loop:
+ nda = np.copy(nda)

  # special handling for bools
  # (c and Julia store as uint8 so cast to bool)
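
The trade-off encoded by the branch above can be reproduced with plain h5py (file and dataset names are again hypothetical):

    import h5py
    import numpy as np

    idx = np.array([10, 11, 500, 20_000])  # scattered rows

    with h5py.File("run0.lh5", "r") as f:
        ds = f["geds/raw/energy"]
        a = ds[idx]        # HDF5 point selection: often slow for large, scattered selections
        b = ds[...][idx]   # read everything, then fancy-index in memory: more RAM, usually much faster
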
@@ -781,7 +860,7 @@ class LH5Store:
  n_rows: int = None,
  wo_mode: str = "append",
  write_start: int = 0,
- hdf5_compression: str | h5py.filters.FilterRefBase = DEFAULT_HDF5_COMPRESSION,
+ **h5py_kwargs,
  ) -> None:
  """Write an LGDO into an LH5 file.

@@ -796,20 +875,30 @@ class LH5Store:
  passed directly to :meth:`h5py.Group.create_dataset`.

  :class:`.WaveformCodec` object
- If `obj` is a :class:`.WaveformTable`, compress its `values` using
- this algorithm. More documentation about the supported waveform
- compression algorithms at :mod:`.lgdo.compression`.
+ If `obj` is a :class:`.WaveformTable` and ``obj.values`` holds the
+ attribute, compress ``values`` using this algorithm. More
+ documentation about the supported waveform compression algorithms at
+ :mod:`.lgdo.compression`.
+
+ If the `obj` :class:`.LGDO` has a `hdf5_settings` attribute holding a
+ dictionary, it is interpreted as a list of keyword arguments to be
+ forwarded directly to :meth:`h5py.Group.create_dataset` (exactly like
+ the first format of `compression` above). This is the preferred way to
+ specify HDF5 dataset options such as chunking etc. If compression
+ options are specified, they take precedence over those set with the
+ `compression` attribute.

  Note
  ----
- The `compression` attribute takes precedence over the
- `hdf5_compression` argument and is not written to disk.
+ The `compression` LGDO attribute takes precedence over the default HDF5
+ compression settings. The `hdf5_settings` attribute takes precedence
+ over `compression`. These attributes are not written to disk.

  Note
  ----
- HDF5 compression is skipped for the `encoded_data` dataset of
- :class:`.VectorOfEncodedVectors` and
- :class`.ArrayOfEncodedEqualSizedArrays`.
+ HDF5 compression is skipped for the `encoded_data.flattened_data`
+ dataset of :class:`.VectorOfEncodedVectors` and
+ :class:`.ArrayOfEncodedEqualSizedArrays`.

  Parameters
  ----------
@@ -845,15 +934,17 @@ class LH5Store:
  write_start
  row in the output file (if already existing) to start overwriting
  from.
- hdf5_compression
- HDF5 compression filter to be applied before writing non-scalar
- datasets. **Ignored if compression is specified as an `obj`
- attribute.**
+ **h5py_kwargs
+ additional keyword arguments forwarded to
+ :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5
+ compression filter to be applied before writing non-scalar
+ datasets. **Note: `compression` Ignored if compression is specified
+ as an `obj` attribute.**
  """
  log.debug(
  f"writing {repr(obj)}[{start_row}:{n_rows}] as "
  f"{lh5_file}:{group}/{name}[{write_start}:], "
- f"mode = {wo_mode}, hdf5_compression = {hdf5_compression}"
+ f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
  )

  if wo_mode == "write_safe":
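
A sketch of the two ways user code can now steer HDF5 dataset creation, following the docstring above; the array contents and output file name are made up:

    import numpy as np
    from lgdo import Array
    from lgdo.lh5_store import LH5Store

    store = LH5Store()
    energies = Array(np.random.default_rng().normal(size=1000))

    # per-object: the 'hdf5_settings' attribute wins over the defaults and over 'compression'
    energies.attrs["hdf5_settings"] = {"compression": "lzf", "shuffle": False}
    store.write_object(energies, "energies", "out.lh5")

    # per-call: extra keyword arguments are forwarded to h5py.Group.create_dataset
    store.write_object(energies, "energies2", "out.lh5", compression="gzip", compression_opts=9)
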
@@ -926,8 +1017,8 @@ class LH5Store:
  for field in obj.keys():
  # eventually compress waveform table values with LGDO's
  # custom codecs before writing
- # if waveformtable.values.attrs["compression"] is a string,
- # interpret it as an HDF5 built-in filter
+ # if waveformtable.values.attrs["compression"] is NOT a
+ # WaveformCodec, just leave it there
  obj_fld = None
  if (
  isinstance(obj, WaveformTable)
@@ -953,7 +1044,7 @@ class LH5Store:
  n_rows=n_rows,
  wo_mode=wo_mode,
  write_start=write_start,
- hdf5_compression=hdf5_compression,
+ **h5py_kwargs,
  )
  return

@@ -977,6 +1068,9 @@ class LH5Store:
  name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
  )

+ # ask not to further compress flattened_data, it is already compressed!
+ obj.encoded_data.flattened_data.attrs["compression"] = None
+
  self.write_object(
  obj.encoded_data,
  "encoded_data",
@@ -986,7 +1080,7 @@ class LH5Store:
  n_rows=n_rows,
  wo_mode=wo_mode,
  write_start=write_start,
- hdf5_compression=None, # data is already compressed!
+ **h5py_kwargs,
  )

  self.write_object(
@@ -998,7 +1092,7 @@ class LH5Store:
  n_rows=n_rows,
  wo_mode=wo_mode,
  write_start=write_start,
- hdf5_compression=hdf5_compression,
+ **h5py_kwargs,
  )

  # vector of vectors
@@ -1034,7 +1128,7 @@ class LH5Store:
  n_rows=fd_n_rows,
  wo_mode=wo_mode,
  write_start=offset,
- hdf5_compression=hdf5_compression,
+ **h5py_kwargs,
  )

  # now offset is used to give appropriate in-file values for
@@ -1057,7 +1151,7 @@ class LH5Store:
  n_rows=n_rows,
  wo_mode=wo_mode,
  write_start=write_start,
- hdf5_compression=hdf5_compression,
+ **h5py_kwargs,
  )
  obj.cumulative_length.nda -= cl_dtype(offset)

@@ -1077,29 +1171,39 @@ class LH5Store:
  # need to create dataset from ndarray the first time for speed
  # creating an empty dataset and appending to that is super slow!
  if (wo_mode != "a" and write_start == 0) or name not in group:
+ # this is needed in order to have a resizable (in the first
+ # axis) data set, i.e. rows can be appended later
+ # NOTE: this automatically turns chunking on!
  maxshape = (None,) + nda.shape[1:]
+ h5py_kwargs.setdefault("maxshape", maxshape)
+
  if wo_mode == "o" and name in group:
  log.debug(f"overwriting {name} in {group}")
  del group[name]

+ # set default compression options
+ for k, v in DEFAULT_HDF5_SETTINGS.items():
+ h5py_kwargs.setdefault(k, v)
+
+ # compress using the 'compression' LGDO attribute, if available
+ if "compression" in obj.attrs:
+ comp_algo = obj.attrs["compression"]
+ if isinstance(comp_algo, dict):
+ h5py_kwargs |= obj.attrs["compression"]
+ else:
+ h5py_kwargs["compression"] = obj.attrs["compression"]
+
+ # and even the 'hdf5_settings' one, preferred
+ if "hdf5_settings" in obj.attrs:
+ h5py_kwargs |= obj.attrs["hdf5_settings"]
+
  # create HDF5 dataset
- # - compress using the 'compression' LGDO attribute, if
- # available
- # - otherwise use "hdf5_compression"
- # - attach HDF5 dataset attributes, but not "compression"!
- comp_algo = obj.attrs.get("compression", hdf5_compression)
- comp_kwargs = {}
- if isinstance(comp_algo, str):
- comp_kwargs = {"compression": comp_algo}
- elif comp_algo is not None:
- comp_kwargs = comp_algo
-
- ds = group.create_dataset(
- name, data=nda, maxshape=maxshape, **comp_kwargs
- )
+ ds = group.create_dataset(name, data=nda, **h5py_kwargs)

+ # attach HDF5 dataset attributes, but not "compression"!
  _attrs = obj.getattrs(datatype=True)
  _attrs.pop("compression", None)
+ _attrs.pop("hdf5_settings", None)
  ds.attrs.update(_attrs)
  return
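
Condensing the final hunk, the keyword arguments handed to create_dataset are resolved in this order (a paraphrase of the code above, not an additional API):

    DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

    h5py_kwargs = {"fletcher32": True}           # whatever the caller passed to write_object
    obj_attrs = {                                # example LGDO attributes
        "compression": "gzip",
        "hdf5_settings": {"compression": None, "chunks": (1024,)},
    }

    for k, v in DEFAULT_HDF5_SETTINGS.items():   # 1. defaults only fill missing keys
        h5py_kwargs.setdefault(k, v)

    if "compression" in obj_attrs:               # 2. the 'compression' attribute overrides them
        h5py_kwargs["compression"] = obj_attrs["compression"]

    if "hdf5_settings" in obj_attrs:             # 3. 'hdf5_settings' has the last word
        h5py_kwargs |= obj_attrs["hdf5_settings"]

    print(h5py_kwargs)
    # {'fletcher32': True, 'shuffle': True, 'compression': None, 'chunks': (1024,)}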