h5netcdf 1.3.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of h5netcdf might be problematic. Click here for more details.

h5netcdf/core.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # For details on how netCDF4 builds on HDF5:
2
- # http://www.unidata.ucar.edu/software/netcdf/docs/file_format_specifications.html#netcdf_4_spec
2
+ # https://docs.unidata.ucar.edu/netcdf-c/current/file_format_specifications.html#netcdf_4_spec
3
3
  import os.path
4
4
  import warnings
5
5
  import weakref
@@ -54,9 +54,11 @@ def _transform_1d_boolean_indexers(key):
54
54
  # return key, if not iterable
55
55
  try:
56
56
  key = [
57
- np.asanyarray(k).nonzero()[0]
58
- if isinstance(k, (np.ndarray, list)) and type(k[0]) in (bool, np.bool_)
59
- else k
57
+ (
58
+ np.asanyarray(k).nonzero()[0]
59
+ if isinstance(k, (np.ndarray, list)) and type(k[0]) in (bool, np.bool_)
60
+ else k
61
+ )
60
62
  for k in key
61
63
  ]
62
64
  except TypeError:
@@ -106,13 +108,11 @@ def _expanded_indexer(key, ndim):
106
108
  return key[k1] + res_dims + key[k2]
107
109
 
108
110
 
109
- class BaseVariable:
110
- def __init__(self, parent, name, dimensions=None):
111
+ class BaseObject:
112
+ def __init__(self, parent, name):
111
113
  self._parent_ref = weakref.ref(parent)
112
114
  self._root_ref = weakref.ref(parent._root)
113
115
  self._h5path = _join_h5paths(parent.name, name)
114
- self._dimensions = dimensions
115
- self._initialized = True
116
116
 
117
117
  @property
118
118
  def _parent(self):
@@ -128,11 +128,149 @@ class BaseVariable:
128
128
  # subclasses:
129
129
  return self._root._h5file[self._h5path]
130
130
 
131
+ @property
132
+ def name(self):
133
+ """Return object name."""
134
+ return self._h5ds.name
135
+
136
+ @property
137
+ def dtype(self):
138
+ """Return NumPy dtype object giving the object’s type."""
139
+ return self._h5ds.dtype
140
+
141
+
142
+ _h5type_mapping = {
143
+ "H5T_COMPOUND": 6,
144
+ "H5T_ENUM": 8,
145
+ "H5T_VLEN": 9,
146
+ }
147
+
148
+
149
+ def _get_h5usertype_identifier(h5type):
150
+ """Return H5 Type Identifier from given H5 Datatype."""
151
+ try:
152
+ # h5py first
153
+ h5typeid = h5type.id.get_class()
154
+ except AttributeError:
155
+ # h5pyd second
156
+ h5typeid = _h5type_mapping[h5type.id.type_json["class"]]
157
+ return h5typeid
158
+
159
+
160
+ def _get_h5dstype_identifier(h5type):
161
+ """Return H5 Type Identifier from given H5 Dataset."""
162
+ try:
163
+ # h5py first
164
+ h5typeid = h5type.id.get_type().get_class()
165
+ except AttributeError:
166
+ # h5pyd second
167
+ h5typeid = _h5type_mapping[h5type.id.type_json["class"]]
168
+ return h5typeid
169
+
170
+
171
+ class UserType(BaseObject):
172
+ _cls_name = "h5netcdf.UserType"
173
+
174
+ @property
175
+ def name(self):
176
+ """Return user type name."""
177
+ # strip hdf5 path
178
+ return super().name.split("/")[-1]
179
+
180
+ def __repr__(self):
181
+ if self._parent._root._closed:
182
+ return f"<Closed {self._cls_name!r}>"
183
+ header = f"<class {self._cls_name!r}: name = {self.name!r}, numpy dtype = {self.dtype!r}"
184
+ return header
185
+
186
+ @property
187
+ def _h5type_identifier(self):
188
+ """Returns type identifier.
189
+
190
+ See https://api.h5py.org/h5t.html#datatype-class-codes and
191
+ https://docs.hdfgroup.org (enum H5T_class_t)
192
+
193
+ """
194
+ return _get_h5usertype_identifier(self._h5ds)
195
+
196
+ @property
197
+ def _h5datatype(self):
198
+ """Returns comparable h5type.
199
+
200
+ - DatatypeID for h5py
201
+ - (dtype, dtype.metadata) for h5pyd
202
+ """
203
+ if self._root._h5py.__name__ == "h5py":
204
+ return self._h5ds.id
205
+ else:
206
+ return self.dtype, self.dtype.metadata
207
+
208
+
209
+ class EnumType(UserType):
210
+ _cls_name = "h5netcdf.EnumType"
211
+
212
+ @property
213
+ def enum_dict(self):
214
+ """Dictionary containing the Enum field/value pairs."""
215
+ return self.dtype.metadata["enum"]
216
+
217
+ def __repr__(self):
218
+ return super().__repr__() + f", fields / values = {self.enum_dict!r}"
219
+
220
+
221
+ class VLType(UserType):
222
+ _cls_name = "h5netcdf.VLType"
223
+
224
+
225
+ def _string_to_char_array_dtype(dtype):
226
+ """Converts fixed string to char array dtype."""
227
+ if dtype.kind == "c":
228
+ return None
229
+ return np.dtype(
230
+ {
231
+ name: (
232
+ np.dtype(("S1", fmt.itemsize)) if fmt.kind == "S" else fmt,
233
+ offset,
234
+ )
235
+ for name, (fmt, offset) in dtype.fields.items()
236
+ }
237
+ )
238
+
239
+
240
+ def _char_array_to_string_dtype(dtype):
241
+ """Converts char array to fixed string dtype."""
242
+ if dtype.kind == "c":
243
+ return None
244
+ return np.dtype(
245
+ {
246
+ name: (
247
+ np.dtype(f"S{fmt.shape[0]}") if fmt.base == "S1" else fmt,
248
+ offset,
249
+ )
250
+ for name, (fmt, offset) in dtype.fields.items()
251
+ }
252
+ )
253
+
254
+
255
+ class CompoundType(UserType):
256
+ _cls_name = "h5netcdf.CompoundType"
257
+
258
+ @property
259
+ def dtype_view(self):
260
+ return _char_array_to_string_dtype(self.dtype)
261
+
262
+
263
+ class BaseVariable(BaseObject):
264
+ def __init__(self, parent, name, dimensions=None):
265
+ super().__init__(parent, name)
266
+ self._dimensions = dimensions
267
+ self._initialized = True
268
+
131
269
  @property
132
270
  def name(self):
133
271
  """Return variable name."""
134
272
  # fix name if _nc4_non_coord_
135
- return self._h5ds.name.replace("_nc4_non_coord_", "")
273
+ return super().name.replace("_nc4_non_coord_", "")
136
274
 
137
275
  def _lookup_dimensions(self):
138
276
  attrs = self._h5ds.attrs
@@ -253,6 +391,31 @@ class BaseVariable:
253
391
  if self._h5ds.shape != new_shape:
254
392
  self._h5ds.resize(new_shape)
255
393
 
394
+ def _add_fillvalue(self, fillvalue):
395
+ """Add _FillValue attribute.
396
+
397
+ This method takes care of adding fillvalue with the wanted
398
+ variable dtype.
399
+ """
400
+
401
+ # trying to create correct type of fillvalue
402
+ if self.dtype is str:
403
+ value = fillvalue
404
+ else:
405
+ # todo: this always checks for dtype.metadata
406
+ string_info = self._root._h5py.check_string_dtype(self.dtype)
407
+ enum_info = self._root._h5py.check_enum_dtype(self.dtype)
408
+ if (
409
+ string_info
410
+ and string_info.length is not None
411
+ and string_info.length > 1
412
+ ) or enum_info:
413
+ value = fillvalue
414
+ else:
415
+ value = self.dtype.type(fillvalue)
416
+
417
+ self.attrs["_FillValue"] = value
418
+
256
419
  @property
257
420
  def dimensions(self):
258
421
  """Return variable dimension names."""
@@ -268,16 +431,52 @@ class BaseVariable:
268
431
 
269
432
  @property
270
433
  def ndim(self):
271
- """Return number variable dimensions"""
434
+ """Return number of variable dimensions."""
272
435
  return len(self.shape)
273
436
 
274
437
  def __len__(self):
275
438
  return self.shape[0]
276
439
 
277
440
  @property
278
- def dtype(self):
279
- """Return NumPy dtype object giving the variable’s type."""
280
- return self._h5ds.dtype
441
+ def _h5type_identifier(self):
442
+ """Returns type identifier.
443
+
444
+ See https://api.h5py.org/h5t.html#datatype-class-codes and
445
+ https://docs.hdfgroup.org (enum H5T_class_t)
446
+
447
+ """
448
+ return _get_h5dstype_identifier(self._h5ds)
449
+
450
+ @property
451
+ def _h5datatype(self):
452
+ """Returns comparable h5type.
453
+
454
+ This property can be used to compare two variables/datatypes or
455
+ a variable and a datatype for equality of the underlying datatype.
456
+
457
+ - DatatypeID for h5py
458
+ - (dtype, dtype.metadata) for h5pyd
459
+ """
460
+ if self._root._h5py.__name__ == "h5py":
461
+ return self._h5ds.id.get_type()
462
+ else:
463
+ return self.dtype, self.dtype.metadata
464
+
465
+ @property
466
+ def datatype(self):
467
+ """Return datatype.
468
+
469
+ Returns numpy dtype (for primitive types) or VLType/CompoundType/EnumType
470
+ instance (for compound, vlen or enum data types).
471
+ """
472
+ # this is really painful as we have to iterate over all types
473
+ # and check equality
474
+ usertype = self._parent._get_usertype_dict(self._h5type_identifier)
475
+ if usertype is not None:
476
+ for tid in usertype.values():
477
+ if self._h5datatype == tid._h5datatype:
478
+ return tid
479
+ return self.dtype
281
480
 
282
481
  def _get_padding(self, key):
283
482
  """Return padding if needed, defaults to False."""
@@ -334,28 +533,54 @@ class BaseVariable:
334
533
 
335
534
  # get padding
336
535
  padding = self._get_padding(key)
536
+
337
537
  # apply padding with fillvalue (both api)
338
538
  if padding:
339
539
  fv = self.dtype.type(self._h5ds.fillvalue)
340
- return np.pad(
540
+ h5ds = np.pad(
341
541
  self._h5ds,
342
542
  pad_width=padding,
343
543
  mode="constant",
344
544
  constant_values=fv,
345
- )[key]
545
+ )
546
+ else:
547
+ h5ds = self._h5ds
346
548
 
347
- return self._h5ds[key]
549
+ if (
550
+ isinstance(self.datatype, CompoundType)
551
+ and (view := self.datatype.dtype_view) is not None
552
+ ):
553
+ return h5ds[key].view(view)
554
+ else:
555
+ return h5ds[key]
348
556
 
349
557
  def __setitem__(self, key, value):
350
558
  from .legacyapi import Dataset
351
559
 
560
+ # check if provided values match enumtype values
561
+ if enum_dict := self._root._h5py.check_enum_dtype(self.dtype):
562
+ mask = np.isin(value, list(enum_dict.values()))
563
+ wrong = set(np.asanyarray(value)[~mask])
564
+ if not mask.all():
565
+ raise ValueError(
566
+ f"Trying to assign illegal value(s) {wrong!r} to Enum variable {self.name!r}."
567
+ f" Valid values are {dict(enum_dict)!r}."
568
+ )
569
+
352
570
  if isinstance(self._parent._root, Dataset):
353
571
  # resize on write only for legacyapi
354
572
  key = _expanded_indexer(key, self.ndim)
355
573
  key = _transform_1d_boolean_indexers(key)
356
574
  # resize on write only for legacy API
357
575
  self._maybe_resize_dimensions(key, value)
358
- self._h5ds[key] = value
576
+
577
+ if (
578
+ isinstance(self.datatype, CompoundType)
579
+ and (view := _string_to_char_array_dtype(self.datatype.dtype)) is not None
580
+ ):
581
+ self._h5ds[key] = value.view(view)
582
+ else:
583
+ self._h5ds[key] = value
359
584
 
360
585
  @property
361
586
  def attrs(self):
@@ -368,14 +593,8 @@ class BaseVariable:
368
593
 
369
594
  def __repr__(self):
370
595
  if self._parent._root._closed:
371
- return "<Closed %s>" % self._cls_name
372
- header = "<{} {!r}: dimensions {}, shape {}, dtype {}>".format(
373
- self._cls_name,
374
- self.name,
375
- self.dimensions,
376
- self.shape,
377
- self.dtype,
378
- )
596
+ return f"<Closed {self._cls_name}>"
597
+ header = f"<{self._cls_name} {self.name!r}: dimensions {self.dimensions}, shape {self.shape}, dtype {self.dtype}>"
379
598
  return "\n".join(
380
599
  [header]
381
600
  + ["Attributes:"]
@@ -467,9 +686,164 @@ def _unlabeled_dimension_mix(h5py_dataset):
467
686
  return status
468
687
 
469
688
 
689
+ def _check_dtype(group, dtype):
690
+ """Check and handle dtypes when adding variable to given group.
691
+
692
+ Raises errors and issues warnings according to given dtype.
693
+ """
694
+
695
+ if dtype == np.bool_:
696
+ # never warn since h5netcdf has always errored here
697
+ _invalid_netcdf_feature(
698
+ "boolean dtypes",
699
+ group._root.invalid_netcdf,
700
+ )
701
+ else:
702
+ group._root._check_valid_netcdf_dtype(dtype)
703
+
704
+ # we only allow h5netcdf user types, not named h5py.Datatype
705
+ if isinstance(dtype, group._root._h5py.Datatype):
706
+ raise TypeError(
707
+ f"Argument dtype {dtype!r} is not allowed. "
708
+ f"Please provide h5netcdf user type or numpy compatible type."
709
+ )
710
+
711
+ # if user type is given extract underlying h5py object
712
+ # we just use the h5py user type here
713
+ if isinstance(dtype, (EnumType, VLType, CompoundType)):
714
+ h5type = dtype._h5ds
715
+ if dtype._root._h5file.filename != group._root._h5file.filename:
716
+ raise TypeError(
717
+ f"Given dtype {dtype} is not committed into current file"
718
+ f" {group._root._h5file.filename}. Instead it's committed into"
719
+ f" file {dtype._root._h5file.filename}"
720
+ )
721
+ # check if committed type can be accessed in current group hierarchy
722
+ user_type = group._get_usertype(h5type)
723
+ if user_type is None:
724
+ msg = (
725
+ f"Given dtype {dtype.name!r} is not accessible in current group"
726
+ f" {group._h5group.name!r} or any parent group. Instead it's defined at"
727
+ f" {h5type.name!r}. Please create it in the current or any parent group."
728
+ )
729
+ raise TypeError(msg)
730
+ # this checks for committed types which are overridden by re-definitions
731
+ elif (actual := user_type._h5ds.name) != h5type.name:
732
+ msg = (
733
+ f"Given dtype {dtype.name!r} is defined at {h5type.name!r}."
734
+ f" Another dtype with same name is defined at {actual!r} and"
735
+ f" would override it."
736
+ )
737
+ raise TypeError(msg)
738
+ elif np.dtype(dtype).kind == "c":
739
+ itemsize = np.dtype(dtype).itemsize
740
+ try:
741
+ width = {8: "FLOAT", 16: "DOUBLE"}[itemsize]
742
+ except KeyError as e:
743
+ raise TypeError(
744
+ "Currently only 'complex64' and 'complex128' dtypes are allowed."
745
+ ) from e
746
+ dname = f"_PFNC_{width}_COMPLEX_TYPE"
747
+ # todo check compound type for existing complex types
748
+ # which may be used here
749
+ # if dname is not available in current group-path
750
+ # create and commit type in current group
751
+ if dname not in group._all_cmptypes:
752
+ dtype = group.create_cmptype(dtype, dname).dtype
753
+
754
+ return dtype
755
+
756
+
757
+ def _check_fillvalue(group, fillvalue, dtype):
758
+ """Handles fillvalues when adding variable to given group.
759
+
760
+ Raises errors and issues warnings according to
761
+ given fillvalue and dtype.
762
+ """
763
+
764
+ # handling default fillvalues for legacyapi
765
+ # see https://github.com/h5netcdf/h5netcdf/issues/182
766
+ from .legacyapi import Dataset, _get_default_fillvalue
767
+
768
+ stacklevel = 5 if isinstance(group._root, Dataset) else 4
769
+
770
+ h5fillvalue = fillvalue
771
+
772
+ # if no fillvalue is provided take netcdf4 default values for legacyapi
773
+ if fillvalue is None:
774
+ if isinstance(group._root, Dataset):
775
+ h5fillvalue = _get_default_fillvalue(dtype)
776
+
777
+ # handling for EnumType
778
+ if dtype is not None and isinstance(dtype, EnumType):
779
+ if fillvalue is None:
780
+ # 1. we need to warn the user that writing enums with default values
781
+ # which are defined in the enum dict will mask those values
782
+ if (h5fillvalue or 0) in dtype.enum_dict.values():
783
+ reverse = dict((v, k) for k, v in dtype.enum_dict.items())
784
+ msg = (
785
+ f"Creating variable with default fill_value {h5fillvalue or 0!r}"
786
+ f" which IS defined in enum type {dtype!r}."
787
+ f" This will mask entry {{{reverse[h5fillvalue or 0]!r}: {h5fillvalue or 0!r}}}."
788
+ )
789
+ warnings.warn(msg, stacklevel=stacklevel)
790
+ else:
791
+ # 2. we need to raise if the default fillvalue is not within the enum dict
792
+ if (
793
+ h5fillvalue is not None
794
+ and h5fillvalue not in dtype.enum_dict.values()
795
+ ):
796
+ msg = (
797
+ f"Creating variable with default fill_value {h5fillvalue!r}"
798
+ f" which IS NOT defined in enum type {dtype!r}."
799
+ f" Please provide a fitting fill_value or enum type."
800
+ )
801
+ raise ValueError(msg)
802
+ if h5fillvalue is None and 0 not in dtype.enum_dict.values():
803
+ # 3. we should inform the user that a fillvalue of '0'
804
+ # will be interpreted as _UNDEFINED in netcdf-c
805
+ # if it is not defined in the enum dict
806
+ msg = (
807
+ f"Creating variable with default fill_value {0!r}"
808
+ f" which IS NOT defined in enum type {dtype!r}."
809
+ f" Value {0!r} will be interpreted as '_UNDEFINED' by netcdf-c."
810
+ )
811
+ warnings.warn(msg, stacklevel=stacklevel)
812
+ else:
813
+ if h5fillvalue not in dtype.enum_dict.values():
814
+ # 4. we should inform the user that a fillvalue of '0'
815
+ # will be interpreted as _UNDEFINED in netcdf-c
816
+ # if it is not defined in the enum dict
817
+ if h5fillvalue == 0:
818
+ msg = (
819
+ f"Creating variable with specified fill_value {h5fillvalue!r}"
820
+ f" which IS NOT defined in enum type {dtype!r}."
821
+ f" Value {0!r} will be interpreted as '_UNDEFINED' by netcdf-c."
822
+ )
823
+ warnings.warn(msg, stacklevel=stacklevel)
824
+ # 5. we need to raise if the fillvalue is not within the enum_dict
825
+ else:
826
+ msg = (
827
+ f"Creating variable with specified fill_value {h5fillvalue!r}"
828
+ f" which IS NOT defined in enum type {dtype!r}."
829
+ f" Please provide a matching fill_value or enum type."
830
+ )
831
+ raise ValueError(msg)
832
+
833
+ if fillvalue is not None:
834
+ # cast to wanted type
835
+ fillvalue = np.array(h5fillvalue).astype(dtype)
836
+ h5fillvalue = fillvalue
837
+
838
+ return fillvalue, h5fillvalue
839
+
840
+
470
841
  class Group(Mapping):
471
842
  _variable_cls = Variable
472
843
  _dimension_cls = Dimension
844
+ _enumtype_cls = EnumType
845
+ _vltype_cls = VLType
846
+ _cmptype_cls = CompoundType
473
847
 
474
848
  @property
475
849
  def _group_cls(self):
@@ -486,13 +860,23 @@ class Group(Mapping):
486
860
  self._h5path = _join_h5paths(parent._h5path, name)
487
861
 
488
862
  self._dimensions = Dimensions(self)
863
+ self._enumtypes = _LazyObjectLookup(self, self._enumtype_cls)
864
+ self._vltypes = _LazyObjectLookup(self, self._vltype_cls)
865
+ self._cmptypes = _LazyObjectLookup(self, self._cmptype_cls)
489
866
 
490
867
  # this map keeps track of all dimensions
491
868
  if parent is self:
492
869
  self._all_dimensions = ChainMap(self._dimensions)
870
+ self._all_enumtypes = ChainMap(self._enumtypes)
871
+ self._all_vltypes = ChainMap(self._vltypes)
872
+ self._all_cmptypes = ChainMap(self._cmptypes)
873
+
493
874
  else:
494
875
  self._all_dimensions = parent._all_dimensions.new_child(self._dimensions)
495
876
  self._all_h5groups = parent._all_h5groups.new_child(self._h5group)
877
+ self._all_enumtypes = parent._all_enumtypes.new_child(self._enumtypes)
878
+ self._all_vltypes = parent._all_vltypes.new_child(self._vltypes)
879
+ self._all_cmptypes = parent._all_cmptypes.new_child(self._cmptypes)
496
880
 
497
881
  self._variables = _LazyObjectLookup(self, self._variable_cls)
498
882
  self._groups = _LazyObjectLookup(self, self._group_cls)
@@ -506,6 +890,9 @@ class Group(Mapping):
506
890
  # add to the groups collection if this is a h5py(d) Group
507
891
  # instance
508
892
  self._groups.add(k)
893
+ elif isinstance(v, self._root._h5py.Datatype):
894
+ # add usertypes (enum, vlen, compound)
895
+ self._add_usertype(v)
509
896
  else:
510
897
  if v.attrs.get("CLASS") == b"DIMENSION_SCALE":
511
898
  # add dimension and retrieve size
@@ -587,16 +974,16 @@ class Group(Mapping):
587
974
  for k, v in self._all_dimensions.maps[0].items():
588
975
  if k in value:
589
976
  if v != value[k]:
590
- raise ValueError("cannot modify existing dimension %r" % k)
977
+ raise ValueError(f"cannot modify existing dimension {k:!r}")
591
978
  else:
592
979
  raise ValueError(
593
- "new dimensions do not include existing dimension %r" % k
980
+ f"new dimensions do not include existing dimension {k:!r}"
594
981
  )
595
982
  self._dimensions.update(value)
596
983
 
597
984
  def _create_child_group(self, name):
598
985
  if name in self:
599
- raise ValueError("unable to create group %r (name already exists)" % name)
986
+ raise ValueError(f"unable to create group {name:!r} (name already exists)")
600
987
  kwargs = {}
601
988
  if self._root._h5py.__name__ == "h5py":
602
989
  kwargs.update(track_order=self._track_order)
@@ -641,7 +1028,7 @@ class Group(Mapping):
641
1028
  ):
642
1029
  if name in self:
643
1030
  raise ValueError(
644
- "unable to create variable %r " "(name already exists)" % name
1031
+ f"unable to create variable {name:!r} (name already exists)"
645
1032
  )
646
1033
  if data is not None:
647
1034
  data = np.asarray(data)
@@ -649,14 +1036,8 @@ class Group(Mapping):
649
1036
  if dtype is None:
650
1037
  dtype = data.dtype
651
1038
 
652
- if dtype == np.bool_:
653
- # never warn since h5netcdf has always errored here
654
- _invalid_netcdf_feature(
655
- "boolean dtypes",
656
- self._root.invalid_netcdf,
657
- )
658
- else:
659
- self._root._check_valid_netcdf_dtype(dtype)
1039
+ # check and handle dtypes
1040
+ dtype = _check_dtype(self, dtype)
660
1041
 
661
1042
  if "scaleoffset" in kwargs:
662
1043
  _invalid_netcdf_feature(
@@ -708,8 +1089,8 @@ class Group(Mapping):
708
1089
  pass
709
1090
  else:
710
1091
  raise ValueError(
711
- "got unrecognized value %s for chunking_heuristic argument "
712
- '(has to be "h5py" or "h5netcdf")' % chunking_heuristic
1092
+ f"got unrecognized value {chunking_heuristic} for chunking_heuristic argument "
1093
+ '(has to be "h5py" or "h5netcdf")'
713
1094
  )
714
1095
 
715
1096
  # Clear dummy HDF5 datasets with this name that were created for a
@@ -724,13 +1105,8 @@ class Group(Mapping):
724
1105
  if self._root._h5py.__name__ == "h5py":
725
1106
  kwargs.update(dict(track_order=self._parent._track_order))
726
1107
 
727
- # handling default fillvalues for legacyapi
728
- # see https://github.com/h5netcdf/h5netcdf/issues/182
729
- from .legacyapi import Dataset, _get_default_fillvalue
730
-
731
- fillval = fillvalue
732
- if fillvalue is None and isinstance(self._parent._root, Dataset):
733
- fillval = _get_default_fillvalue(dtype)
1108
+ # fill value handling
1109
+ fillvalue, h5fillvalue = _check_fillvalue(self, fillvalue, dtype)
734
1110
 
735
1111
  # create hdf5 variable
736
1112
  self._h5group.create_dataset(
@@ -739,7 +1115,7 @@ class Group(Mapping):
739
1115
  dtype=dtype,
740
1116
  data=data,
741
1117
  chunks=chunks,
742
- fillvalue=fillval,
1118
+ fillvalue=h5fillvalue,
743
1119
  **kwargs,
744
1120
  )
745
1121
 
@@ -768,22 +1144,10 @@ class Group(Mapping):
768
1144
  # Todo: get this consistent with netcdf-c/netcdf4-python
769
1145
  variable._ensure_dim_id()
770
1146
 
1147
+ # add fillvalue attribute to variable
771
1148
  if fillvalue is not None:
772
- # trying to create correct type of fillvalue
773
- if variable.dtype is str:
774
- value = fillvalue
775
- else:
776
- string_info = self._root._h5py.check_string_dtype(variable.dtype)
777
- if (
778
- string_info
779
- and string_info.length is not None
780
- and string_info.length > 1
781
- ):
782
- value = fillvalue
783
- else:
784
- value = variable.dtype.type(fillvalue)
1149
+ variable._add_fillvalue(fillvalue)
785
1150
 
786
- variable.attrs._h5attrs["_FillValue"] = value
787
1151
  return variable
788
1152
 
789
1153
  def create_variable(
@@ -807,8 +1171,8 @@ class Group(Mapping):
807
1171
  dimensions : tuple
808
1172
  Tuple containing dimension name strings. Defaults to empty tuple, effectively
809
1173
  creating a scalar variable.
810
- dtype : numpy.dtype, str, optional
811
- Dataype of the new variable. Defaults to None.
1174
+ dtype : numpy.dtype, str, UserType (Enum, VL, Compound), optional
1175
+ Datatype of the new variable. Defaults to None.
812
1176
  fillvalue : scalar, optional
813
1177
  Specify fillvalue for uninitialized parts of the variable. Defaults to ``None``.
814
1178
  chunks : tuple, optional
@@ -841,6 +1205,7 @@ class Group(Mapping):
841
1205
  var : h5netcdf.Variable
842
1206
  Variable class instance
843
1207
  """
1208
+
844
1209
  # if root-variable
845
1210
  if name.startswith("/"):
846
1211
  # handling default fillvalues for legacyapi
@@ -918,6 +1283,48 @@ class Group(Mapping):
918
1283
  def variables(self):
919
1284
  return Frozen(self._variables)
920
1285
 
1286
+ def _add_usertype(self, h5type):
1287
+ """Add usertype to related usertype dict.
1288
+
1289
+ The type is added by name to the dict attached to current group.
1290
+ """
1291
+ name = h5type.name.split("/")[-1]
1292
+ h5typeid = _get_h5usertype_identifier(h5type)
1293
+ # add usertype to corresponding dict
1294
+ self._get_usertype_dict(h5typeid).maps[0].add(name)
1295
+
1296
+ def _get_usertype(self, h5type):
1297
+ """Get usertype from related usertype dict."""
1298
+ h5typeid = _get_h5usertype_identifier(h5type)
1299
+ return self._get_usertype_dict(h5typeid).get(h5type.name.split("/")[-1])
1300
+
1301
+ def _get_usertype_dict(self, h5typeid):
1302
+ """Return usertype-dict related to given h5 type identifier.
1303
+
1304
+ See https://api.h5py.org/h5t.html#datatype-class-codes and
1305
+ https://docs.hdfgroup.org (enum H5T_class_t)
1306
+ """
1307
+ return {
1308
+ 6: self._all_cmptypes,
1309
+ 8: self._all_enumtypes,
1310
+ 9: self._all_vltypes,
1311
+ }.get(h5typeid)
1312
+
1313
+ @property
1314
+ def enumtypes(self):
1315
+ """Return group defined enum types."""
1316
+ return Frozen(self._enumtypes)
1317
+
1318
+ @property
1319
+ def vltypes(self):
1320
+ """Return group defined vlen types."""
1321
+ return Frozen(self._vltypes)
1322
+
1323
+ @property
1324
+ def cmptypes(self):
1325
+ """Return group defined compound types."""
1326
+ return Frozen(self._cmptypes)
1327
+
921
1328
  @property
922
1329
  def dims(self):
923
1330
  return Frozen(self._dimensions)
@@ -936,9 +1343,11 @@ class Group(Mapping):
936
1343
  + [
937
1344
  " {}: {}".format(
938
1345
  k,
939
- f"Unlimited (current: {self._dimensions[k].size})"
940
- if v is None
941
- else v,
1346
+ (
1347
+ f"Unlimited (current: {self._dimensions[k].size})"
1348
+ if v is None
1349
+ else v
1350
+ ),
942
1351
  )
943
1352
  for k, v in self.dimensions.items()
944
1353
  ]
@@ -967,6 +1376,59 @@ class Group(Mapping):
967
1376
  """
968
1377
  self._dimensions[dim]._resize(size)
969
1378
 
1379
+ def create_enumtype(self, datatype, datatype_name, enum_dict):
1380
+ """Create EnumType.
1381
+
1382
+ datatype: np.dtype
1383
+ A numpy integer dtype object describing the base type for the Enum.
1384
+ datatype_name: string
1385
+ A Python string containing a description of the Enum data type.
1386
+ enum_dict: dict
1387
+ A Python dictionary containing the Enum field/value pairs.
1388
+ """
1389
+ et = self._root._h5py.enum_dtype(enum_dict, basetype=datatype)
1390
+ self._h5group[datatype_name] = et
1391
+ # create enumtype class instance
1392
+ enumtype = self._enumtype_cls(self, datatype_name)
1393
+ self._enumtypes[datatype_name] = enumtype
1394
+ return enumtype
1395
+
1396
+ def create_vltype(self, datatype, datatype_name):
1397
+ """Create VLType.
1398
+
1399
+ datatype: np.dtype
1400
+ A numpy dtype object describing the base type.
1401
+ datatype_name: string
1402
+ A Python string containing a description of the VL data type.
1403
+ """
1404
+ # wrap in numpy dtype first
1405
+ datatype = np.dtype(datatype)
1406
+ et = self._root._h5py.vlen_dtype(datatype)
1407
+ self._h5group[datatype_name] = et
1408
+ # create vltype class instance
1409
+ vltype = self._vltype_cls(self, datatype_name)
1410
+ self._vltypes[datatype_name] = vltype
1411
+ return vltype
1412
+
1413
+ def create_cmptype(self, datatype, datatype_name):
1414
+ """Create CompoundType.
1415
+
1416
+ datatype: np.dtype
1417
+ A numpy dtype object describing the structured type.
1418
+ datatype_name: string
1419
+ A Python string containing a description of the compound data type.
1420
+ """
1421
+ # wrap in numpy dtype first
1422
+ datatype = np.dtype(datatype)
1423
+ if (new_dtype := _string_to_char_array_dtype(datatype)) is not None:
1424
+ # "SN" -> ("S1", (N,))
1425
+ datatype = new_dtype
1426
+ self._h5group[datatype_name] = datatype
1427
+ # create compound class instance
1428
+ cmptype = self._cmptype_cls(self, datatype_name)
1429
+ self._cmptypes[datatype_name] = cmptype
1430
+ return cmptype
1431
+
970
1432
 
971
1433
  class File(Group):
972
1434
  def __init__(self, path, mode="r", invalid_netcdf=False, phony_dims=None, **kwargs):
@@ -975,7 +1437,8 @@ class File(Group):
975
1437
  Parameters
976
1438
  ----------
977
1439
  path: path-like
978
- Location of the netCDF4 file to be accessed.
1440
+ Location of the netCDF4 file to be accessed, or an h5py File object,
1441
+ or a Python file-like object (which should read/write bytes).
979
1442
 
980
1443
  mode: "r", "r+", "a", "w"
981
1444
  A valid file access mode. Defaults to "r".
@@ -1010,6 +1473,10 @@ class File(Group):
1010
1473
 
1011
1474
  Datasets created with h5netcdf version 0.12.0 that are opened with
1012
1475
  newer versions of h5netcdf will continue to disable order tracker.
1476
+
1477
+ If an h5py File object is passed in, closing the h5netcdf wrapper will
1478
+ not close the h5py File. In other cases, closing the h5netcdf File object
1479
+ does close the underlying file.
1013
1480
  """
1014
1481
  # 2022/01/09
1015
1482
  # netCDF4 wants the track_order parameter to be true
@@ -1027,6 +1494,7 @@ class File(Group):
1027
1494
  track_order = kwargs.pop("track_order", track_order_default)
1028
1495
 
1029
1496
  self.decode_vlen_strings = kwargs.pop("decode_vlen_strings", None)
1497
+ self._close_h5file = True
1030
1498
  try:
1031
1499
  if isinstance(path, str):
1032
1500
  if (
@@ -1054,6 +1522,12 @@ class File(Group):
1054
1522
  self._h5file = self._h5py.File(
1055
1523
  path, mode, track_order=track_order, **kwargs
1056
1524
  )
1525
+ elif isinstance(path, h5py.File):
1526
+ self._preexisting_file = mode in {"r", "r+", "a"}
1527
+ self._h5py = h5py
1528
+ self._h5file = path
1529
+ # h5py File passed in: let the caller decide when to close it
1530
+ self._close_h5file = False
1057
1531
  else: # file-like object
1058
1532
  self._preexisting_file = mode in {"r", "r+", "a"}
1059
1533
  self._h5py = h5py
@@ -1129,16 +1603,10 @@ class File(Group):
1129
1603
  def _check_valid_netcdf_dtype(self, dtype):
1130
1604
  dtype = np.dtype(dtype)
1131
1605
 
1132
- if dtype == bool:
1606
+ if dtype == bool: # noqa
1133
1607
  description = "boolean"
1134
- elif dtype == complex:
1135
- description = "complex"
1136
- elif h5py.check_dtype(enum=dtype) is not None:
1137
- description = "enum"
1138
- elif h5py.check_dtype(ref=dtype) is not None:
1608
+ elif self._h5py.check_dtype(ref=dtype) is not None:
1139
1609
  description = "reference"
1140
- elif h5py.check_dtype(vlen=dtype) not in {None, str, bytes}:
1141
- description = "non-string variable length"
1142
1610
  else:
1143
1611
  description = None
1144
1612
 
@@ -1168,15 +1636,14 @@ class File(Group):
1168
1636
  if self._writable:
1169
1637
  # only write `_NCProperties` in newly created files
1170
1638
  if not self._preexisting_file and not self.invalid_netcdf:
1171
- _NC_PROPERTIES = "version=2,h5netcdf={},hdf5={},{}={}".format(
1172
- __version__,
1173
- self._h5py.version.hdf5_version,
1174
- self._h5py.__name__,
1175
- self._h5py.__version__,
1639
+ _NC_PROPERTIES = (
1640
+ f"version=2,h5netcdf={__version__},"
1641
+ f"hdf5={self._h5py.version.hdf5_version},"
1642
+ f"{self._h5py.__name__}={self._h5py.__version__}"
1176
1643
  )
1177
1644
  self.attrs._h5attrs["_NCProperties"] = np.array(
1178
1645
  _NC_PROPERTIES,
1179
- dtype=h5py.string_dtype(
1646
+ dtype=self._h5py.string_dtype(
1180
1647
  encoding="ascii", length=len(_NC_PROPERTIES)
1181
1648
  ),
1182
1649
  )
@@ -1200,7 +1667,9 @@ class File(Group):
1200
1667
  def close(self):
1201
1668
  if not self._closed:
1202
1669
  self.flush()
1203
- self._h5file.close()
1670
+ if self._close_h5file:
1671
+ self._h5file.close()
1672
+ self._h5file = None
1204
1673
  self._closed = True
1205
1674
 
1206
1675
  __del__ = close
@@ -1215,11 +1684,9 @@ class File(Group):
1215
1684
 
1216
1685
  def __repr__(self):
1217
1686
  if self._closed:
1218
- return "<Closed %s>" % self._cls_name
1219
- header = "<{} {!r} (mode {})>".format(
1220
- self._cls_name,
1221
- self.filename.split("/")[-1],
1222
- self.mode,
1687
+ return f"<Closed {self._cls_name}>"
1688
+ header = (
1689
+ f"<{self._cls_name} {os.path.basename(self.filename)!r} (mode {self.mode})>"
1223
1690
  )
1224
1691
  return "\n".join([header] + self._repr_body())
1225
1692