h5yaml 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
h5yaml/lib/chunksizes.py CHANGED
@@ -35,20 +35,28 @@ def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int]:
35
35
 
36
36
  """
37
37
  fixed_size = dtype_sz
38
- for val in [x for x in dims if x > 0]:
39
- fixed_size *= val
40
-
41
- if 0 in dims: # variable with an unlimited dimension
42
- udim = dims.index(0)
43
- else: # variable has no unlimited dimension
44
- udim = 0
45
- if fixed_size < 65536:
38
+ if len(dims) > 1:
39
+ for val in [x for x in dims[1:] if x > 0]:
40
+ fixed_size *= val
41
+
42
+ # first variables without an unlimited dimension
43
+ if 0 not in dims:
44
+ if fixed_size < 400000:
46
45
  return "contiguous"
47
46
 
47
+ res = list(dims)
48
+ res[0] = max(1, 2048000 // fixed_size)
49
+ return tuple(res)
50
+
51
+ # then variables with an unlimited dimension
48
52
  if len(dims) == 1:
49
53
  return (1024,)
50
54
 
55
+ udim = dims.index(0)
51
56
  res = list(dims)
52
- res[udim] = min(1024, (2048 * 1024) // (fixed_size // max(1, dims[0])))
57
+ if fixed_size < 400000:
58
+ res[udim] = 1024
59
+ else:
60
+ res[udim] = max(1, 2048000 // fixed_size)
53
61
 
54
62
  return tuple(res)
h5yaml/yaml_h5py.py CHANGED
@@ -23,6 +23,8 @@ import numpy as np
23
23
  from h5yaml.conf_from_yaml import conf_from_yaml
24
24
  from h5yaml.lib.chunksizes import guess_chunks
25
25
 
26
+ # - helper function ------------------------------------
27
+
26
28
 
27
29
  # - class definition -----------------------------------
28
30
  class H5Yaml:
@@ -53,22 +55,20 @@ class H5Yaml:
53
55
 
54
56
  def __dimensions(self: H5Yaml, fid: h5py.File) -> None:
55
57
  """Add dimensions to HDF5 product."""
56
- for key, value in self.h5_def["dimensions"].items():
58
+ for key, val in self.h5_def["dimensions"].items():
57
59
  fillvalue = None
58
- if "_FillValue" in value:
60
+ if "_FillValue" in val:
59
61
  fillvalue = (
60
- np.nan if value["_FillValue"] == "NaN" else int(value["_FillValue"])
62
+ np.nan if val["_FillValue"] == "NaN" else int(val["_FillValue"])
61
63
  )
62
64
 
63
- if value["_size"] == 0:
64
- ds_chunk = value.get("_chunks", (50,))
65
+ if val["_size"] == 0:
66
+ ds_chunk = val.get("_chunks", (50,))
65
67
  dset = fid.create_dataset(
66
68
  key,
67
69
  shape=(0,),
68
70
  dtype=(
69
- h5py.string_dtype()
70
- if value["_dtype"] == "str"
71
- else value["_dtype"]
71
+ h5py.string_dtype() if val["_dtype"] == "str" else val["_dtype"]
72
72
  ),
73
73
  chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
74
74
  maxshape=(None,),
@@ -77,21 +77,48 @@ class H5Yaml:
77
77
  else:
78
78
  dset = fid.create_dataset(
79
79
  key,
80
- shape=(value["_size"],),
81
- dtype=value["_dtype"],
80
+ shape=(val["_size"],),
81
+ dtype=val["_dtype"],
82
82
  )
83
- if "_values" in value:
84
- dset[:] = value["_values"]
83
+ if "_values" in val:
84
+ dset[:] = val["_values"]
85
85
 
86
86
  dset.make_scale(
87
87
  Path(key).name
88
- if "long_name" in value
88
+ if "long_name" in val
89
89
  else "This is a netCDF dimension but not a netCDF variable."
90
90
  )
91
- for attr, attr_val in value.items():
91
+ for attr, attr_val in val.items():
92
92
  if attr.startswith("_"):
93
93
  continue
94
- dset.attrs[attr] = attr_val
94
+ if attr in ("valid_min", "valid_max"):
95
+ match val["_dtype"]:
96
+ case "i1":
97
+ dset.attrs[attr] = np.int8(attr_val)
98
+ case "i2":
99
+ dset.attrs[attr] = np.int16(attr_val)
100
+ case "i4":
101
+ dset.attrs[attr] = np.int32(attr_val)
102
+ case "i8":
103
+ dset.attrs[attr] = np.int64(attr_val)
104
+ case "u1":
105
+ dset.attrs[attr] = np.uint8(attr_val)
106
+ case "u2":
107
+ dset.attrs[attr] = np.uint16(attr_val)
108
+ case "u4":
109
+ dset.attrs[attr] = np.uint32(attr_val)
110
+ case "u8":
111
+ dset.attrs[attr] = np.uint64(attr_val)
112
+ case "f2":
113
+ dset.attrs[attr] = np.float16(attr_val)
114
+ case "f4":
115
+ dset.attrs[attr] = np.float32(attr_val)
116
+ case "f8":
117
+ dset.attrs[attr] = np.float64(attr_val)
118
+ case _:
119
+ dset.attrs[attr] = attr_val
120
+ else:
121
+ dset.attrs[attr] = attr_val
95
122
 
96
123
  def __compounds(self: H5Yaml, fid: h5py.File) -> dict[str, str | int | float]:
97
124
  """Add compound datatypes to HDF5 product."""
@@ -112,14 +139,14 @@ class H5Yaml:
112
139
  for key, value in res.items():
113
140
  self.h5_def["compounds"][key] = value
114
141
 
115
- for key, value in self.h5_def["compounds"].items():
142
+ for key, val in self.h5_def["compounds"].items():
116
143
  compounds[key] = {
117
144
  "dtype": [],
118
145
  "units": [],
119
146
  "names": [],
120
147
  }
121
148
 
122
- for _key, _val in value.items():
149
+ for _key, _val in val.items():
123
150
  compounds[key]["dtype"].append((_key, _val[0]))
124
151
  if len(_val) == 3:
125
152
  compounds[key]["units"].append(_val[1])
@@ -156,12 +183,19 @@ class H5Yaml:
156
183
  np.nan if val["_FillValue"] == "NaN" else int(val["_FillValue"])
157
184
  )
158
185
 
159
- compression = None
160
- shuffle = False
161
- # currently only gzip compression is supported
162
- if "_compression" in val:
163
- compression = val["_compression"]
164
- shuffle = True
186
+ # check for scalar dataset
187
+ if val["_dims"][0] == "scalar":
188
+ dset = fid.create_dataset(
189
+ key,
190
+ (),
191
+ dtype=ds_dtype,
192
+ fillvalue=fillvalue,
193
+ )
194
+ for attr, attr_val in val.items():
195
+ if attr.startswith("_"):
196
+ continue
197
+ dset.attrs[attr] = attr_val
198
+ continue
165
199
 
166
200
  n_udim = 0
167
201
  ds_shape = ()
@@ -194,6 +228,13 @@ class H5Yaml:
194
228
  fillvalue=fillvalue,
195
229
  )
196
230
  else:
231
+ compression = None
232
+ shuffle = False
233
+ # currently only gzip compression is supported
234
+ if "_compression" in val:
235
+ compression = val["_compression"]
236
+ shuffle = True
237
+
197
238
  if val.get("_vlen"):
198
239
  ds_dtype = h5py.vlen_dtype(ds_dtype)
199
240
  fillvalue = None
@@ -217,7 +258,36 @@ class H5Yaml:
217
258
  for attr, attr_val in val.items():
218
259
  if attr.startswith("_"):
219
260
  continue
220
- dset.attrs[attr] = attr_val
261
+ if attr in ("valid_min", "valid_max"):
262
+ match val["_dtype"]:
263
+ case "i1":
264
+ dset.attrs[attr] = np.int8(attr_val)
265
+ case "i2":
266
+ dset.attrs[attr] = np.int16(attr_val)
267
+ case "i4":
268
+ dset.attrs[attr] = np.int32(attr_val)
269
+ case "i8":
270
+ dset.attrs[attr] = np.int64(attr_val)
271
+ case "u1":
272
+ dset.attrs[attr] = np.uint8(attr_val)
273
+ case "u2":
274
+ dset.attrs[attr] = np.uint16(attr_val)
275
+ case "u4":
276
+ dset.attrs[attr] = np.uint32(attr_val)
277
+ case "u8":
278
+ dset.attrs[attr] = np.uint64(attr_val)
279
+ case "f2":
280
+ dset.attrs[attr] = np.float16(attr_val)
281
+ case "f4":
282
+ dset.attrs[attr] = np.float32(attr_val)
283
+ case "f8":
284
+ dset.attrs[attr] = np.float64(attr_val)
285
+ case _:
286
+ dset.attrs[attr] = attr_val
287
+ elif attr == "flag_values":
288
+ dset.attrs[attr] = np.array(attr_val, dtype="u1")
289
+ else:
290
+ dset.attrs[attr] = attr_val
221
291
 
222
292
  if compounds is not None and val["_dtype"] in compounds:
223
293
  if compounds[val["_dtype"]]["units"]:
h5yaml/yaml_nc.py CHANGED
@@ -15,6 +15,7 @@ __all__ = ["NcYaml"]
15
15
 
16
16
  import logging
17
17
  from importlib.resources import files
18
+ from pathlib import PurePosixPath
18
19
  from typing import TYPE_CHECKING
19
20
 
20
21
  import numpy as np
@@ -47,12 +48,20 @@ class NcYaml:
47
48
  def __groups(self: NcYaml, fid: Dataset) -> None:
48
49
  """Create groups in HDF5 product."""
49
50
  for key in self.h5_def["groups"]:
50
- _ = fid.createGroup(key)
51
+ pkey = PurePosixPath(key)
52
+ if pkey.is_absolute():
53
+ _ = fid[pkey.parent].createGroup(pkey.name)
54
+ else:
55
+ _ = fid.createGroup(key)
51
56
 
52
57
  def __dimensions(self: NcYaml, fid: Dataset) -> None:
53
58
  """Add dimensions to HDF5 product."""
54
59
  for key, value in self.h5_def["dimensions"].items():
55
- _ = fid.createDimension(key, value["_size"])
60
+ pkey = PurePosixPath(key)
61
+ if pkey.is_absolute():
62
+ _ = fid[pkey.parent].createDimension(pkey.name, value["_size"])
63
+ else:
64
+ _ = fid.createDimension(key, value["_size"])
56
65
 
57
66
  if "long_name" not in value:
58
67
  continue
@@ -63,13 +72,22 @@ class NcYaml:
63
72
  np.nan if value["_FillValue"] == "NaN" else int(value["_FillValue"])
64
73
  )
65
74
 
66
- dset = fid.createVariable(
67
- key,
68
- value["_dtype"],
69
- dimensions=(key,),
70
- fill_value=fillvalue,
71
- contiguous=value["_size"] != 0,
72
- )
75
+ if pkey.is_absolute():
76
+ dset = fid[pkey.parent].createVariable(
77
+ pkey.name,
78
+ value["_dtype"],
79
+ dimensions=(pkey.name,),
80
+ fill_value=fillvalue,
81
+ contiguous=value["_size"] != 0,
82
+ )
83
+ else:
84
+ dset = fid.createVariable(
85
+ key,
86
+ value["_dtype"],
87
+ dimensions=(key,),
88
+ fill_value=fillvalue,
89
+ contiguous=value["_size"] != 0,
90
+ )
73
91
  dset.setncatts({k: v for k, v in value.items() if not k.startswith("_")})
74
92
 
75
93
  def __compounds(self: NcYaml, fid: Dataset) -> dict[str, str | int | float]:
@@ -145,11 +163,17 @@ class NcYaml:
145
163
  compression = "zlib"
146
164
  complevel = val["_compression"]
147
165
 
166
+ var_dims = []
148
167
  n_udim = 0
149
168
  ds_shape = ()
150
169
  ds_maxshape = ()
151
170
  for coord in val["_dims"]:
152
- dim_sz = fid.dimensions[coord].size
171
+ pcoord = PurePosixPath(coord)
172
+ var_dims.append(pcoord.name if pcoord.is_absolute() else coord)
173
+ if pcoord.is_absolute():
174
+ dim_sz = fid[pcoord.parent].dimensions[pcoord.name].size
175
+ else:
176
+ dim_sz = fid.dimensions[coord].size
153
177
  n_udim += int(dim_sz == 0)
154
178
  ds_shape += (dim_sz,)
155
179
  ds_maxshape += (dim_sz if dim_sz > 0 else None,)
@@ -163,12 +187,18 @@ class NcYaml:
163
187
  val["_chunks"] if "_chunks" in val else guess_chunks(ds_shape, sz_dtype)
164
188
  )
165
189
 
190
+ pkey = PurePosixPath(key)
191
+ var_grp = fid[pkey.parent] if pkey.is_absolute() else fid
192
+ var_name = pkey.name if pkey.is_absolute() else key
193
+ if val["_dtype"] in fid.cmptypes:
194
+ val["_dtype"] = fid.cmptypes[val["_dtype"]]
195
+
166
196
  # create the variable
167
197
  if ds_chunk == "contiguous":
168
- dset = fid.createVariable(
169
- key,
198
+ dset = var_grp.createVariable(
199
+ var_name,
170
200
  val["_dtype"],
171
- dimensions=(key,),
201
+ dimensions=var_dims,
172
202
  fill_value=fillvalue,
173
203
  contiguous=True,
174
204
  )
@@ -181,13 +211,10 @@ class NcYaml:
181
211
  if ds_maxshape == (None,):
182
212
  ds_chunk = (16,)
183
213
 
184
- if key in fid.cmptypes:
185
- val["_dtype"] = fid.cmptypes[key]
186
-
187
- dset = fid.createVariable(
188
- key,
214
+ dset = var_grp.createVariable(
215
+ var_name,
189
216
  val["_dtype"],
190
- dimensions=val["_dims"],
217
+ dimensions=var_dims,
191
218
  fill_value=fillvalue,
192
219
  contiguous=False,
193
220
  compression=compression,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: h5yaml
3
- Version: 0.0.3
3
+ Version: 0.0.4
4
4
  Summary: Use YAML configuration file to generate HDF5/netCDF4 formated files.
5
5
  Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
6
6
  Project-URL: Source, https://github.com/rmvanhees/h5_yaml
@@ -14,28 +14,51 @@ Classifier: Intended Audience :: Developers
14
14
  Classifier: Intended Audience :: Science/Research
15
15
  Classifier: Operating System :: OS Independent
16
16
  Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
17
20
  Classifier: Programming Language :: Python :: 3.12
18
21
  Classifier: Programming Language :: Python :: 3.13
19
22
  Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
20
- Requires-Python: >=3.12
23
+ Requires-Python: >=3.9
21
24
  Requires-Dist: h5py>=3.13
22
25
  Requires-Dist: netcdf4>=1.7
23
26
  Requires-Dist: numpy>=2.2
24
27
  Requires-Dist: pyyaml>=6.0
25
28
  Description-Content-Type: text/markdown
26
29
 
27
- # H5_YAML
30
+ # H5YAML
31
+ [![image](https://img.shields.io/pypi/v/h5yaml.svg?label=release)](https://github.com/rmvanhees/h5yaml/)
32
+ [![image](https://img.shields.io/pypi/l/h5yaml.svg)](https://github.com/rmvanhees/h5yaml/LICENSE)
33
+ [![image](https://img.shields.io/pypi/dm/h5yaml.svg)](https://pypi.org/project/h5yaml/)
34
+ [![image](https://img.shields.io/pypi/status/h5yaml.svg?label=status)](https://pypi.org/project/h5yaml/)
28
35
 
29
36
  ## Description
30
- Use YAML configuration file to generate HDF5/netCDF4 formated files.
37
+ This package lets you generate [HDF5](https://docs.h5py.org/en/stable/)/[netCDF4](https://unidata.github.io/netcdf4-python/)
38
+ formatted files as defined in a [YAML](https://yaml.org/) configuration file. This has several advantages:
31
39
 
32
- The class `NcYaml` must be used when strict conformance to the netCDF4 format is
33
- required. However, the python netCDF4 implementation does not allow variable-length
34
- data to have a compound data-type. The class `H5Yaml` does not have this restiction
35
- and will generate HDF5 formated files which can be read by netCDF4 software.
40
+ * you define the layout of your HDF5/netCDF4 file using YAML which is human-readable and has intuitive syntax.
41
+ * you can reuse the YAML configuration file to have all your products have a consistent layout.
42
+ * you can make updates by only changing the YAML configuration file
43
+ * you can have the layout of your HDF5/netCDF4 file as a Python dictionary, thus without accessing any HDF5/netCDF4 file
44
+
45
+ The `H5YAML` package has two classes to generate an HDF5/netCDF4 formatted file.
46
+
47
+ 1. The class `H5Yaml` uses the [h5py](https://pypi.org/project/h5py/) package, which is a Pythonic interface to
48
+ the HDF5 binary data format.
49
+ Let 'h5_def.yaml' be your YAML configuration file, then ```H5Yaml("h5_def.yaml").create("foo.h5")``` will create
50
+ the HDF5 file 'foo.h5'. This can be read by netCDF4 software, because it attaches dimension-scales to each dataset.
51
+ 2. The class `NcYaml` uses the [netCDF4](https://pypi.org/project/netCDF4/) package, which provides an object-oriented
52
+ python interface to the netCDF version 4 library.
53
+ Let 'nc_def.yaml' be your YAML configuration file, then ```NcYaml("nc_def.yaml").create("foo.nc")``` will create
54
+ the netCDF4/HDF5 file 'foo.nc'.
55
+
56
+ The class `NcYaml` must be used when strict conformance to the netCDF4 format is required.
57
+ However, the `netCDF4` package has some limitations that `h5py` does not; for example, it does
58
+ not allow variable-length variables to have a compound data-type.
36
59
 
37
60
  ## Installation
38
- Relases of the code, starting from version 0.1, will be made available via PyPi.
61
+ Releases of the code, starting from version 0.1, will be made available via PyPI.
39
62
 
40
63
  ## Usage
41
64
 
@@ -54,7 +77,7 @@ The YAML file should be structured as follows:
54
77
  - science_data
55
78
  ```
56
79
 
57
- * The section 'dimensions' is obligatory, you shouold define the dimensions for each
80
+ * The section 'dimensions' is obligatory, you should define the dimensions for each
58
81
  variable in your file. The 'dimensions' section may look like this:
59
82
 
60
83
  ```
@@ -144,7 +167,7 @@ The YAML file should be structured as follows:
144
167
  ### Notes and ToDo:
145
168
 
146
169
  * The usage of older versions of h5py may result in broken netCDF4 files
147
- * Explain usage of parameter '_chunks', which is currently not correcly implemented.
170
+ * Explain usage of parameter '_chunks', which is currently not correctly implemented.
148
171
  * Explain that the usage of variable length data-sets may break netCDF4 compatibility
149
172
 
150
173
  ## Support [TBW]
@@ -161,6 +184,3 @@ The code is developed by R.M. van Hees (SRON)
161
184
 
162
185
  * Copyright: SRON (https://www.sron.nl).
163
186
  * License: BSD-3-clause
164
-
165
- ## Project status
166
- Beta
@@ -0,0 +1,9 @@
1
+ h5yaml/conf_from_yaml.py,sha256=FT5oS4yqDFUYZqgria_OrmMK52ZcrTVoAPyPkLrHstc,982
2
+ h5yaml/yaml_h5py.py,sha256=MVPUqD5fNY3VYIi3ebaEY7uSR1sgYw-8DvIjrOWIaIk,11838
3
+ h5yaml/yaml_nc.py,sha256=37NQ9FPuCrnOTN5R4otn0R-Sdwej1-cPwhODgY2L6pY,9202
4
+ h5yaml/Data/h5_testing.yaml,sha256=_x3qBC8RNQ1h4c6B1JOH_y0L9DiDLsOEfyr60IcvpoI,2358
5
+ h5yaml/lib/chunksizes.py,sha256=sAYiTTL8ecBivBDxw-XIJu2yMlt0zNHTr-KS7tBFp2o,1392
6
+ h5yaml-0.0.4.dist-info/METADATA,sha256=Vvpv1h1t0cYvV5hBiXzdKe2CtU3TT2hWW0qwrIC8vLc,7160
7
+ h5yaml-0.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ h5yaml-0.0.4.dist-info/licenses/LICENSE,sha256=MoOwtPnC77nFaIwRIAIE6fKhrzMd3G18mOXDPtAH8G0,1509
9
+ h5yaml-0.0.4.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- h5yaml/conf_from_yaml.py,sha256=FT5oS4yqDFUYZqgria_OrmMK52ZcrTVoAPyPkLrHstc,982
2
- h5yaml/yaml_h5py.py,sha256=m5vOdVour3FwnUjewGukIra6C_c0F61yqnsvUJN-KtM,8591
3
- h5yaml/yaml_nc.py,sha256=KG2y497If2tJmSSmF6bvXl7ePKorptEUEJULPexBIQw,7980
4
- h5yaml/Data/h5_testing.yaml,sha256=_x3qBC8RNQ1h4c6B1JOH_y0L9DiDLsOEfyr60IcvpoI,2358
5
- h5yaml/lib/chunksizes.py,sha256=aOXkLqTk5GgE-uk80QqHYbLB-FzBMnOrMm6ixH4QAUc,1225
6
- h5yaml-0.0.3.dist-info/METADATA,sha256=CQm7-Uxop-K7v5-WotW6DiKFPtYJXwEi9OJfJKC0o7I,5370
7
- h5yaml-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
- h5yaml-0.0.3.dist-info/licenses/LICENSE,sha256=MoOwtPnC77nFaIwRIAIE6fKhrzMd3G18mOXDPtAH8G0,1509
9
- h5yaml-0.0.3.dist-info/RECORD,,
File without changes