h5yaml 0.1.1__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h5yaml/Data/h5_compound.yaml +36 -0
- h5yaml/Data/h5_testing.yaml +153 -6
- h5yaml/Data/h5_unsupported.yaml +56 -0
- h5yaml/Data/nc_testing.yaml +143 -6
- h5yaml/__init__.py +21 -0
- h5yaml/conf_from_yaml.py +24 -13
- h5yaml/lib/__init__.py +21 -0
- h5yaml/lib/adjust_attr.py +79 -0
- h5yaml/yaml_h5.py +50 -83
- h5yaml/yaml_nc.py +102 -55
- {h5yaml-0.1.1.dist-info → h5yaml-0.2.1.dist-info}/METADATA +25 -10
- h5yaml-0.2.1.dist-info/RECORD +14 -0
- h5yaml-0.2.1.dist-info/licenses/LICENSE +201 -0
- h5yaml/lib/chunksizes.py +0 -62
- h5yaml-0.1.1.dist-info/RECORD +0 -10
- h5yaml-0.1.1.dist-info/licenses/LICENSE +0 -29
- {h5yaml-0.1.1.dist-info → h5yaml-0.2.1.dist-info}/WHEEL +0 -0
h5yaml/yaml_h5.py
CHANGED
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
#
|
|
2
|
-
# This file is part of
|
|
3
|
-
# https://github.com/rmvanhees/h5_yaml.git
|
|
2
|
+
# This file is part of Python package: `h5yaml`
|
|
4
3
|
#
|
|
5
|
-
#
|
|
6
|
-
# All Rights Reserved
|
|
4
|
+
# https://github.com/rmvanhees/pyxarr.git
|
|
7
5
|
#
|
|
8
|
-
#
|
|
6
|
+
# Copyright (c) 2025 - R.M. van Hees (SRON)
|
|
7
|
+
# All rights reserved.
|
|
8
|
+
#
|
|
9
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
+
# you may not use this file except in compliance with the License.
|
|
11
|
+
# You may obtain a copy of the License at
|
|
12
|
+
#
|
|
13
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
+
#
|
|
15
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
# See the License for the specific language governing permissions and
|
|
19
|
+
# limitations under the License.
|
|
9
20
|
#
|
|
10
21
|
"""Create HDF5/netCDF4 formatted file from a YAML configuration file using h5py."""
|
|
11
22
|
|
|
@@ -14,52 +25,13 @@ from __future__ import annotations
|
|
|
14
25
|
__all__ = ["H5Yaml"]
|
|
15
26
|
|
|
16
27
|
import logging
|
|
17
|
-
from importlib.resources import files
|
|
18
28
|
from pathlib import Path
|
|
19
29
|
|
|
20
30
|
import h5py
|
|
21
31
|
import numpy as np
|
|
22
32
|
|
|
23
|
-
from
|
|
24
|
-
from
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# - helper function ------------------------------------
|
|
28
|
-
def adjust_attr(attr_key: str, attr_val: np.ndarray, attr_dtype: str) -> np.ndarray:
|
|
29
|
-
"""..."""
|
|
30
|
-
if attr_key in ("valid_min", "valid_max", "valid_range"):
|
|
31
|
-
match attr_dtype:
|
|
32
|
-
case "i1":
|
|
33
|
-
res = np.int8(attr_val)
|
|
34
|
-
case "i2":
|
|
35
|
-
res = np.int16(attr_val)
|
|
36
|
-
case "i4":
|
|
37
|
-
res = np.int32(attr_val)
|
|
38
|
-
case "i8":
|
|
39
|
-
res = np.int64(attr_val)
|
|
40
|
-
case "u1":
|
|
41
|
-
res = np.uint8(attr_val)
|
|
42
|
-
case "u2":
|
|
43
|
-
res = np.uint16(attr_val)
|
|
44
|
-
case "u4":
|
|
45
|
-
res = np.uint32(attr_val)
|
|
46
|
-
case "u8":
|
|
47
|
-
res = np.uint64(attr_val)
|
|
48
|
-
case "f2":
|
|
49
|
-
res = np.float16(attr_val)
|
|
50
|
-
case "f4":
|
|
51
|
-
res = np.float32(attr_val)
|
|
52
|
-
case "f8":
|
|
53
|
-
res = np.float64(attr_val)
|
|
54
|
-
case _:
|
|
55
|
-
res = attr_val
|
|
56
|
-
|
|
57
|
-
return res
|
|
58
|
-
|
|
59
|
-
if attr_key == "flag_values":
|
|
60
|
-
return np.array(attr_val, dtype="u1")
|
|
61
|
-
|
|
62
|
-
return attr_val
|
|
33
|
+
from .conf_from_yaml import conf_from_yaml
|
|
34
|
+
from .lib.adjust_attr import adjust_attr
|
|
63
35
|
|
|
64
36
|
|
|
65
37
|
# - class definition -----------------------------------
|
|
@@ -68,12 +40,12 @@ class H5Yaml:
|
|
|
68
40
|
|
|
69
41
|
Parameters
|
|
70
42
|
----------
|
|
71
|
-
h5_yaml_fl : Path
|
|
72
|
-
YAML
|
|
43
|
+
h5_yaml_fl : Path | str
|
|
44
|
+
YAML file with the HDF5 format definition
|
|
73
45
|
|
|
74
46
|
"""
|
|
75
47
|
|
|
76
|
-
def __init__(self: H5Yaml, h5_yaml_fl: Path) -> None:
|
|
48
|
+
def __init__(self: H5Yaml, h5_yaml_fl: Path | str) -> None:
|
|
77
49
|
"""Construct a H5Yaml instance."""
|
|
78
50
|
self.logger = logging.getLogger("h5yaml.H5Yaml")
|
|
79
51
|
|
|
@@ -99,14 +71,14 @@ class H5Yaml:
|
|
|
99
71
|
)
|
|
100
72
|
|
|
101
73
|
if val["_size"] == 0:
|
|
102
|
-
ds_chunk = val.get("_chunks"
|
|
74
|
+
ds_chunk = val.get("_chunks")
|
|
75
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
76
|
+
ds_chunk = tuple(ds_chunk)
|
|
103
77
|
dset = fid.create_dataset(
|
|
104
78
|
key,
|
|
105
79
|
shape=(0,),
|
|
106
|
-
dtype=
|
|
107
|
-
|
|
108
|
-
),
|
|
109
|
-
chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
|
|
80
|
+
dtype="T" if val["_dtype"] == "str" else val["_dtype"],
|
|
81
|
+
chunks=ds_chunk,
|
|
110
82
|
maxshape=(None,),
|
|
111
83
|
fillvalue=fillvalue,
|
|
112
84
|
)
|
|
@@ -114,7 +86,7 @@ class H5Yaml:
|
|
|
114
86
|
dset = fid.create_dataset(
|
|
115
87
|
key,
|
|
116
88
|
shape=(val["_size"],),
|
|
117
|
-
dtype=val["_dtype"],
|
|
89
|
+
dtype="T" if val["_dtype"] == "str" else val["_dtype"],
|
|
118
90
|
)
|
|
119
91
|
if "_values" in val:
|
|
120
92
|
dset[:] = val["_values"]
|
|
@@ -128,7 +100,7 @@ class H5Yaml:
|
|
|
128
100
|
)
|
|
129
101
|
for attr, attr_val in val.items():
|
|
130
102
|
if not attr.startswith("_"):
|
|
131
|
-
dset.attrs[attr] = adjust_attr(
|
|
103
|
+
dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
|
|
132
104
|
|
|
133
105
|
def __compounds(self: H5Yaml, fid: h5py.File) -> dict[str, str | int | float]:
|
|
134
106
|
"""Add compound datatypes to HDF5 product."""
|
|
@@ -182,10 +154,8 @@ class H5Yaml:
|
|
|
182
154
|
for key, val in self.h5_def["variables"].items():
|
|
183
155
|
if val["_dtype"] in fid:
|
|
184
156
|
ds_dtype = fid[val["_dtype"]]
|
|
185
|
-
dtype_size = fid[val["_dtype"]].dtype.itemsize
|
|
186
157
|
else:
|
|
187
|
-
ds_dtype = val["_dtype"]
|
|
188
|
-
dtype_size = np.dtype(val["_dtype"]).itemsize
|
|
158
|
+
ds_dtype = "T" if val["_dtype"] == "str" else val["_dtype"]
|
|
189
159
|
|
|
190
160
|
fillvalue = None
|
|
191
161
|
if "_FillValue" in val:
|
|
@@ -203,7 +173,7 @@ class H5Yaml:
|
|
|
203
173
|
)
|
|
204
174
|
for attr, attr_val in val.items():
|
|
205
175
|
if not attr.startswith("_"):
|
|
206
|
-
dset.attrs[attr] = adjust_attr(
|
|
176
|
+
dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
|
|
207
177
|
continue
|
|
208
178
|
|
|
209
179
|
n_udim = 0
|
|
@@ -217,17 +187,15 @@ class H5Yaml:
|
|
|
217
187
|
|
|
218
188
|
# currently, we can not handle more than one unlimited dimension
|
|
219
189
|
if n_udim > 1:
|
|
220
|
-
raise ValueError("more than one unlimited dimension")
|
|
190
|
+
raise ValueError(f"{key} has more than one unlimited dimension")
|
|
221
191
|
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
else guess_chunks(ds_shape, dtype_size)
|
|
227
|
-
)
|
|
192
|
+
if None in ds_maxshape and val.get("_chunks") == "contiguous":
|
|
193
|
+
raise KeyError(
|
|
194
|
+
"you can not create a contiguous dataset with unlimited dimensions."
|
|
195
|
+
)
|
|
228
196
|
|
|
229
197
|
# create the variable
|
|
230
|
-
if
|
|
198
|
+
if val.get("_chunks") == "contiguous":
|
|
231
199
|
dset = fid.create_dataset(
|
|
232
200
|
key,
|
|
233
201
|
ds_shape,
|
|
@@ -237,6 +205,9 @@ class H5Yaml:
|
|
|
237
205
|
fillvalue=fillvalue,
|
|
238
206
|
)
|
|
239
207
|
else:
|
|
208
|
+
ds_chunk = val.get("_chunks")
|
|
209
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
210
|
+
ds_chunk = tuple(ds_chunk)
|
|
240
211
|
compression = None
|
|
241
212
|
shuffle = False
|
|
242
213
|
# currently only gzip compression is supported
|
|
@@ -254,14 +225,12 @@ class H5Yaml:
|
|
|
254
225
|
fid[ds_name] = h5py.vlen_dtype(ds_dtype)
|
|
255
226
|
ds_dtype = fid[ds_name]
|
|
256
227
|
fillvalue = None
|
|
257
|
-
if ds_maxshape == (None,):
|
|
258
|
-
ds_chunk = (16,)
|
|
259
228
|
|
|
260
229
|
dset = fid.create_dataset(
|
|
261
230
|
key,
|
|
262
231
|
ds_shape,
|
|
263
232
|
dtype=ds_dtype,
|
|
264
|
-
chunks=ds_chunk
|
|
233
|
+
chunks=ds_chunk,
|
|
265
234
|
maxshape=ds_maxshape,
|
|
266
235
|
fillvalue=fillvalue,
|
|
267
236
|
compression=compression,
|
|
@@ -273,7 +242,7 @@ class H5Yaml:
|
|
|
273
242
|
|
|
274
243
|
for attr, attr_val in val.items():
|
|
275
244
|
if not attr.startswith("_"):
|
|
276
|
-
dset.attrs[attr] = adjust_attr(
|
|
245
|
+
dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
|
|
277
246
|
|
|
278
247
|
if compounds is not None and val["_dtype"] in compounds:
|
|
279
248
|
if compounds[val["_dtype"]]["units"]:
|
|
@@ -286,6 +255,15 @@ class H5Yaml:
|
|
|
286
255
|
"""Return definition of the HDF5/netCDF4 product."""
|
|
287
256
|
return self._h5_def
|
|
288
257
|
|
|
258
|
+
def diskless(self: H5Yaml) -> h5py.File:
|
|
259
|
+
"""Create a HDF5/netCDF4 file in memory."""
|
|
260
|
+
fid = h5py.File.in_memory()
|
|
261
|
+
if "groups" in self.h5_def:
|
|
262
|
+
self.__groups(fid)
|
|
263
|
+
self.__dimensions(fid)
|
|
264
|
+
self.__variables(fid, self.__compounds(fid))
|
|
265
|
+
return fid
|
|
266
|
+
|
|
289
267
|
def create(self: H5Yaml, l1a_name: Path | str) -> None:
|
|
290
268
|
"""Create a HDF5/netCDF4 file (overwrite if exist).
|
|
291
269
|
|
|
@@ -303,14 +281,3 @@ class H5Yaml:
|
|
|
303
281
|
self.__variables(fid, self.__compounds(fid))
|
|
304
282
|
except PermissionError as exc:
|
|
305
283
|
raise RuntimeError(f"failed create {l1a_name}") from exc
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
# - test module -------------------------
|
|
309
|
-
def tests() -> None:
|
|
310
|
-
"""..."""
|
|
311
|
-
print("Calling H5Yaml")
|
|
312
|
-
H5Yaml(files("h5yaml.Data") / "h5_testing.yaml").create("test_yaml.h5")
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
if __name__ == "__main__":
|
|
316
|
-
tests()
|
h5yaml/yaml_nc.py
CHANGED
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
#
|
|
2
|
-
# This file is part of
|
|
3
|
-
# https://github.com/rmvanhees/h5_yaml.git
|
|
2
|
+
# This file is part of Python package: `h5yaml`
|
|
4
3
|
#
|
|
5
|
-
#
|
|
6
|
-
# All Rights Reserved
|
|
4
|
+
# https://github.com/rmvanhees/pyxarr.git
|
|
7
5
|
#
|
|
8
|
-
#
|
|
6
|
+
# Copyright (c) 2025 - R.M. van Hees (SRON)
|
|
7
|
+
# All rights reserved.
|
|
8
|
+
#
|
|
9
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
+
# you may not use this file except in compliance with the License.
|
|
11
|
+
# You may obtain a copy of the License at
|
|
12
|
+
#
|
|
13
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
+
#
|
|
15
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
# See the License for the specific language governing permissions and
|
|
19
|
+
# limitations under the License.
|
|
9
20
|
#
|
|
10
21
|
"""Create HDF5/netCDF4 formatted file from a YAML configuration file using netCDF4."""
|
|
11
22
|
|
|
@@ -14,7 +25,6 @@ from __future__ import annotations
|
|
|
14
25
|
__all__ = ["NcYaml"]
|
|
15
26
|
|
|
16
27
|
import logging
|
|
17
|
-
from importlib.resources import files
|
|
18
28
|
from pathlib import PurePosixPath
|
|
19
29
|
from typing import TYPE_CHECKING
|
|
20
30
|
|
|
@@ -23,8 +33,8 @@ import numpy as np
|
|
|
23
33
|
# pylint: disable=no-name-in-module
|
|
24
34
|
from netCDF4 import Dataset
|
|
25
35
|
|
|
26
|
-
from
|
|
27
|
-
from
|
|
36
|
+
from .conf_from_yaml import conf_from_yaml
|
|
37
|
+
from .lib.adjust_attr import adjust_attr
|
|
28
38
|
|
|
29
39
|
if TYPE_CHECKING:
|
|
30
40
|
from pathlib import Path
|
|
@@ -32,22 +42,29 @@ if TYPE_CHECKING:
|
|
|
32
42
|
|
|
33
43
|
# - class definition -----------------------------------
|
|
34
44
|
class NcYaml:
|
|
35
|
-
"""Class to create a HDF5/netCDF4 formated file from a YAML configuration file.
|
|
45
|
+
"""Class to create a HDF5/netCDF4 formated file from a YAML configuration file.
|
|
46
|
+
|
|
47
|
+
Parameters
|
|
48
|
+
----------
|
|
49
|
+
nc_yaml_fl : Path | str
|
|
50
|
+
YAML file with the netCDF4 format definition
|
|
51
|
+
|
|
52
|
+
"""
|
|
36
53
|
|
|
37
|
-
def __init__(self: NcYaml,
|
|
54
|
+
def __init__(self: NcYaml, nc_yaml_fl: Path | str) -> None:
|
|
38
55
|
"""Construct a NcYaml instance."""
|
|
39
56
|
self.logger = logging.getLogger("h5yaml.NcYaml")
|
|
40
57
|
|
|
41
58
|
try:
|
|
42
|
-
self.
|
|
59
|
+
self._nc_def = conf_from_yaml(nc_yaml_fl)
|
|
43
60
|
except RuntimeError as exc:
|
|
44
61
|
raise RuntimeError from exc
|
|
45
62
|
|
|
46
|
-
self.yaml_dir =
|
|
63
|
+
self.yaml_dir = nc_yaml_fl.parent
|
|
47
64
|
|
|
48
65
|
def __groups(self: NcYaml, fid: Dataset) -> None:
|
|
49
66
|
"""Create groups in HDF5 product."""
|
|
50
|
-
for key in self.
|
|
67
|
+
for key in self.nc_def["groups"]:
|
|
51
68
|
pkey = PurePosixPath(key)
|
|
52
69
|
if pkey.is_absolute():
|
|
53
70
|
_ = fid[pkey.parent].createGroup(pkey.name)
|
|
@@ -56,14 +73,14 @@ class NcYaml:
|
|
|
56
73
|
|
|
57
74
|
def __dimensions(self: NcYaml, fid: Dataset) -> None:
|
|
58
75
|
"""Add dimensions to HDF5 product."""
|
|
59
|
-
for key, value in self.
|
|
76
|
+
for key, value in self.nc_def["dimensions"].items():
|
|
60
77
|
pkey = PurePosixPath(key)
|
|
61
78
|
if pkey.is_absolute():
|
|
62
79
|
_ = fid[pkey.parent].createDimension(pkey.name, value["_size"])
|
|
63
80
|
else:
|
|
64
81
|
_ = fid.createDimension(key, value["_size"])
|
|
65
82
|
|
|
66
|
-
if
|
|
83
|
+
if len(value) <= 2:
|
|
67
84
|
continue
|
|
68
85
|
|
|
69
86
|
fillvalue = None
|
|
@@ -88,17 +105,29 @@ class NcYaml:
|
|
|
88
105
|
fill_value=fillvalue,
|
|
89
106
|
contiguous=value["_size"] != 0,
|
|
90
107
|
)
|
|
91
|
-
|
|
108
|
+
if value["_size"] > 0:
|
|
109
|
+
if "_values" in value:
|
|
110
|
+
dset[:] = np.array(value["_values"])
|
|
111
|
+
elif "_range" in value:
|
|
112
|
+
dset[:] = np.arange(*value["_range"], dtype=value["_dtype"])
|
|
113
|
+
|
|
114
|
+
dset.setncatts(
|
|
115
|
+
{
|
|
116
|
+
k: adjust_attr(value["_dtype"], k, v)
|
|
117
|
+
for k, v in value.items()
|
|
118
|
+
if not k.startswith("_")
|
|
119
|
+
}
|
|
120
|
+
)
|
|
92
121
|
|
|
93
122
|
def __compounds(self: NcYaml, fid: Dataset) -> dict[str, str | int | float]:
|
|
94
123
|
"""Add compound datatypes to HDF5 product."""
|
|
95
|
-
if "compounds" not in self.
|
|
124
|
+
if "compounds" not in self.nc_def:
|
|
96
125
|
return {}
|
|
97
126
|
|
|
98
127
|
compounds = {}
|
|
99
|
-
if isinstance(self.
|
|
100
|
-
file_list = self.
|
|
101
|
-
self.
|
|
128
|
+
if isinstance(self.nc_def["compounds"], list):
|
|
129
|
+
file_list = self.nc_def["compounds"].copy()
|
|
130
|
+
self.nc_def["compounds"] = {}
|
|
102
131
|
for name in file_list:
|
|
103
132
|
if not (yaml_fl := self.yaml_dir / name).is_file():
|
|
104
133
|
continue
|
|
@@ -107,9 +136,9 @@ class NcYaml:
|
|
|
107
136
|
except RuntimeError as exc:
|
|
108
137
|
raise RuntimeError from exc
|
|
109
138
|
for key, value in res.items():
|
|
110
|
-
self.
|
|
139
|
+
self.nc_def["compounds"][key] = value
|
|
111
140
|
|
|
112
|
-
for key, value in self.
|
|
141
|
+
for key, value in self.nc_def["compounds"].items():
|
|
113
142
|
compounds[key] = {
|
|
114
143
|
"dtype": [],
|
|
115
144
|
"units": [],
|
|
@@ -142,13 +171,15 @@ class NcYaml:
|
|
|
142
171
|
Definition of the compound(s) in the product
|
|
143
172
|
|
|
144
173
|
"""
|
|
145
|
-
for key, val in self.
|
|
174
|
+
for key, val in self.nc_def["variables"].items():
|
|
175
|
+
pkey = PurePosixPath(key)
|
|
176
|
+
var_grp = fid[pkey.parent] if pkey.is_absolute() else fid
|
|
177
|
+
var_name = pkey.name if pkey.is_absolute() else key
|
|
178
|
+
|
|
146
179
|
if val["_dtype"] in fid.cmptypes:
|
|
147
180
|
ds_dtype = fid.cmptypes[val["_dtype"]].dtype
|
|
148
|
-
sz_dtype = ds_dtype.itemsize
|
|
149
181
|
else:
|
|
150
182
|
ds_dtype = val["_dtype"]
|
|
151
|
-
sz_dtype = np.dtype(val["_dtype"]).itemsize
|
|
152
183
|
|
|
153
184
|
fillvalue = None
|
|
154
185
|
if "_FillValue" in val:
|
|
@@ -156,6 +187,23 @@ class NcYaml:
|
|
|
156
187
|
np.nan if val["_FillValue"] == "NaN" else int(val["_FillValue"])
|
|
157
188
|
)
|
|
158
189
|
|
|
190
|
+
# check for scalar dataset
|
|
191
|
+
if val["_dims"][0] == "scalar":
|
|
192
|
+
dset = var_grp.createVariable(
|
|
193
|
+
var_name,
|
|
194
|
+
val["_dtype"],
|
|
195
|
+
fill_value=fillvalue,
|
|
196
|
+
contiguous=True,
|
|
197
|
+
)
|
|
198
|
+
dset.setncatts(
|
|
199
|
+
{
|
|
200
|
+
k: adjust_attr(val["_dtype"], k, v)
|
|
201
|
+
for k, v in val.items()
|
|
202
|
+
if not k.startswith("_")
|
|
203
|
+
}
|
|
204
|
+
)
|
|
205
|
+
continue
|
|
206
|
+
|
|
159
207
|
compression = None
|
|
160
208
|
complevel = 0
|
|
161
209
|
# currently only gzip compression is supported
|
|
@@ -182,19 +230,16 @@ class NcYaml:
|
|
|
182
230
|
if n_udim > 1:
|
|
183
231
|
raise ValueError("more than one unlimited dimension")
|
|
184
232
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
233
|
+
if None in ds_maxshape and val.get("_chunks") == "contiguous":
|
|
234
|
+
raise KeyError(
|
|
235
|
+
"you can not create a contiguous dataset with unlimited dimensions."
|
|
236
|
+
)
|
|
189
237
|
|
|
190
|
-
pkey = PurePosixPath(key)
|
|
191
|
-
var_grp = fid[pkey.parent] if pkey.is_absolute() else fid
|
|
192
|
-
var_name = pkey.name if pkey.is_absolute() else key
|
|
193
238
|
if val["_dtype"] in fid.cmptypes:
|
|
194
239
|
val["_dtype"] = fid.cmptypes[val["_dtype"]]
|
|
195
240
|
|
|
196
241
|
# create the variable
|
|
197
|
-
if
|
|
242
|
+
if val.get("_chunks") == "contiguous":
|
|
198
243
|
dset = var_grp.createVariable(
|
|
199
244
|
var_name,
|
|
200
245
|
val["_dtype"],
|
|
@@ -203,27 +248,32 @@ class NcYaml:
|
|
|
203
248
|
contiguous=True,
|
|
204
249
|
)
|
|
205
250
|
else:
|
|
251
|
+
ds_chunk = val.get("_chunks")
|
|
252
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
253
|
+
ds_chunk = tuple(ds_chunk)
|
|
206
254
|
if val.get("_vlen"):
|
|
207
255
|
if val["_dtype"] in fid.cmptypes:
|
|
208
256
|
raise ValueError("can not have vlen with compounds")
|
|
209
257
|
val["_dtype"] = fid.createVLType(ds_dtype, val["_dtype"])
|
|
210
258
|
fillvalue = None
|
|
211
|
-
if ds_maxshape == (None,):
|
|
212
|
-
ds_chunk = (16,)
|
|
213
259
|
|
|
214
260
|
dset = var_grp.createVariable(
|
|
215
261
|
var_name,
|
|
216
|
-
val["_dtype"],
|
|
262
|
+
str if val["_dtype"] == "str" else val["_dtype"],
|
|
217
263
|
dimensions=var_dims,
|
|
218
264
|
fill_value=fillvalue,
|
|
219
|
-
contiguous=False,
|
|
220
265
|
compression=compression,
|
|
221
266
|
complevel=complevel,
|
|
222
|
-
chunksizes=
|
|
223
|
-
|
|
224
|
-
),
|
|
267
|
+
chunksizes=ds_chunk,
|
|
268
|
+
contiguous=False,
|
|
225
269
|
)
|
|
226
|
-
dset.setncatts(
|
|
270
|
+
dset.setncatts(
|
|
271
|
+
{
|
|
272
|
+
k: adjust_attr(val["_dtype"], k, v)
|
|
273
|
+
for k, v in val.items()
|
|
274
|
+
if not k.startswith("_")
|
|
275
|
+
}
|
|
276
|
+
)
|
|
227
277
|
|
|
228
278
|
if compounds is not None and val["_dtype"] in compounds:
|
|
229
279
|
if compounds[val["_dtype"]]["units"]:
|
|
@@ -232,9 +282,17 @@ class NcYaml:
|
|
|
232
282
|
dset.attrs["long_name"] = compounds[val["_dtype"]]["names"]
|
|
233
283
|
|
|
234
284
|
@property
|
|
235
|
-
def
|
|
285
|
+
def nc_def(self: NcYaml) -> dict:
|
|
236
286
|
"""Return definition of the HDF5/netCDF4 product."""
|
|
237
|
-
return self.
|
|
287
|
+
return self._nc_def
|
|
288
|
+
|
|
289
|
+
def diskless(self: NcYaml) -> Dataset:
|
|
290
|
+
"""Create a HDF5/netCDF4 file in memory."""
|
|
291
|
+
fid = Dataset("diskless_test.nc", "w", diskless=True, persistent=False)
|
|
292
|
+
self.__groups(fid)
|
|
293
|
+
self.__dimensions(fid)
|
|
294
|
+
self.__variables(fid, self.__compounds(fid))
|
|
295
|
+
return fid
|
|
238
296
|
|
|
239
297
|
def create(self: NcYaml, l1a_name: Path | str) -> None:
|
|
240
298
|
"""Create a HDF5/netCDF4 file (overwrite if exist).
|
|
@@ -251,15 +309,4 @@ class NcYaml:
|
|
|
251
309
|
self.__dimensions(fid)
|
|
252
310
|
self.__variables(fid, self.__compounds(fid))
|
|
253
311
|
except PermissionError as exc:
|
|
254
|
-
raise RuntimeError(f"failed create {l1a_name}") from exc
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
# - test module -------------------------
|
|
258
|
-
def tests() -> None:
|
|
259
|
-
"""..."""
|
|
260
|
-
print("Calling NcYaml")
|
|
261
|
-
NcYaml(files("h5yaml.Data") / "nc_testing.yaml").create("test_yaml.nc")
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
if __name__ == "__main__":
|
|
265
|
-
tests()
|
|
312
|
+
raise RuntimeError(f"failed to create {l1a_name}") from exc
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: h5yaml
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Use YAML configuration file to generate HDF5/netCDF4 formatted files.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
|
|
6
6
|
Project-URL: Source, https://github.com/rmvanhees/h5_yaml
|
|
7
7
|
Project-URL: Issues, https://github.com/rmvanhees/h5_yaml/issues
|
|
8
8
|
Author-email: Richard van Hees <r.m.van.hees@sron.nl>
|
|
9
|
-
License-Expression:
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
10
|
License-File: LICENSE
|
|
11
|
-
Keywords: HDF5,YAML,netCDF4
|
|
11
|
+
Keywords: CF metadata,HDF5,YAML,netCDF4
|
|
12
12
|
Classifier: Development Status :: 5 - Production/Stable
|
|
13
13
|
Classifier: Intended Audience :: Developers
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -19,12 +19,15 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
19
19
|
Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
-
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
23
22
|
Requires-Python: >=3.10
|
|
24
|
-
Requires-Dist: h5py>=3.
|
|
23
|
+
Requires-Dist: h5py>=3.14
|
|
25
24
|
Requires-Dist: netcdf4>=1.7
|
|
26
|
-
Requires-Dist: numpy>=
|
|
25
|
+
Requires-Dist: numpy>=2.0
|
|
27
26
|
Requires-Dist: pyyaml>=6.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=9.0; extra == 'dev'
|
|
29
|
+
Provides-Extra: test
|
|
30
|
+
Requires-Dist: pytest-cov>=7.0; extra == 'test'
|
|
28
31
|
Description-Content-Type: text/markdown
|
|
29
32
|
|
|
30
33
|
# H5YAML
|
|
@@ -58,7 +61,19 @@ However, package `netCDF4` has some limitations, which `h5py` has not, for examp
|
|
|
58
61
|
not allow variable-length variables to have a compound data-type.
|
|
59
62
|
|
|
60
63
|
## Installation
|
|
61
|
-
|
|
64
|
+
The package `h5yaml` is available from PyPI. To install it, use `pip`:
|
|
65
|
+
|
|
66
|
+
> $ pip install [--user] h5yaml
|
|
67
|
+
|
|
68
|
+
The package `h5yaml` requires Python 3.10+ and the Python packages h5py (v3.14+), netCDF4 (v1.7+), numpy (v2.0+) and PyYAML (v6.0+).
|
|
69
|
+
|
|
70
|
+
**Note**: the packages `h5py` and `netCDF4` each come with their own HDF5 library. If these libraries differ, they may
|
|
71
|
+
collide and result in an *"HDF5 error"*.
|
|
72
|
+
If this is the case, install the development packages of HDF5 and netCDF4 (or compile them from source).
|
|
73
|
+
Then reinstall `h5py` and `netCDF4` from source using the commands:
|
|
74
|
+
|
|
75
|
+
> $ pip uninstall h5py; pip install --no-binary=h5py h5py
|
|
76
|
+
> $ pip uninstall netCDF4; pip install --no-binary=netCDF4 netCDF4
|
|
62
77
|
|
|
63
78
|
## Usage
|
|
64
79
|
|
|
@@ -173,7 +188,7 @@ The YAML file should be structured as follows:
|
|
|
173
188
|
|
|
174
189
|
## Support [TBW]
|
|
175
190
|
|
|
176
|
-
##
|
|
191
|
+
## Road map
|
|
177
192
|
|
|
178
193
|
* Release v0.1 : stable API to read your YAML files and generate the HDF5/netCDF4 file
|
|
179
194
|
|
|
@@ -183,5 +198,5 @@ The code is developed by R.M. van Hees (SRON)
|
|
|
183
198
|
|
|
184
199
|
## License
|
|
185
200
|
|
|
186
|
-
* Copyright: SRON (https://www.sron.nl).
|
|
187
|
-
* License:
|
|
201
|
+
* Copyright: Richard van Hees (SRON) (https://www.sron.nl).
|
|
202
|
+
* License: Apache-2.0
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
2
|
+
h5yaml/conf_from_yaml.py,sha256=GVbWR-I0_sKRxrXmgLxbnTJvAXz5OtFtNYu4Pp3LaaI,1607
|
|
3
|
+
h5yaml/yaml_h5.py,sha256=K9KkdHNDadrrwlR5EJWuzf1yFZcycJLm3EaiM4nLHkw,10112
|
|
4
|
+
h5yaml/yaml_nc.py,sha256=0xJ0dYnIXEhUVZGEkv9GZZu4Xo7K4zXPGiG49j8RHKs,10977
|
|
5
|
+
h5yaml/Data/h5_compound.yaml,sha256=z3dMCJDRAw14boRp0zT74bz_oFi21yu8coUoKOW-d2Q,1131
|
|
6
|
+
h5yaml/Data/h5_testing.yaml,sha256=BSrjenq_L5g8GGoldtU_NbWzLSymwAcvFOh26jtoynM,6247
|
|
7
|
+
h5yaml/Data/h5_unsupported.yaml,sha256=v4HYhiTikFt6UoEUJBnmSse_WeHbmBgqF2e1bCJEfLw,1502
|
|
8
|
+
h5yaml/Data/nc_testing.yaml,sha256=zKDRkYpWVM3_vDpsu-ZxHAGDlITICb_nwjDKsFzPgcQ,5807
|
|
9
|
+
h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
10
|
+
h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
|
|
11
|
+
h5yaml-0.2.1.dist-info/METADATA,sha256=Z-be-4WMz4o1RY4WiY3o-pnfEq72cdvTkDnmTQUnaJY,7876
|
|
12
|
+
h5yaml-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
13
|
+
h5yaml-0.2.1.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
|
|
14
|
+
h5yaml-0.2.1.dist-info/RECORD,,
|