h5yaml 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ # YAML
2
+ #
3
+ # Configuration file to test the implementation of classes H5Yaml and NcYaml
4
+ #
5
+ # This file is part of h5_yaml:
6
+ # https://github.com/rmvanhees/h5_yaml.git
7
+ #
8
+ # Copyright (c) 2025 SRON
9
+ # All Rights Reserved
10
+ #
11
+ # License: BSD-3-Clause
12
+ #
13
+ # Define groups
14
+ groups:
15
+ - group_00
16
+ - group_01
17
+ - group_02
18
+
19
+ # Define dimensions
20
+ # Note: dimensions with an attribute 'long_name' will also be generated as variables
21
+ dimensions:
22
+ number_of_images:
23
+ _dtype: u2
24
+ _size: 0
25
+ samples_per_image:
26
+ _dtype: u4
27
+ _size: 203500
28
+ column:
29
+ _dtype: u2
30
+ _size: 640
31
+ row:
32
+ _dtype: u2
33
+ _size: 512
34
+ time:
35
+ _dtype: f8
36
+ _size: 0
37
+ _FillValue: -32767
38
+ long_name: Attitude sample time (seconds of day)
39
+ calendar: proleptic_gregorian
40
+ units: seconds since %Y-%m-%d %H:%M:%S
41
+ valid_min: 0
42
+ valid_max: 92400
43
+
44
+ # Define compound types
45
+ # - compound elements must have a data-type, and can have a unit and long_name
46
+ compounds:
47
+ stats_dtype:
48
+ time: [u8, seconds since 1970-01-01T00:00:00, timestamp]
49
+ index: [u2, '1', index]
50
+ tbl_id: [u1, '1', binning id]
51
+ saa: [u1, '1', saa-flag]
52
+ coad: [u1, '1', co-addings]
53
+ texp: [f4, ms, exposure time]
54
+ lat: [f4, degree, latitude]
55
+ lon: [f4, degree, longitude]
56
+ avg: [f4, '1', '$S - S_{ref}$']
57
+ unc: [f4, '1', '\u03c3($S - S_{ref}$)']
58
+ dark_offs: [f4, '1', dark-offset]
59
+
60
+ geo_dtype:
61
+ lat: [f4, latitude]
62
+ lon: [f4, longitude]
63
+
64
+ # Define variables
65
+ variables:
66
+ /group_00/detector_images:
67
+ _dtype: u2
68
+ _dims: [number_of_images, column, row]
69
+ _FillValue: 65535
70
+ long_name: Detector pixel values
71
+ comment: unbinned full-frame data
72
+ units: '1'
73
+ valid_min: 0
74
+ valid_max: 65534
75
+ /group_01/detector_images:
76
+ _dtype: u2
77
+ _dims: [number_of_images, samples_per_image]
78
+ _FillValue: 65535
79
+ _compression: 1
80
+ long_name: Detector pixel values
81
+ comment: variable binned data (filled to the largest samples_per_image)
82
+ units: '1'
83
+ valid_min: 0
84
+ valid_max: 65534
85
+ /group_01/stats:
86
+ _dtype: stats_dtype
87
+ _vlen: True
88
+ _dims: [time]
89
+ comment: detector map statistics
90
+ /group_02/detector_images:
91
+ _dtype: u2
92
+ _dims: [number_of_images]
93
+ _vlen: True
94
+ _FillValue: 65535
95
+ long_name: Detector pixel values
96
+ comment: variable binned (vlen) data
97
+ units: '1'
98
+ valid_min: 0
99
+ valid_max: 65534
@@ -0,0 +1,48 @@
1
+ #
2
+ # This file is part of h5_yaml:
3
+ # https://github.com/rmvanhees/h5_yaml.git
4
+ #
5
+ # Copyright (c) 2025 SRON
6
+ # All Rights Reserved
7
+ #
8
+ # License: BSD-3-Clause
9
+ #
10
+ """Read settings from a YAML file."""
11
+
12
+ from __future__ import annotations
13
+
14
+ __all__ = ["conf_from_yaml"]
15
+
16
+ from typing import TYPE_CHECKING
17
+
18
+ import yaml
19
+
20
+ if TYPE_CHECKING:
21
+ from pathlib import Path
22
+
23
+
24
+ # - main function -----------------------------------
25
def conf_from_yaml(file_path: Path) -> dict:
    """Read settings from a YAML file: `file_path`.

    Parameters
    ----------
    file_path : Path
       full path of YAML file

    Returns
    -------
    dict
       content of the configuration file

    Raises
    ------
    FileNotFoundError
       if `file_path` is not an existing regular file
    RuntimeError
       if the content of the file can not be parsed as YAML

    """
    if not file_path.is_file():
        raise FileNotFoundError(f"{file_path} not found")

    # UTF-8 is a superset of ASCII: files which could be read before are read
    # identically, while non-ASCII text (e.g. in units or long names) no longer
    # aborts with a UnicodeDecodeError.
    with file_path.open("r", encoding="utf-8") as fid:
        try:
            settings = yaml.safe_load(fid)
        except yaml.YAMLError as exc:
            raise RuntimeError(f"failed to parse YAML file {file_path}") from exc

    return settings
@@ -0,0 +1,54 @@
1
+ #
2
+ # This file is part of h5_yaml
3
+ # https://github.com/rmvanhees/h5_yaml.git
4
+ #
5
+ # Copyright (c) 2025 SRON
6
+ # All Rights Reserved
7
+ #
8
+ # License: BSD-3-Clause
9
+ #
10
+ """Obtain chunksizes for HDF5 datasets."""
11
+
12
+ from __future__ import annotations
13
+
14
+ __all__ = ["guess_chunks"]
15
+
16
+ from typing import TYPE_CHECKING
17
+
18
+ if TYPE_CHECKING:
19
+ from numpy.typing import ArrayLike
20
+
21
+
22
def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int, ...]:
    """Perform an educated guess for the dataset chunk sizes.

    Parameters
    ----------
    dims :  ArrayLike[int]
       Dimensions of the variable, an unlimited dimension has size zero
    dtype_sz :  int
       The element size of the data-type of the variable

    Returns
    -------
    "contiguous" or tuple with chunk-sizes

    Notes
    -----
    Only the first dimension of size zero is treated as the unlimited
    dimension; the callers reject variables with more than one.

    """
    # accept tuples, lists and numpy arrays alike
    shape = tuple(int(x) for x in dims)

    # number of bytes of all fixed dimensions together (unlimited ones skipped)
    fixed_size = dtype_sz
    for val in shape:
        if val > 0:
            fixed_size *= val

    if 0 in shape:  # variable with an unlimited dimension
        udim = shape.index(0)
    else:  # variable has no unlimited dimension
        udim = 0
        # small fixed-size (or scalar) variables are stored without chunking
        if not shape or fixed_size < 65536:
            return "contiguous"

    # a chunk may not be larger than a dimension of fixed size
    max_len = shape[udim] if shape[udim] > 0 else 1024

    if len(shape) == 1:
        return (min(1024, max_len),)

    # bytes needed to store one index along the chunked dimension; divide by
    # the size of that dimension itself (not by the first dimension)
    slice_size = max(1, fixed_size // max(1, shape[udim]))

    # aim at chunks of about 2 MiB, holding at least one and at most 1024 slices
    n_slices = max(1, min(1024, (2048 * 1024) // slice_size))

    res = list(shape)
    res[udim] = min(n_slices, max_len)

    return tuple(res)
h5yaml/yaml_h5py.py ADDED
@@ -0,0 +1,259 @@
1
+ #
2
+ # This file is part of h5_yaml
3
+ # https://github.com/rmvanhees/h5_yaml.git
4
+ #
5
+ # Copyright (c) 2025 SRON
6
+ # All Rights Reserved
7
+ #
8
+ # License: BSD-3-Clause
9
+ #
10
+ """Create HDF5/netCDF4 formatted file from a YAML configuration file using h5py."""
11
+
12
+ from __future__ import annotations
13
+
14
+ __all__ = ["H5Yaml"]
15
+
16
+ import logging
17
+ from importlib.resources import files
18
+ from pathlib import Path
19
+
20
+ import h5py
21
+ import numpy as np
22
+
23
+ from h5yaml.conf_from_yaml import conf_from_yaml
24
+ from h5yaml.lib.chunksizes import guess_chunks
25
+
26
+
27
+ # - class definition -----------------------------------
28
class H5Yaml:
    """Class to create a HDF5/netCDF4 formatted file from a YAML configuration file.

    Parameters
    ----------
    h5_yaml_fl : Path
       YAML files with the HDF5 format definition

    """

    def __init__(self: H5Yaml, h5_yaml_fl: Path) -> None:
        """Construct a H5Yaml instance."""
        self.logger = logging.getLogger("h5yaml.H5Yaml")

        try:
            self._h5_def = conf_from_yaml(h5_yaml_fl)
        except RuntimeError as exc:
            raise RuntimeError(f"failed to read {h5_yaml_fl}") from exc

        # files with compound definitions are searched next to the main file
        self.yaml_dir = h5_yaml_fl.parent

    @staticmethod
    def _fill_value(settings: dict) -> int | float | None:
        """Return the fill value defined in `settings`, or None when absent.

        The string "NaN" is converted to `numpy.nan`, floating-point values
        are returned unchanged (not truncated) and anything else is
        converted to an integer.
        """
        if "_FillValue" not in settings:
            return None

        raw = settings["_FillValue"]
        if isinstance(raw, str) and raw == "NaN":
            return np.nan
        if isinstance(raw, float):
            return raw
        return int(raw)

    def __groups(self: H5Yaml, fid: h5py.File) -> None:
        """Create groups in HDF5 product (the section 'groups' is optional)."""
        for key in self.h5_def.get("groups") or []:
            _ = fid.create_group(key)

    def __dimensions(self: H5Yaml, fid: h5py.File) -> None:
        """Add dimensions to HDF5 product."""
        for key, value in self.h5_def["dimensions"].items():
            ds_dtype = (
                h5py.string_dtype() if value["_dtype"] == "str" else value["_dtype"]
            )

            if value["_size"] == 0:
                # unlimited dimension: needs a chunked, resizable dataset
                ds_chunk = value.get("_chunks", (50,))
                dset = fid.create_dataset(
                    key,
                    shape=(0,),
                    dtype=ds_dtype,
                    chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
                    maxshape=(None,),
                    fillvalue=self._fill_value(value),
                )
            else:
                dset = fid.create_dataset(
                    key,
                    shape=(value["_size"],),
                    dtype=ds_dtype,
                )
                if "_values" in value:
                    dset[:] = value["_values"]

            # dimensions without a 'long_name' are flagged as dimension only
            dset.make_scale(
                Path(key).name
                if "long_name" in value
                else "This is a netCDF dimension but not a netCDF variable."
            )
            # keys with a leading underscore are settings, all others attributes
            for attr, attr_val in value.items():
                if attr.startswith("_"):
                    continue
                dset.attrs[attr] = attr_val

    def __compounds(self: H5Yaml, fid: h5py.File) -> dict[str, dict[str, list]]:
        """Add compound datatypes to HDF5 product.

        Returns
        -------
        dict
           for each compound: its numpy 'dtype' definition and the 'units' and
           'names' (long names) of its elements

        """
        if "compounds" not in self.h5_def:
            return {}

        compounds = {}
        if isinstance(self.h5_def["compounds"], list):
            # a list holds names of YAML files with the compound definitions,
            # the list is replaced by the definitions read from these files
            file_list = self.h5_def["compounds"].copy()
            self.h5_def["compounds"] = {}
            for name in file_list:
                if not (yaml_fl := self.yaml_dir / name).is_file():
                    self.logger.warning(
                        "compound definition file %s not found (skipped)", yaml_fl
                    )
                    continue
                try:
                    res = conf_from_yaml(yaml_fl)
                except RuntimeError as exc:
                    raise RuntimeError(f"failed to read {yaml_fl}") from exc
                for key, value in res.items():
                    self.h5_def["compounds"][key] = value

        for key, value in self.h5_def["compounds"].items():
            compounds[key] = {
                "dtype": [],
                "units": [],
                "names": [],
            }

            # each element is defined as [dtype, units, long_name] or
            # [dtype, long_name]
            for _key, _val in value.items():
                compounds[key]["dtype"].append((_key, _val[0]))
                if len(_val) == 3:
                    compounds[key]["units"].append(_val[1])
                compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])

            # store the compound as a named datatype in the product
            fid[key] = np.dtype(compounds[key]["dtype"])

        return compounds

    def __variables(
        self: H5Yaml, fid: h5py.File, compounds: dict[str, dict[str, list]] | None
    ) -> None:
        """Add datasets to HDF5 product.

        Parameters
        ----------
        fid :  h5py.File
           HDF5 file pointer (mode 'r+')
        compounds :  dict[str, dict[str, list]]
           Definition of the compound(s) in the product

        Raises
        ------
        ValueError
           if a variable has more than one unlimited dimension

        """
        for key, val in self.h5_def["variables"].items():
            if val["_dtype"] in fid:
                # data-type is a named (compound) datatype stored in the product
                ds_dtype = fid[val["_dtype"]]
                dtype_size = ds_dtype.dtype.itemsize
            else:
                ds_dtype = val["_dtype"]
                dtype_size = np.dtype(val["_dtype"]).itemsize

            fillvalue = self._fill_value(val)

            compression = None
            shuffle = False
            # currently only gzip compression is supported
            if "_compression" in val:
                compression = val["_compression"]
                shuffle = True

            n_udim = 0
            ds_shape = ()
            ds_maxshape = ()
            for coord in val["_dims"]:
                dim_sz = fid[coord].size
                n_udim += int(dim_sz == 0)
                ds_shape += (dim_sz,)
                ds_maxshape += (dim_sz if dim_sz > 0 else None,)

            # currently, we can not handle more than one unlimited dimension
            if n_udim > 1:
                raise ValueError("more than one unlimited dimension")

            # obtain chunk-size settings
            ds_chunk = (
                val["_chunks"]
                if "_chunks" in val
                else guess_chunks(ds_shape, dtype_size)
            )

            # honour '_vlen' for every storage layout, a variable-length
            # dataset can not have a fill value
            is_vlen = bool(val.get("_vlen"))
            if is_vlen:
                ds_dtype = h5py.vlen_dtype(ds_dtype)
                fillvalue = None

            # create the variable
            if ds_chunk == "contiguous":
                dset = fid.create_dataset(
                    key,
                    ds_shape,
                    dtype=ds_dtype,
                    chunks=None,
                    maxshape=None,
                    fillvalue=fillvalue,
                )
            else:
                if is_vlen and ds_maxshape == (None,):
                    ds_chunk = (16,)

                dset = fid.create_dataset(
                    key,
                    ds_shape,
                    dtype=ds_dtype,
                    chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
                    maxshape=ds_maxshape,
                    fillvalue=fillvalue,
                    compression=compression,
                    shuffle=shuffle,
                )

            for ii, coord in enumerate(val["_dims"]):
                dset.dims[ii].attach_scale(fid[coord])

            # keys with a leading underscore are settings, all others attributes
            for attr, attr_val in val.items():
                if attr.startswith("_"):
                    continue
                dset.attrs[attr] = attr_val

            # units and long names of the elements of a compound data-type
            if compounds is not None and val["_dtype"] in compounds:
                if compounds[val["_dtype"]]["units"]:
                    dset.attrs["units"] = compounds[val["_dtype"]]["units"]
                if compounds[val["_dtype"]]["names"]:
                    dset.attrs["long_name"] = compounds[val["_dtype"]]["names"]

    @property
    def h5_def(self: H5Yaml) -> dict:
        """Return definition of the HDF5/netCDF4 product."""
        return self._h5_def

    def create(self: H5Yaml, l1a_name: Path | str) -> None:
        """Create a HDF5/netCDF4 file (overwrite if exist).

        Parameters
        ----------
        l1a_name :  Path | str
           Full name of the HDF5/netCDF4 file to be generated

        Raises
        ------
        RuntimeError
           if the file can not be created due to insufficient permissions

        """
        try:
            with h5py.File(l1a_name, "w") as fid:
                self.__groups(fid)
                self.__dimensions(fid)
                # compounds must exist before variables which use them
                self.__variables(fid, self.__compounds(fid))
        except PermissionError as exc:
            raise RuntimeError(f"failed create {l1a_name}") from exc
249
+
250
+
251
+ # - test module -------------------------
252
def tests() -> None:
    """Create 'test_yaml.h5' from the test configuration in package 'h5yaml.Data'."""
    print("Calling H5Yaml")
    yaml_file = files("h5yaml.Data") / "h5_testing.yaml"
    product = H5Yaml(yaml_file)
    product.create("test_yaml.h5")


if __name__ == "__main__":
    tests()
h5yaml/yaml_nc.py ADDED
@@ -0,0 +1,238 @@
1
+ #
2
+ # This file is part of h5_yaml:
3
+ # https://github.com/rmvanhees/h5_yaml.git
4
+ #
5
+ # Copyright (c) 2025 SRON
6
+ # All Rights Reserved
7
+ #
8
+ # License: BSD-3-Clause
9
+ #
10
+ """Create HDF5/netCDF4 formatted file from a YAML configuration file using netCDF4."""
11
+
12
+ from __future__ import annotations
13
+
14
+ __all__ = ["NcYaml"]
15
+
16
+ import logging
17
+ from importlib.resources import files
18
+ from typing import TYPE_CHECKING
19
+
20
+ import numpy as np
21
+
22
+ # pylint: disable=no-name-in-module
23
+ from netCDF4 import Dataset
24
+
25
+ from h5yaml.conf_from_yaml import conf_from_yaml
26
+ from h5yaml.lib.chunksizes import guess_chunks
27
+
28
+ if TYPE_CHECKING:
29
+ from pathlib import Path
30
+
31
+
32
+ # - class definition -----------------------------------
33
class NcYaml:
    """Class to create a HDF5/netCDF4 formatted file from a YAML configuration file.

    Parameters
    ----------
    h5_yaml_fl : Path
       YAML files with the HDF5/netCDF4 format definition

    """

    def __init__(self: NcYaml, h5_yaml_fl: Path) -> None:
        """Construct a NcYaml instance."""
        self.logger = logging.getLogger("h5yaml.NcYaml")

        try:
            self._h5_def = conf_from_yaml(h5_yaml_fl)
        except RuntimeError as exc:
            raise RuntimeError(f"failed to read {h5_yaml_fl}") from exc

        # files with compound definitions are searched next to the main file
        self.yaml_dir = h5_yaml_fl.parent

    @staticmethod
    def _fill_value(settings: dict) -> int | float | None:
        """Return the fill value defined in `settings`, or None when absent.

        The string "NaN" is converted to `numpy.nan`, floating-point values
        are returned unchanged (not truncated) and anything else is
        converted to an integer.
        """
        if "_FillValue" not in settings:
            return None

        raw = settings["_FillValue"]
        if isinstance(raw, str) and raw == "NaN":
            return np.nan
        if isinstance(raw, float):
            return raw
        return int(raw)

    def __groups(self: NcYaml, fid: Dataset) -> None:
        """Create groups in HDF5 product (the section 'groups' is optional)."""
        for key in self.h5_def.get("groups") or []:
            _ = fid.createGroup(key)

    def __dimensions(self: NcYaml, fid: Dataset) -> None:
        """Add dimensions to HDF5 product."""
        for key, value in self.h5_def["dimensions"].items():
            _ = fid.createDimension(key, value["_size"])

            # only dimensions with a 'long_name' are also created as variable
            if "long_name" not in value:
                continue

            dset = fid.createVariable(
                key,
                value["_dtype"],
                dimensions=(key,),
                fill_value=self._fill_value(value),
                contiguous=value["_size"] != 0,
            )
            # fixed dimensions can have predefined values, as in class H5Yaml
            if value["_size"] != 0 and "_values" in value:
                dset[:] = value["_values"]
            # keys with a leading underscore are settings, all others attributes
            dset.setncatts({k: v for k, v in value.items() if not k.startswith("_")})

    def __compounds(self: NcYaml, fid: Dataset) -> dict[str, dict[str, list]]:
        """Add compound datatypes to HDF5 product.

        Returns
        -------
        dict
           for each compound: its numpy 'dtype' definition and the 'units' and
           'names' (long names) of its elements

        """
        if "compounds" not in self.h5_def:
            return {}

        compounds = {}
        if isinstance(self.h5_def["compounds"], list):
            # a list holds names of YAML files with the compound definitions,
            # the list is replaced by the definitions read from these files
            file_list = self.h5_def["compounds"].copy()
            self.h5_def["compounds"] = {}
            for name in file_list:
                if not (yaml_fl := self.yaml_dir / name).is_file():
                    self.logger.warning(
                        "compound definition file %s not found (skipped)", yaml_fl
                    )
                    continue
                try:
                    res = conf_from_yaml(yaml_fl)
                except RuntimeError as exc:
                    raise RuntimeError(f"failed to read {yaml_fl}") from exc
                for key, value in res.items():
                    self.h5_def["compounds"][key] = value

        for key, value in self.h5_def["compounds"].items():
            compounds[key] = {
                "dtype": [],
                "units": [],
                "names": [],
            }

            # each element is defined as [dtype, units, long_name] or
            # [dtype, long_name]
            for _key, _val in value.items():
                compounds[key]["dtype"].append((_key, _val[0]))
                if len(_val) == 3:
                    compounds[key]["units"].append(_val[1])
                compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])

            comp_t = np.dtype(compounds[key]["dtype"])
            _ = fid.createCompoundType(comp_t, key)

        return compounds

    def __variables(
        self: NcYaml,
        fid: Dataset,
        compounds: dict[str, dict[str, list]] | None,
    ) -> None:
        """Add datasets to HDF5 product.

        Parameters
        ----------
        fid :  netCDF4.Dataset
           HDF5 file pointer (mode 'r+')
        compounds :  dict[str, dict[str, list]]
           Definition of the compound(s) in the product

        Raises
        ------
        ValueError
           if a variable has more than one unlimited dimension, or when a
           variable-length variable has a compound data-type

        """
        for key, val in self.h5_def["variables"].items():
            # the definition of the variable is left untouched, which allows
            # to call method 'create' more than once
            dtype_name = val["_dtype"]
            is_compound = dtype_name in fid.cmptypes
            if is_compound:
                ds_dtype = fid.cmptypes[dtype_name]
                sz_dtype = ds_dtype.dtype.itemsize
            else:
                ds_dtype = dtype_name
                sz_dtype = np.dtype(dtype_name).itemsize

            fillvalue = self._fill_value(val)

            compression = None
            complevel = 0
            # currently only gzip compression is supported
            if "_compression" in val:
                compression = "zlib"
                complevel = val["_compression"]

            n_udim = 0
            ds_shape = ()
            ds_maxshape = ()
            for coord in val["_dims"]:
                dim_sz = fid.dimensions[coord].size
                n_udim += int(dim_sz == 0)
                ds_shape += (dim_sz,)
                ds_maxshape += (dim_sz if dim_sz > 0 else None,)

            # currently, we can not handle more than one unlimited dimension
            if n_udim > 1:
                raise ValueError("more than one unlimited dimension")

            # obtain chunk-size settings
            ds_chunk = (
                val["_chunks"] if "_chunks" in val else guess_chunks(ds_shape, sz_dtype)
            )

            # honour '_vlen' for every storage layout, a variable-length
            # variable can not have a fill value
            is_vlen = bool(val.get("_vlen"))
            if is_vlen:
                if is_compound:
                    raise ValueError("can not have vlen with compounds")
                # a variable-length type is defined once and shared by all
                # variables with the same base type
                ds_dtype = (
                    fid.vltypes[dtype_name]
                    if dtype_name in fid.vltypes
                    else fid.createVLType(dtype_name, dtype_name)
                )
                fillvalue = None

            # create the variable
            if ds_chunk == "contiguous":
                dset = fid.createVariable(
                    key,
                    ds_dtype,
                    dimensions=val["_dims"],
                    fill_value=fillvalue,
                    contiguous=True,
                )
            else:
                if is_vlen and ds_maxshape == (None,):
                    ds_chunk = (16,)

                dset = fid.createVariable(
                    key,
                    ds_dtype,
                    dimensions=val["_dims"],
                    fill_value=fillvalue,
                    contiguous=False,
                    compression=compression,
                    complevel=complevel,
                    chunksizes=(
                        ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk)
                    ),
                )
            # keys with a leading underscore are settings, all others attributes
            dset.setncatts({k: v for k, v in val.items() if not k.startswith("_")})

            # units and long names of the elements of a compound data-type
            if compounds is not None and dtype_name in compounds:
                if compounds[dtype_name]["units"]:
                    dset.setncattr("units", compounds[dtype_name]["units"])
                if compounds[dtype_name]["names"]:
                    dset.setncattr("long_name", compounds[dtype_name]["names"])

    @property
    def h5_def(self: NcYaml) -> dict:
        """Return definition of the HDF5/netCDF4 product."""
        return self._h5_def

    def create(self: NcYaml, l1a_name: Path | str) -> None:
        """Create a HDF5/netCDF4 file (overwrite if exist).

        Parameters
        ----------
        l1a_name :  Path | str
           Full name of the HDF5/netCDF4 file to be generated

        Raises
        ------
        RuntimeError
           if the file can not be created due to insufficient permissions

        """
        try:
            with Dataset(l1a_name, "w") as fid:
                self.__groups(fid)
                self.__dimensions(fid)
                # compounds must exist before variables which use them
                self.__variables(fid, self.__compounds(fid))
        except PermissionError as exc:
            raise RuntimeError(f"failed create {l1a_name}") from exc
228
+
229
+
230
+ # - test module -------------------------
231
def tests() -> None:
    """Create 'test_yaml.nc' from the test configuration in package 'h5yaml.Data'."""
    print("Calling NcYaml")
    yaml_file = files("h5yaml.Data") / "h5_testing.yaml"
    product = NcYaml(yaml_file)
    product.create("test_yaml.nc")


if __name__ == "__main__":
    tests()
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.4
2
+ Name: h5yaml
3
+ Version: 0.0.3
4
+ Summary: Use YAML configuration file to generate HDF5/netCDF4 formated files.
5
+ Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
6
+ Project-URL: Source, https://github.com/rmvanhees/h5_yaml
7
+ Project-URL: Issues, https://github.com/rmvanhees/h5_yaml/issues
8
+ Author-email: Richard van Hees <r.m.van.hees@sron.nl>
9
+ License-Expression: BSD-3-Clause
10
+ License-File: LICENSE
11
+ Keywords: HDF5,YAML,netCDF4
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
20
+ Requires-Python: >=3.12
21
+ Requires-Dist: h5py>=3.13
22
+ Requires-Dist: netcdf4>=1.7
23
+ Requires-Dist: numpy>=2.2
24
+ Requires-Dist: pyyaml>=6.0
25
+ Description-Content-Type: text/markdown
26
+
27
+ # H5_YAML
28
+
29
+ ## Description
30
+ Use YAML configuration file to generate HDF5/netCDF4 formatted files.
31
+
32
+ The class `NcYaml` must be used when strict conformance to the netCDF4 format is
33
+ required. However, the python netCDF4 implementation does not allow variable-length
34
+ data to have a compound data-type. The class `H5Yaml` does not have this restriction
35
+ and will generate HDF5 formatted files which can be read by netCDF4 software.
36
+
37
+ ## Installation
38
+ Releases of the code, starting from version 0.1, will be made available via PyPI.
39
+
40
+ ## Usage
41
+
42
+ The YAML file should be structured as follows:
43
+
44
+ * The top-level sections are: 'groups', 'dimensions', 'compounds' and 'variables'
45
+ * The section 'groups' is optional, but you should provide each group you want to use
46
+ in your file. The 'groups' section in the YAML file may look like this:
47
+
48
+ ```
49
+ groups:
50
+ - engineering_data
51
+ - image_attributes
52
+ - navigation_data
53
+ - processing_control
54
+ - science_data
55
+ ```
56
+
57
+ * The section 'dimensions' is obligatory, you should define the dimensions for each
58
+ variable in your file. The 'dimensions' section may look like this:
59
+
60
+ ```
61
+ dimensions:
62
+ days:
63
+ _dtype: u4
64
+ _size: 0
65
+ long_name: days since 2024-01-01 00:00:00Z
66
+ number_of_images: # an unlimited dimension
67
+ _dtype: u2
68
+ _size: 0
69
+ samples_per_image: # a fixed dimension
70
+ _dtype: u4
71
+ _size: 307200
72
+ /navigation_data/att_time: # an unlimited dimension in a group with attributes
73
+ _dtype: f8
74
+ _size: 0
75
+ _FillValue: -32767
76
+ long_name: Attitude sample time (seconds of day)
77
+ calendar: proleptic_gregorian
78
+ units: seconds since %Y-%m-%d %H:%M:%S
79
+ valid_min: 0
80
+ valid_max: 92400
81
+ n_viewport: # a fixed dimension with fixed values and attributes
82
+ _dtype: i2
83
+ _size: 5
84
+ _values: [-50, -20, 0, 20, 50]
85
+ long_name: along-track view angles at sensor
86
+ units: degrees
87
+ ```
88
+
89
+ * The 'compounds' are optional, but you should provide each compound data-type which
90
+ you want to use in your file. For each compound element you have to provide its
91
+ data-type and attributes: units and long_name. The 'compound' section may look like
92
+ this:
93
+
94
+ ```
95
+ compounds:
96
+ stats_dtype:
97
+ time: [u8, seconds since 1970-01-01T00:00:00, timestamp]
98
+ index: [u2, '1', index]
99
+ tbl_id: [u1, '1', binning id]
100
+ saa: [u1, '1', saa-flag]
101
+ coad: [u1, '1', co-addings]
102
+ texp: [f4, ms, exposure time]
103
+ lat: [f4, degree, latitude]
104
+ lon: [f4, degree, longitude]
105
+ avg: [f4, '1', '$S - S_{ref}$']
106
+ unc: [f4, '1', '\u03c3($S - S_{ref}$)']
107
+ dark_offs: [f4, '1', dark-offset]
108
+ ```
109
+
110
+ Alternatively, provide a list with names of YAML files which contain the definitions
111
+ of the compounds.
112
+
113
+ compounds:
114
+ - h5_nomhk_tm.yaml
115
+ - h5_science_hk.yaml
116
+
117
+ * The 'variables' are defined by their data-type ('_dtype') and dimensions ('_dims'),
118
+ and optionally chunk sizes ('_chunks'), compression ('_compression'), variable length
119
+ ('_vlen'). In addition, each variable can have as many attributes as you like,
120
+ defined by its name and value. The 'variables' section may look like this:
121
+
122
+ ```
123
+ variables:
124
+ /image_attributes/nr_coadditions:
125
+ _dtype: u2
126
+ _dims: [number_of_images]
127
+ _FillValue: 0
128
+ long_name: Number of coadditions
129
+ units: '1'
130
+ valid_min: 1
131
+ /image_attributes/exposure_time:
132
+ _dtype: f8
133
+ _dims: [number_of_images]
134
+ _FillValue: -32767
135
+ long_name: Exposure time
136
+ units: seconds
137
+ stats_163:
138
+ _dtype: stats_dtype
139
+ _vlen: True
140
+ _dims: [days]
141
+ comment: detector map statistics (MPS=163)
142
+ ```
143
+
144
+ ### Notes and ToDo:
145
+
146
+ * The usage of older versions of h5py may result in broken netCDF4 files
147
+ * Explain usage of parameter '_chunks', which is currently not correctly implemented.
148
+ * Explain that the usage of variable length data-sets may break netCDF4 compatibility
149
+
150
+ ## Support [TBW]
151
+
152
+ ## Roadmap
153
+
154
+ * Release v0.1 : stable API to read your YAML files and generate the HDF5/netCDF4 file
155
+
156
+
157
+ ## Authors and acknowledgment
158
+ The code is developed by R.M. van Hees (SRON)
159
+
160
+ ## License
161
+
162
+ * Copyright: SRON (https://www.sron.nl).
163
+ * License: BSD-3-clause
164
+
165
+ ## Project status
166
+ Beta
@@ -0,0 +1,9 @@
1
+ h5yaml/conf_from_yaml.py,sha256=FT5oS4yqDFUYZqgria_OrmMK52ZcrTVoAPyPkLrHstc,982
2
+ h5yaml/yaml_h5py.py,sha256=m5vOdVour3FwnUjewGukIra6C_c0F61yqnsvUJN-KtM,8591
3
+ h5yaml/yaml_nc.py,sha256=KG2y497If2tJmSSmF6bvXl7ePKorptEUEJULPexBIQw,7980
4
+ h5yaml/Data/h5_testing.yaml,sha256=_x3qBC8RNQ1h4c6B1JOH_y0L9DiDLsOEfyr60IcvpoI,2358
5
+ h5yaml/lib/chunksizes.py,sha256=aOXkLqTk5GgE-uk80QqHYbLB-FzBMnOrMm6ixH4QAUc,1225
6
+ h5yaml-0.0.3.dist-info/METADATA,sha256=CQm7-Uxop-K7v5-WotW6DiKFPtYJXwEi9OJfJKC0o7I,5370
7
+ h5yaml-0.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ h5yaml-0.0.3.dist-info/licenses/LICENSE,sha256=MoOwtPnC77nFaIwRIAIE6fKhrzMd3G18mOXDPtAH8G0,1509
9
+ h5yaml-0.0.3.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2019-2025 SRON
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ * Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.