h5yaml 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,18 +19,19 @@
19
19
  # See the License for the specific language governing permissions and
20
20
  # limitations under the License.
21
21
  #
22
- stats_dtype:
23
- time:
24
- - u8
25
- - seconds since 1970-01-01T00:00:00
26
- - timestamp
27
- index: [u2, '1', index]
28
- tbl_id: [u1, '1', binning id]
29
- saa: [u1, '1', saa-flag]
30
- coad: [u1, '1', co-addings]
31
- texp: [f4, ms, exposure time]
32
- lat: [f4, degree, latitude]
33
- lon: [f4, degree, longitude]
34
- avg: [f4, '1', '$S - S_{ref}$']
35
- unc: [f4, '1', '\u03c3($S - S_{ref}$)']
36
- dark_offs: [f4, '1', dark-offset]
22
+ compounds:
23
+ stats_dtype:
24
+ time:
25
+ - u8
26
+ - seconds since 1970-01-01T00:00:00
27
+ - timestamp
28
+ index: [u2, '1', index]
29
+ tbl_id: [u1, '1', binning id]
30
+ saa: [u1, '1', saa-flag]
31
+ coad: [u1, '1', co-addings]
32
+ texp: [f4, ms, exposure time]
33
+ lat: [f4, degree, latitude]
34
+ lon: [f4, degree, longitude]
35
+ avg: [f4, '1', '$S - S_{ref}$']
36
+ unc: [f4, '1', '\u03c3($S - S_{ref}$)']
37
+ dark_offs: [f4, '1', dark-offset]
@@ -25,6 +25,8 @@ groups:
25
25
  - group_01
26
26
  - group_02
27
27
  - group_03
28
+ - processing_control
29
+ - processing_control/input_parameters
28
30
 
29
31
  # Define dimensions
30
32
  # Note dimensions with an attribute 'long_name' will also be generated as variable
@@ -91,6 +93,26 @@ variables:
91
93
  units: '1'
92
94
  valid_min: 0
93
95
  valid_max: 65534
96
+ /group_00/detector_images_chunked:
97
+ _dtype: u2
98
+ _dims: [number_of_images, column, row]
99
+ _FillValue: 65535
100
+ _chunks: [1, 640, 512]
101
+ long_name: Detector pixel values
102
+ comment: unbinned full-frame data
103
+ units: '1'
104
+ valid_min: 0
105
+ valid_max: 65534
106
+ /group_00/detector_images_autochunk:
107
+ _dtype: u2
108
+ _dims: [number_of_images, column, row]
109
+ _chunks: True
110
+ _FillValue: 65535
111
+ long_name: Detector pixel values
112
+ comment: unbinned full-frame data
113
+ units: '1'
114
+ valid_min: 0
115
+ valid_max: 65534
94
116
  # ---------- GROUP 01 ----------
95
117
  /group_01/detector_images:
96
118
  _dtype: u2
@@ -199,7 +221,7 @@ variables:
199
221
  valid_max: 999.9
200
222
  /group_03/ds_10:
201
223
  _dtype: f4
202
- _dims: [number_of_images]
224
+ _dims: [number_of_images, /group_03/viewport]
203
225
  long_name: float dataset
204
226
  units: '1'
205
227
  valid_min: -999.9
@@ -39,12 +39,6 @@ dimensions:
39
39
  valid_min: 0
40
40
  valid_max: 92400
41
41
 
42
- # Define compound types
43
- # - compound elements must have a data-type, and can have a unit and long_name
44
- compounds:
45
- - h5_compound.yaml
46
- - h5_nocompound.yaml
47
-
48
42
  # Define variables
49
43
  variables:
50
44
  ds_01:
@@ -25,6 +25,8 @@ groups:
25
25
  - group_01
26
26
  - group_02
27
27
  - group_03
28
+ - processing_control
29
+ - processing_control/input_parameters
28
30
 
29
31
  # Define dimensions
30
32
  # Note dimensions with an attribute 'long_name' will also be generated as variable
@@ -87,6 +89,16 @@ variables:
87
89
  units: '1'
88
90
  valid_min: 0
89
91
  valid_max: 65534
92
+ /group_00/detector_images_chunked:
93
+ _dtype: u2
94
+ _dims: [number_of_images, column, row]
95
+ _FillValue: 65535
96
+ _chunks: [1, 640, 512]
97
+ long_name: Detector pixel values
98
+ comment: unbinned full-frame data
99
+ units: '1'
100
+ valid_min: 0
101
+ valid_max: 65534
90
102
  # ---------- GROUP 01 ----------
91
103
  /group_01/detector_images:
92
104
  _dtype: u2
@@ -190,7 +202,7 @@ variables:
190
202
  # valid_max: 999.9
191
203
  /group_03/ds_10:
192
204
  _dtype: f4
193
- _dims: [number_of_images]
205
+ _dims: [number_of_images, /group_03/viewport]
194
206
  long_name: float dataset
195
207
  units: '1'
196
208
  valid_min: -999.9
h5yaml/conf_from_yaml.py CHANGED
@@ -45,7 +45,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
45
45
 
46
46
  """
47
47
  if isinstance(file_path, str):
48
- file_path = Path(str)
48
+ file_path = Path(file_path)
49
49
 
50
50
  if not file_path.is_file():
51
51
  raise FileNotFoundError(f"{file_path} not found")
@@ -53,7 +53,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
53
53
  with file_path.open("r", encoding="ascii") as fid:
54
54
  try:
55
55
  settings = yaml.safe_load(fid)
56
- except yaml.YAMLError as exc:
57
- raise RuntimeError from exc
56
+ except yaml.parser.ParserError as exc:
57
+ raise RuntimeError(f"Failed to parse {file_path}") from exc
58
58
 
59
59
  return settings
h5yaml/yaml_h5.py CHANGED
@@ -32,7 +32,6 @@ import numpy as np
32
32
 
33
33
  from .conf_from_yaml import conf_from_yaml
34
34
  from .lib.adjust_attr import adjust_attr
35
- from .lib.chunksizes import guess_chunks
36
35
 
37
36
 
38
37
  # - class definition -----------------------------------
@@ -41,30 +40,74 @@ class H5Yaml:
41
40
 
42
41
  Parameters
43
42
  ----------
44
- h5_yaml_fl : Path
43
+ h5_yaml_fl : Path | str | list[Path | str]
45
44
  YAML files with the HDF5 format definition
46
45
 
47
46
  """
48
47
 
49
- def __init__(self: H5Yaml, h5_yaml_fl: Path) -> None:
48
+ def __init__(self: H5Yaml, h5_yaml_fl: Path | str | list[Path | str]) -> None:
50
49
  """Construct a H5Yaml instance."""
51
50
  self.logger = logging.getLogger("h5yaml.H5Yaml")
51
+ self._h5_def = {
52
+ "groups": set(),
53
+ "attrs_global": {},
54
+ "attrs_groups": {},
55
+ "compounds": {},
56
+ "dimensions": {},
57
+ "variables": {},
58
+ }
59
+
60
+ for yaml_fl in h5_yaml_fl if isinstance(h5_yaml_fl, list) else [h5_yaml_fl]:
61
+ try:
62
+ config = conf_from_yaml(yaml_fl)
63
+ except RuntimeError as exc:
64
+ raise RuntimeError from exc
65
+
66
+ for key in self._h5_def:
67
+ if key in config:
68
+ self._h5_def[key] |= (
69
+ set(config[key]) if key == "groups" else config[key]
70
+ )
71
+
72
+ def __attrs(self: H5Yaml, fid: h5py.File) -> None:
73
+ """Create global and group attributes.
52
74
 
53
- try:
54
- self._h5_def = conf_from_yaml(h5_yaml_fl)
55
- except RuntimeError as exc:
56
- raise RuntimeError from exc
75
+ Parameters
76
+ ----------
77
+ fid : h5py.File
78
+ HDF5 file pointer (mode 'r+')
79
+
80
+ """
81
+ for key, value in self._h5_def["attrs_global"].items():
82
+ if key not in fid.attrs and value != "TBW":
83
+ fid.attrs[key] = value
57
84
 
58
- self.yaml_dir = h5_yaml_fl.parent
85
+ for key, value in self._h5_def["attrs_groups"].items():
86
+ if key not in fid.attrs and value != "TBW":
87
+ fid[str(Path(key).parent)].attrs[Path(key).name] = value
59
88
 
60
89
  def __groups(self: H5Yaml, fid: h5py.File) -> None:
61
- """Create groups in HDF5 product."""
62
- for key in self.h5_def["groups"]:
63
- _ = fid.create_group(key)
90
+ """Create groups in HDF5 product.
91
+
92
+ Parameters
93
+ ----------
94
+ fid : h5py.File
95
+ HDF5 file pointer (mode 'r+')
96
+
97
+ """
98
+ for key in self._h5_def["groups"]:
99
+ _ = fid.require_group(key)
64
100
 
65
101
  def __dimensions(self: H5Yaml, fid: h5py.File) -> None:
66
- """Add dimensions to HDF5 product."""
67
- for key, val in self.h5_def["dimensions"].items():
102
+ """Add dimensions to HDF5 product.
103
+
104
+ Parameters
105
+ ----------
106
+ fid : h5py.File
107
+ HDF5 file pointer (mode 'r+')
108
+
109
+ """
110
+ for key, val in self._h5_def["dimensions"].items():
68
111
  fillvalue = None
69
112
  if "_FillValue" in val:
70
113
  fillvalue = (
@@ -72,12 +115,14 @@ class H5Yaml:
72
115
  )
73
116
 
74
117
  if val["_size"] == 0:
75
- ds_chunk = val.get("_chunks", (50,))
118
+ ds_chunk = val.get("_chunks")
119
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
120
+ ds_chunk = tuple(ds_chunk)
76
121
  dset = fid.create_dataset(
77
122
  key,
78
123
  shape=(0,),
79
124
  dtype="T" if val["_dtype"] == "str" else val["_dtype"],
80
- chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
125
+ chunks=ds_chunk,
81
126
  maxshape=(None,),
82
127
  fillvalue=fillvalue,
83
128
  )
@@ -101,62 +146,32 @@ class H5Yaml:
101
146
  if not attr.startswith("_"):
102
147
  dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
103
148
 
104
- def __compounds(self: H5Yaml, fid: h5py.File) -> dict[str, str | int | float]:
105
- """Add compound datatypes to HDF5 product."""
106
- if "compounds" not in self.h5_def:
107
- return {}
108
-
109
- compounds = {}
110
- if isinstance(self.h5_def["compounds"], list):
111
- file_list = self.h5_def["compounds"].copy()
112
- self.h5_def["compounds"] = {}
113
- for name in file_list:
114
- if not (yaml_fl := self.yaml_dir / name).is_file():
115
- continue
116
- try:
117
- res = conf_from_yaml(yaml_fl)
118
- except RuntimeError as exc:
119
- raise RuntimeError from exc
120
- for key, value in res.items():
121
- self.h5_def["compounds"][key] = value
122
-
123
- for key, val in self.h5_def["compounds"].items():
124
- compounds[key] = {
125
- "dtype": [],
126
- "units": [],
127
- "names": [],
128
- }
129
-
130
- for _key, _val in val.items():
131
- compounds[key]["dtype"].append((_key, _val[0]))
132
- if len(_val) == 3:
133
- compounds[key]["units"].append(_val[1])
134
- compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])
135
-
136
- fid[key] = np.dtype(compounds[key]["dtype"])
137
-
138
- return compounds
139
-
140
- def __variables(
141
- self: H5Yaml, fid: h5py.File, compounds: dict[str, str | int | float] | None
142
- ) -> None:
149
+ def __compounds(self: H5Yaml, fid: h5py.File) -> None:
150
+ """Add compound datatypes to HDF5 product.
151
+
152
+ Parameters
153
+ ----------
154
+ fid : h5py.File
155
+ HDF5 file pointer (mode 'r+')
156
+
157
+ """
158
+ for key, val in self._h5_def["compounds"].items():
159
+ fid[key] = np.dtype([(k, v[0]) for k, v in val.items()])
160
+
161
+ def __variables(self: H5Yaml, fid: h5py.File) -> None:
143
162
  """Add datasets to HDF5 product.
144
163
 
145
164
  Parameters
146
165
  ----------
147
166
  fid : h5py.File
148
167
  HDF5 file pointer (mode 'r+')
149
- compounds : dict[str, str | int | float]
150
- Definition of the compound(s) in the product
151
168
 
152
169
  """
153
- for key, val in self.h5_def["variables"].items():
170
+ for key, val in self._h5_def["variables"].items():
154
171
  if val["_dtype"] in fid:
155
172
  ds_dtype = fid[val["_dtype"]]
156
- dtype_size = fid[val["_dtype"]].dtype.itemsize
157
173
  else:
158
174
  ds_dtype = "T" if val["_dtype"] == "str" else val["_dtype"]
159
- dtype_size = np.dtype(val["_dtype"]).itemsize
160
175
 
161
176
  fillvalue = None
162
177
  if "_FillValue" in val:
@@ -190,15 +205,13 @@ class H5Yaml:
190
205
  if n_udim > 1:
191
206
  raise ValueError(f"{key} has more than one unlimited dimension")
192
207
 
193
- # obtain chunk-size settings
194
- ds_chunk = (
195
- val["_chunks"]
196
- if "_chunks" in val
197
- else guess_chunks(ds_shape, dtype_size)
198
- )
208
+ if None in ds_maxshape and val.get("_chunks") == "contiguous":
209
+ raise KeyError(
210
+ "you can not create a contiguous dataset with unlimited dimensions."
211
+ )
199
212
 
200
213
  # create the variable
201
- if ds_chunk == "contiguous":
214
+ if val.get("_chunks") == "contiguous":
202
215
  dset = fid.create_dataset(
203
216
  key,
204
217
  ds_shape,
@@ -208,6 +221,9 @@ class H5Yaml:
208
221
  fillvalue=fillvalue,
209
222
  )
210
223
  else:
224
+ ds_chunk = val.get("_chunks")
225
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
226
+ ds_chunk = tuple(ds_chunk)
211
227
  compression = None
212
228
  shuffle = False
213
229
  # currently only gzip compression is supported
@@ -225,14 +241,12 @@ class H5Yaml:
225
241
  fid[ds_name] = h5py.vlen_dtype(ds_dtype)
226
242
  ds_dtype = fid[ds_name]
227
243
  fillvalue = None
228
- if ds_maxshape == (None,):
229
- ds_chunk = (16,)
230
244
 
231
245
  dset = fid.create_dataset(
232
246
  key,
233
247
  ds_shape,
234
248
  dtype=ds_dtype,
235
- chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
249
+ chunks=ds_chunk,
236
250
  maxshape=ds_maxshape,
237
251
  fillvalue=fillvalue,
238
252
  compression=compression,
@@ -246,11 +260,14 @@ class H5Yaml:
246
260
  if not attr.startswith("_"):
247
261
  dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
248
262
 
249
- if compounds is not None and val["_dtype"] in compounds:
250
- if compounds[val["_dtype"]]["units"]:
251
- dset.attrs["units"] = compounds[val["_dtype"]]["units"]
252
- if compounds[val["_dtype"]]["names"]:
253
- dset.attrs["long_name"] = compounds[val["_dtype"]]["names"]
263
+ if val["_dtype"] in self._h5_def["compounds"]:
264
+ compound = self._h5_def["compounds"][val["_dtype"]]
265
+ res = [v[2] for k, v in compound.items() if len(v) == 3]
266
+ if res:
267
+ dset.attrs["units"] = [v[1] for k, v in compound.items()]
268
+ dset.attrs["long_name"] = res
269
+ else:
270
+ dset.attrs["long_name"] = [v[1] for k, v in compound.items()]
254
271
 
255
272
  @property
256
273
  def h5_def(self: H5Yaml) -> dict:
@@ -260,10 +277,11 @@ class H5Yaml:
260
277
  def diskless(self: H5Yaml) -> h5py.File:
261
278
  """Create a HDF5/netCDF4 file in memory."""
262
279
  fid = h5py.File.in_memory()
263
- if "groups" in self.h5_def:
264
- self.__groups(fid)
280
+ self.__groups(fid)
265
281
  self.__dimensions(fid)
266
- self.__variables(fid, self.__compounds(fid))
282
+ self.__compounds(fid)
283
+ self.__variables(fid)
284
+ self.__attrs(fid)
267
285
  return fid
268
286
 
269
287
  def create(self: H5Yaml, l1a_name: Path | str) -> None:
@@ -277,9 +295,10 @@ class H5Yaml:
277
295
  """
278
296
  try:
279
297
  with h5py.File(l1a_name, "w") as fid:
280
- if "groups" in self.h5_def:
281
- self.__groups(fid)
298
+ self.__groups(fid)
282
299
  self.__dimensions(fid)
283
- self.__variables(fid, self.__compounds(fid))
300
+ self.__compounds(fid)
301
+ self.__variables(fid)
302
+ self.__attrs(fid)
284
303
  except PermissionError as exc:
285
304
  raise RuntimeError(f"failed create {l1a_name}") from exc
h5yaml/yaml_nc.py CHANGED
@@ -18,15 +18,14 @@
18
18
  # See the License for the specific language governing permissions and
19
19
  # limitations under the License.
20
20
  #
21
- """Create HDF5/netCDF4 formatted file from a YAML configuration file using netCDF4."""
21
+ """Create netCDF4 formatted file from a YAML configuration file using netCDF4."""
22
22
 
23
23
  from __future__ import annotations
24
24
 
25
25
  __all__ = ["NcYaml"]
26
26
 
27
27
  import logging
28
- from pathlib import PurePosixPath
29
- from typing import TYPE_CHECKING
28
+ from pathlib import Path, PurePosixPath
30
29
 
31
30
  import numpy as np
32
31
 
@@ -35,38 +34,91 @@ from netCDF4 import Dataset
35
34
 
36
35
  from .conf_from_yaml import conf_from_yaml
37
36
  from .lib.adjust_attr import adjust_attr
38
- from .lib.chunksizes import guess_chunks
39
-
40
- if TYPE_CHECKING:
41
- from pathlib import Path
42
37
 
43
38
 
44
39
  # - class definition -----------------------------------
45
40
  class NcYaml:
46
- """Class to create a HDF5/netCDF4 formated file from a YAML configuration file."""
41
+ """Class to create a netCDF4 formatted file from a YAML configuration file.
42
+
43
+ Parameters
44
+ ----------
45
+ nc_yaml_fl : Path | str | list[Path | str]
46
+ YAML files with the netCDF4 format definition
47
+
48
+ """
47
49
 
48
- def __init__(self: NcYaml, nc_yaml_fl: Path) -> None:
50
+ def __init__(self: NcYaml, nc_yaml_fl: Path | str | list[Path | str]) -> None:
49
51
  """Construct a NcYaml instance."""
50
52
  self.logger = logging.getLogger("h5yaml.NcYaml")
53
+ self._nc_def = {
54
+ "groups": set(),
55
+ "attrs_global": {},
56
+ "attrs_groups": {},
57
+ "compounds": {},
58
+ "dimensions": {},
59
+ "variables": {},
60
+ }
61
+
62
+ for yaml_fl in nc_yaml_fl if isinstance(nc_yaml_fl, list) else [nc_yaml_fl]:
63
+ try:
64
+ config = conf_from_yaml(yaml_fl)
65
+ except RuntimeError as exc:
66
+ raise RuntimeError from exc
67
+
68
+ for key in self._nc_def:
69
+ if key in config:
70
+ self._nc_def[key] |= (
71
+ set(config[key]) if key == "groups" else config[key]
72
+ )
73
+
74
+ def __attrs(self: NcYaml, fid: Dataset) -> None:
75
+ """Create global and group attributes.
51
76
 
52
- try:
53
- self._nc_def = conf_from_yaml(nc_yaml_fl)
54
- except RuntimeError as exc:
55
- raise RuntimeError from exc
77
+ Parameters
78
+ ----------
79
+ fid : netCDF4.Dataset
80
+ netCDF4 Dataset (mode 'r+')
56
81
 
57
- self.yaml_dir = nc_yaml_fl.parent
82
+ """
83
+ for key, val in self.nc_def["attrs_global"].items():
84
+ if val == "TBW":
85
+ continue
86
+
87
+ if isinstance(val, str):
88
+ fid.setncattr_string(key, val)
89
+ else:
90
+ fid.setncattr(key, val)
91
+
92
+ for key, val in self.nc_def["attrs_groups"].items():
93
+ if val == "TBW":
94
+ continue
95
+
96
+ if isinstance(val, str):
97
+ fid[str(Path(key).parent)].setncattr_string(Path(key).name, val)
98
+ else:
99
+ fid[str(Path(key).parent)].setncattr(Path(key).name, val)
58
100
 
59
101
  def __groups(self: NcYaml, fid: Dataset) -> None:
60
- """Create groups in HDF5 product."""
102
+ """Create groups in a netCDF4 product.
103
+
104
+ Parameters
105
+ ----------
106
+ fid : netCDF4.Dataset
107
+ netCDF4 Dataset (mode 'r+')
108
+
109
+ """
61
110
  for key in self.nc_def["groups"]:
62
- pkey = PurePosixPath(key)
63
- if pkey.is_absolute():
64
- _ = fid[pkey.parent].createGroup(pkey.name)
65
- else:
66
- _ = fid.createGroup(key)
111
+ _ = fid.createGroup(key)
67
112
 
68
113
  def __dimensions(self: NcYaml, fid: Dataset) -> None:
69
- """Add dimensions to HDF5 product."""
114
+ """Add dimensions to a netCDF4 product.
115
+
116
+ Parameters
117
+ ----------
118
+ fid : netCDF4.Dataset
119
+ netCDF4 Dataset (mode 'r+')
120
+
121
+ """
70
122
  for key, value in self.nc_def["dimensions"].items():
71
123
  pkey = PurePosixPath(key)
72
124
  if pkey.is_absolute():
@@ -113,56 +165,26 @@ class NcYaml:
113
165
  }
114
166
  )
115
167
 
116
- def __compounds(self: NcYaml, fid: Dataset) -> dict[str, str | int | float]:
117
- """Add compound datatypes to HDF5 product."""
118
- if "compounds" not in self.nc_def:
119
- return {}
120
-
121
- compounds = {}
122
- if isinstance(self.nc_def["compounds"], list):
123
- file_list = self.nc_def["compounds"].copy()
124
- self.nc_def["compounds"] = {}
125
- for name in file_list:
126
- if not (yaml_fl := self.yaml_dir / name).is_file():
127
- continue
128
- try:
129
- res = conf_from_yaml(yaml_fl)
130
- except RuntimeError as exc:
131
- raise RuntimeError from exc
132
- for key, value in res.items():
133
- self.nc_def["compounds"][key] = value
134
-
135
- for key, value in self.nc_def["compounds"].items():
136
- compounds[key] = {
137
- "dtype": [],
138
- "units": [],
139
- "names": [],
140
- }
141
-
142
- for _key, _val in value.items():
143
- compounds[key]["dtype"].append((_key, _val[0]))
144
- if len(_val) == 3:
145
- compounds[key]["units"].append(_val[1])
146
- compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])
147
-
148
- comp_t = np.dtype(compounds[key]["dtype"])
149
- _ = fid.createCompoundType(comp_t, key)
168
+ def __compounds(self: NcYaml, fid: Dataset) -> None:
169
+ """Add compound datatypes to a netCDF4 product.
150
170
 
151
- return compounds
171
+ Parameters
172
+ ----------
173
+ fid : netCDF4.Dataset
174
+ netCDF4 Dataset (mode 'r+')
175
+
176
+ """
177
+ for key, val in self.nc_def["compounds"].items():
178
+ comp_t = np.dtype([(k, v[0]) for k, v in val.items()])
179
+ _ = fid.createCompoundType(comp_t, key)
152
180
 
153
- def __variables(
154
- self: NcYaml,
155
- fid: Dataset,
156
- compounds: dict[str, str | int | float] | None,
157
- ) -> None:
158
- """Add datasets to HDF5 product.
181
+ def __variables(self: NcYaml, fid: Dataset) -> None:
182
+ """Add datasets to a netCDF4 product.
159
183
 
160
184
  Parameters
161
185
  ----------
162
186
  fid : netCDF4.Dataset
163
- HDF5 file pointer (mode 'r+')
164
- compounds : dict[str, str | int | float]
165
- Definition of the compound(s) in the product
187
+ netCDF4 Dataset (mode 'r+')
166
188
 
167
189
  """
168
190
  for key, val in self.nc_def["variables"].items():
@@ -172,10 +194,8 @@ class NcYaml:
172
194
 
173
195
  if val["_dtype"] in fid.cmptypes:
174
196
  ds_dtype = fid.cmptypes[val["_dtype"]].dtype
175
- sz_dtype = ds_dtype.itemsize
176
197
  else:
177
198
  ds_dtype = val["_dtype"]
178
- sz_dtype = np.dtype(val["_dtype"]).itemsize
179
199
 
180
200
  fillvalue = None
181
201
  if "_FillValue" in val:
@@ -226,16 +246,16 @@ class NcYaml:
226
246
  if n_udim > 1:
227
247
  raise ValueError("more than one unlimited dimension")
228
248
 
229
- # obtain chunk-size settings
230
- ds_chunk = (
231
- val["_chunks"] if "_chunks" in val else guess_chunks(ds_shape, sz_dtype)
232
- )
249
+ if None in ds_maxshape and val.get("_chunks") == "contiguous":
250
+ raise KeyError(
251
+ "you can not create a contiguous dataset with unlimited dimensions."
252
+ )
233
253
 
234
254
  if val["_dtype"] in fid.cmptypes:
235
255
  val["_dtype"] = fid.cmptypes[val["_dtype"]]
236
256
 
237
257
  # create the variable
238
- if ds_chunk == "contiguous":
258
+ if val.get("_chunks") == "contiguous":
239
259
  dset = var_grp.createVariable(
240
260
  var_name,
241
261
  val["_dtype"],
@@ -244,25 +264,24 @@ class NcYaml:
244
264
  contiguous=True,
245
265
  )
246
266
  else:
267
+ ds_chunk = val.get("_chunks")
268
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
269
+ ds_chunk = tuple(ds_chunk)
247
270
  if val.get("_vlen"):
248
271
  if val["_dtype"] in fid.cmptypes:
249
272
  raise ValueError("can not have vlen with compounds")
250
273
  val["_dtype"] = fid.createVLType(ds_dtype, val["_dtype"])
251
274
  fillvalue = None
252
- if ds_maxshape == (None,):
253
- ds_chunk = (16,)
254
275
 
255
276
  dset = var_grp.createVariable(
256
277
  var_name,
257
278
  str if val["_dtype"] == "str" else val["_dtype"],
258
279
  dimensions=var_dims,
259
280
  fill_value=fillvalue,
260
- contiguous=False,
261
281
  compression=compression,
262
282
  complevel=complevel,
263
- chunksizes=(
264
- ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk)
265
- ),
283
+ chunksizes=ds_chunk,
284
+ contiguous=False,
266
285
  )
267
286
  dset.setncatts(
268
287
  {
@@ -272,38 +291,45 @@ class NcYaml:
272
291
  }
273
292
  )
274
293
 
275
- if compounds is not None and val["_dtype"] in compounds:
276
- if compounds[val["_dtype"]]["units"]:
277
- dset.attrs["units"] = compounds[val["_dtype"]]["units"]
278
- if compounds[val["_dtype"]]["names"]:
279
- dset.attrs["long_name"] = compounds[val["_dtype"]]["names"]
294
+ if val["_dtype"] in self._nc_def["compounds"]:
295
+ compound = self._nc_def["compounds"][val["_dtype"]]
296
+ res = [v[2] for k, v in compound.items() if len(v) == 3]
297
+ if res:
298
+ dset.attrs["units"] = [v[1] for k, v in compound.items()]
299
+ dset.attrs["long_name"] = res
300
+ else:
301
+ dset.attrs["long_name"] = [v[1] for k, v in compound.items()]
280
302
 
281
303
  @property
282
304
  def nc_def(self: NcYaml) -> dict:
283
- """Return definition of the HDF5/netCDF4 product."""
305
+ """Return definition of the netCDF4 product."""
284
306
  return self._nc_def
285
307
 
286
308
  def diskless(self: NcYaml) -> Dataset:
287
- """Create a HDF5/netCDF4 file in memory."""
309
+ """Create a netCDF4 file in memory."""
288
310
  fid = Dataset("diskless_test.nc", "w", diskless=True, persistent=False)
289
311
  self.__groups(fid)
290
312
  self.__dimensions(fid)
291
- self.__variables(fid, self.__compounds(fid))
313
+ self.__compounds(fid)
314
+ self.__variables(fid)
315
+ self.__attrs(fid)
292
316
  return fid
293
317
 
294
318
  def create(self: NcYaml, l1a_name: Path | str) -> None:
295
- """Create a HDF5/netCDF4 file (overwrite if exist).
319
+ """Create a netCDF4 file (overwrite if exist).
296
320
 
297
321
  Parameters
298
322
  ----------
299
323
  l1a_name : Path | str
300
- Full name of the HDF5/netCDF4 file to be generated
324
+ Full name of the netCDF4 file to be generated
301
325
 
302
326
  """
303
327
  try:
304
328
  with Dataset(l1a_name, "w") as fid:
305
329
  self.__groups(fid)
306
330
  self.__dimensions(fid)
307
- self.__variables(fid, self.__compounds(fid))
331
+ self.__compounds(fid)
332
+ self.__variables(fid)
333
+ self.__attrs(fid)
308
334
  except PermissionError as exc:
309
335
  raise RuntimeError(f"failed to create {l1a_name}") from exc
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: h5yaml
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: Use YAML configuration file to generate HDF5/netCDF4 formated files.
5
5
  Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
6
6
  Project-URL: Source, https://github.com/rmvanhees/h5_yaml
@@ -61,23 +61,39 @@ However, package `netCDF4` has some limitations, which `h5py` has not, for examp
61
61
  not allow variable-length variables to have a compound data-type.
62
62
 
63
63
  ## Installation
64
- Releases of the code, starting from version 0.1, will be made available via PyPI.
64
+ The package `h5yaml` is available from PyPI. To install it use `pip`:
65
+
66
+ > $ pip install [--user] h5yaml
67
+
68
+ The module `h5yaml` requires Python3.10+ and Python modules: h5py (v3.14+), netCDF4 (v1.7+) and numpy (v2.0+).
69
+
70
+ **Note**: the packages `h5py` and `netCDF4` come with their own HDF5 libraries. If these are different then they may
71
+ collide and result in a *''HDF5 error''*.
72
+ If this is the case then you have to install the development packages of HDF5 and netCDF4 (or compile them from source).
73
+ And reinstall `h5py` and `netCDF4` using the commands:
74
+
75
+ > $ pip uninstall h5py; pip install --no-binary=h5py h5py
76
+ > $ pip uninstall netCDF4; pip install --no-binary=netCDF4 netCDF4
65
77
 
66
78
  ## Usage
67
79
 
68
80
  The YAML file should be structured as follows:
69
81
 
70
- * The top level are: 'groups', 'dimensions', 'compounds' and 'variables'
82
+ * The top level are: 'groups', 'dimensions', 'compounds', 'variables', 'attrs\_global' and 'attrs\_groups'.
83
+ * > 'attrs\_global' and 'attrs\_groups' are added in version 0.3.0
84
+ * The names of the attributes, groups, dimensions, compounds and variables should be specified as PosixPaths, however:
85
+ * The names of groups should never start with a slash (always relative to root);
86
+ * All other elements which are stored in root should also not start with a slash;
87
+ * But these elements require a starting slash (absolute paths) when they are not stored in the root.
71
88
  * The section 'groups' are optional, but you should provide each group you want to use
72
89
  in your file. The 'groups' section in the YAML file may look like this:
73
-
74
90
  ```
75
91
  groups:
76
92
  - engineering_data
77
93
  - image_attributes
78
94
  - navigation_data
79
- - processing_control
80
95
  - science_data
96
+ - processing_control/input_data
81
97
  ```
82
98
 
83
99
  * The section 'dimensions' is obligatory, you should define the dimensions for each
@@ -133,14 +149,6 @@ The YAML file should be structured as follows:
133
149
  dark_offs: [f4, '1', dark-offset]
134
150
  ```
135
151
 
136
- Alternatively, provide a list with names of YAML files which contain the definitions
137
- of the compounds.
138
-
139
- ```
140
- compounds:
141
- - h5_nomhk_tm.yaml
142
- - h5_science_hk.yaml
143
- ```
144
152
  * The 'variables' are defined by their data-type ('_dtype') and dimensions ('_dims'),
145
153
  and optionally chunk sizes ('_chunks'), compression ('_compression'), variable length
146
154
  ('_vlen'). In addition, each variable can have as many attributes as you like,
@@ -148,6 +156,16 @@ The YAML file should be structured as follows:
148
156
 
149
157
  ```
150
158
  variables:
159
+ /science_data/detector_images:
160
+ _dtype: u2
161
+ _dims: [number_of_images, samples_per_image]
162
+ _compression: 3
163
+ _FillValue: 65535
164
+ long_name: Detector pixel values
165
+ coverage_content_type: image
166
+ units: '1'
167
+ valid_min: 0
168
+ valid_max: 65534
151
169
  /image_attributes/nr_coadditions:
152
170
  _dtype: u2
153
171
  _dims: [number_of_images]
@@ -163,20 +181,18 @@ The YAML file should be structured as follows:
163
181
  units: seconds
164
182
  stats_163:
165
183
  _dtype: stats_dtype
166
- _vlen: True
167
184
  _dims: [days]
185
+ _vlen: True
168
186
  comment: detector map statistics (MPS=163)
169
187
  ```
170
188
 
171
- ### Notes and ToDo:
189
+ ### Notes and ToDo
172
190
 
173
- * The usage of older versions of h5py may result in broken netCDF4 files
174
- * Explain usage of parameter '_chunks', which is currently not correctly implemented.
175
- * Explain that the usage of variable length data-sets may break netCDF4 compatibility
191
+ * The layout of a HDF5 or netCDF4 file can be complex. From version 0.3.0, you can split the file definition over several YAML files and provide a list with the names of YAML files as input to H5Yaml and NcYaml.
176
192
 
177
193
  ## Support [TBW]
178
194
 
179
- ## Roadmap
195
+ ## Road map
180
196
 
181
197
  * Release v0.1 : stable API to read your YAML files and generate the HDF5/netCDF4 file
182
198
 
@@ -0,0 +1,14 @@
1
+ h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
2
+ h5yaml/conf_from_yaml.py,sha256=GVbWR-I0_sKRxrXmgLxbnTJvAXz5OtFtNYu4Pp3LaaI,1607
3
+ h5yaml/yaml_h5.py,sha256=R_WqvK2korrR_nCY7MlmycRcD5Hc4yeJC6B4rvuwulk,10399
4
+ h5yaml/yaml_nc.py,sha256=G4kvn_Ec72ND8phXgNbFWY7EML-pgiMcBVwZroPtrQY,11330
5
+ h5yaml/Data/h5_compound.yaml,sha256=pAVGyhGpbbFgsb1NoTQZsttPLK1zktTZRufFdSaX78U,1172
6
+ h5yaml/Data/h5_testing.yaml,sha256=s-kUjHiXKr4IOVf2vqz8mUr1vcU61wxKwZFuQCUbemA,6246
7
+ h5yaml/Data/h5_unsupported.yaml,sha256=EfFztuUpuXDl_7wgwIqelwE_gdvu35zKT-YtsUfGQeM,1342
8
+ h5yaml/Data/nc_testing.yaml,sha256=C30hXo73GG4BifIr7oymkbJ4Bh8hxKKDDMnFOO5VED8,5806
9
+ h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
10
+ h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
11
+ h5yaml-0.3.0.dist-info/METADATA,sha256=jFRcsa-2kTu8QIFtKmKWaURD3DcQwrfyiXzNvzsQXnU,8446
12
+ h5yaml-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
+ h5yaml-0.3.0.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
14
+ h5yaml-0.3.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
h5yaml/lib/chunksizes.py DELETED
@@ -1,73 +0,0 @@
1
- #
2
- # This file is part of Python package: `h5yaml`
3
- #
4
- # https://github.com/rmvanhees/pyxarr.git
5
- #
6
- # Copyright (c) 2025 - R.M. van Hees (SRON)
7
- # All rights reserved.
8
- #
9
- # Licensed under the Apache License, Version 2.0 (the "License");
10
- # you may not use this file except in compliance with the License.
11
- # You may obtain a copy of the License at
12
- #
13
- # http://www.apache.org/licenses/LICENSE-2.0
14
- #
15
- # Unless required by applicable law or agreed to in writing, software
16
- # distributed under the License is distributed on an "AS IS" BASIS,
17
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
- # See the License for the specific language governing permissions and
19
- # limitations under the License.
20
- #
21
- """Obtain chunksizes for HDF5 datasets."""
22
-
23
- from __future__ import annotations
24
-
25
- __all__ = ["guess_chunks"]
26
-
27
- from typing import TYPE_CHECKING
28
-
29
- if TYPE_CHECKING:
30
- from numpy.typing import ArrayLike
31
-
32
-
33
- def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int]:
34
- """Perform an educated guess for the dataset chunk sizes.
35
-
36
- Parameters
37
- ----------
38
- dims : ArrayLike[int]
39
- Dimensions of the variable
40
- dtype_sz : int
41
- The element size of the data-type of the variable
42
-
43
- Returns
44
- -------
45
- "contiguous" or tuple with chunk-sizes
46
-
47
- """
48
- fixed_size = dtype_sz
49
- if len(dims) > 1:
50
- for val in [x for x in dims[1:] if x > 0]:
51
- fixed_size *= val
52
-
53
- # first variables without an unlimited dimension
54
- if 0 not in dims:
55
- if fixed_size < 400000:
56
- return "contiguous"
57
-
58
- res = list(dims)
59
- res[0] = max(1, 2048000 // fixed_size)
60
- return tuple(res)
61
-
62
- # then variables with an unlimited dimension
63
- if len(dims) == 1:
64
- return (1024,)
65
-
66
- udim = dims.index(0)
67
- res = list(dims)
68
- if fixed_size < 400000:
69
- res[udim] = 1024
70
- else:
71
- res[udim] = max(1, 2048000 // fixed_size)
72
-
73
- return tuple(res)
@@ -1,15 +0,0 @@
1
- h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
2
- h5yaml/conf_from_yaml.py,sha256=-2ar_gUmc5qvD1KlcctnpPY8G5c4TTXOF2tfrgcT9m4,1560
3
- h5yaml/yaml_h5.py,sha256=jVZAL5Cu6dqgdn25cgVBP3g7yx2wdE-cmCbkaOmQKZ4,10153
4
- h5yaml/yaml_nc.py,sha256=M6g4ZTPMlPmGZjyn3mLFnoQpmynGwgA0HtlUXSGNvvw,10963
5
- h5yaml/Data/h5_compound.yaml,sha256=z3dMCJDRAw14boRp0zT74bz_oFi21yu8coUoKOW-d2Q,1131
6
- h5yaml/Data/h5_testing.yaml,sha256=NhXeXjdblh3bv1hPjCl5DvIhEXo3EpD4mlgaZDElsJc,5626
7
- h5yaml/Data/h5_unsupported.yaml,sha256=v4HYhiTikFt6UoEUJBnmSse_WeHbmBgqF2e1bCJEfLw,1502
8
- h5yaml/Data/nc_testing.yaml,sha256=zuXcYrcuCankndt5e4nRPj2-xed97IA9yvfpn89XQgw,5451
9
- h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
10
- h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
11
- h5yaml/lib/chunksizes.py,sha256=R1kdaKWF0Hol_maZ0tPDoUyWIH5RatP7d2J1yBA8bkk,1949
12
- h5yaml-0.2.0.dist-info/METADATA,sha256=IZxpXl9fI3Z7pa1DoRumjxPZXn8N_ykkJEdaUNnFJlw,7280
13
- h5yaml-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- h5yaml-0.2.0.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
15
- h5yaml-0.2.0.dist-info/RECORD,,