h5yaml 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,6 +25,8 @@ groups:
25
25
  - group_01
26
26
  - group_02
27
27
  - group_03
28
+ - processing_control
29
+ - /processing_control/input_parameters
28
30
 
29
31
  # Define dimensions
30
32
  # Note: dimensions with an attribute 'long_name' will also be generated as variables
@@ -91,6 +93,26 @@ variables:
91
93
  units: '1'
92
94
  valid_min: 0
93
95
  valid_max: 65534
96
+ /group_00/detector_images_chunked:
97
+ _dtype: u2
98
+ _dims: [number_of_images, column, row]
99
+ _FillValue: 65535
100
+ _chunks: [1, 640, 512]
101
+ long_name: Detector pixel values
102
+ comment: unbinned full-frame data
103
+ units: '1'
104
+ valid_min: 0
105
+ valid_max: 65534
106
+ /group_00/detector_images_autochunk:
107
+ _dtype: u2
108
+ _dims: [number_of_images, column, row]
109
+ _chunks: True
110
+ _FillValue: 65535
111
+ long_name: Detector pixel values
112
+ comment: unbinned full-frame data
113
+ units: '1'
114
+ valid_min: 0
115
+ valid_max: 65534
94
116
  # ---------- GROUP 01 ----------
95
117
  /group_01/detector_images:
96
118
  _dtype: u2
@@ -199,7 +221,7 @@ variables:
199
221
  valid_max: 999.9
200
222
  /group_03/ds_10:
201
223
  _dtype: f4
202
- _dims: [number_of_images]
224
+ _dims: [number_of_images, /group_03/viewport]
203
225
  long_name: float dataset
204
226
  units: '1'
205
227
  valid_min: -999.9
@@ -25,6 +25,8 @@ groups:
25
25
  - group_01
26
26
  - group_02
27
27
  - group_03
28
+ - processing_control
29
+ - /processing_control/input_parameters
28
30
 
29
31
  # Define dimensions
30
32
  # Note: dimensions with an attribute 'long_name' will also be generated as variables
@@ -87,6 +89,16 @@ variables:
87
89
  units: '1'
88
90
  valid_min: 0
89
91
  valid_max: 65534
92
+ /group_00/detector_images_chunked:
93
+ _dtype: u2
94
+ _dims: [number_of_images, column, row]
95
+ _FillValue: 65535
96
+ _chunks: [1, 640, 512]
97
+ long_name: Detector pixel values
98
+ comment: unbinned full-frame data
99
+ units: '1'
100
+ valid_min: 0
101
+ valid_max: 65534
90
102
  # ---------- GROUP 01 ----------
91
103
  /group_01/detector_images:
92
104
  _dtype: u2
@@ -190,7 +202,7 @@ variables:
190
202
  # valid_max: 999.9
191
203
  /group_03/ds_10:
192
204
  _dtype: f4
193
- _dims: [number_of_images]
205
+ _dims: [number_of_images, /group_03/viewport]
194
206
  long_name: float dataset
195
207
  units: '1'
196
208
  valid_min: -999.9
h5yaml/conf_from_yaml.py CHANGED
@@ -45,7 +45,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
45
45
 
46
46
  """
47
47
  if isinstance(file_path, str):
48
- file_path = Path(str)
48
+ file_path = Path(file_path)
49
49
 
50
50
  if not file_path.is_file():
51
51
  raise FileNotFoundError(f"{file_path} not found")
@@ -53,7 +53,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
53
53
  with file_path.open("r", encoding="ascii") as fid:
54
54
  try:
55
55
  settings = yaml.safe_load(fid)
56
- except yaml.YAMLError as exc:
57
- raise RuntimeError from exc
56
+ except yaml.parser.ParserError as exc:
57
+ raise RuntimeError(f"Failed to parse {file_path}") from exc
58
58
 
59
59
  return settings
h5yaml/yaml_h5.py CHANGED
@@ -32,7 +32,6 @@ import numpy as np
32
32
 
33
33
  from .conf_from_yaml import conf_from_yaml
34
34
  from .lib.adjust_attr import adjust_attr
35
- from .lib.chunksizes import guess_chunks
36
35
 
37
36
 
38
37
  # - class definition -----------------------------------
@@ -41,12 +40,12 @@ class H5Yaml:
41
40
 
42
41
  Parameters
43
42
  ----------
44
- h5_yaml_fl : Path
45
- YAML files with the HDF5 format definition
43
+ h5_yaml_fl : Path | str
44
+ YAML file with the HDF5 format definition
46
45
 
47
46
  """
48
47
 
49
- def __init__(self: H5Yaml, h5_yaml_fl: Path) -> None:
48
+ def __init__(self: H5Yaml, h5_yaml_fl: Path | str) -> None:
50
49
  """Construct a H5Yaml instance."""
51
50
  self.logger = logging.getLogger("h5yaml.H5Yaml")
52
51
 
@@ -72,12 +71,14 @@ class H5Yaml:
72
71
  )
73
72
 
74
73
  if val["_size"] == 0:
75
- ds_chunk = val.get("_chunks", (50,))
74
+ ds_chunk = val.get("_chunks")
75
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
76
+ ds_chunk = tuple(ds_chunk)
76
77
  dset = fid.create_dataset(
77
78
  key,
78
79
  shape=(0,),
79
80
  dtype="T" if val["_dtype"] == "str" else val["_dtype"],
80
- chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
81
+ chunks=ds_chunk,
81
82
  maxshape=(None,),
82
83
  fillvalue=fillvalue,
83
84
  )
@@ -153,10 +154,8 @@ class H5Yaml:
153
154
  for key, val in self.h5_def["variables"].items():
154
155
  if val["_dtype"] in fid:
155
156
  ds_dtype = fid[val["_dtype"]]
156
- dtype_size = fid[val["_dtype"]].dtype.itemsize
157
157
  else:
158
158
  ds_dtype = "T" if val["_dtype"] == "str" else val["_dtype"]
159
- dtype_size = np.dtype(val["_dtype"]).itemsize
160
159
 
161
160
  fillvalue = None
162
161
  if "_FillValue" in val:
@@ -190,15 +189,13 @@ class H5Yaml:
190
189
  if n_udim > 1:
191
190
  raise ValueError(f"{key} has more than one unlimited dimension")
192
191
 
193
- # obtain chunk-size settings
194
- ds_chunk = (
195
- val["_chunks"]
196
- if "_chunks" in val
197
- else guess_chunks(ds_shape, dtype_size)
198
- )
192
+ if None in ds_maxshape and val.get("_chunks") == "contiguous":
193
+ raise KeyError(
194
+ "you can not create a contiguous dataset with unlimited dimensions."
195
+ )
199
196
 
200
197
  # create the variable
201
- if ds_chunk == "contiguous":
198
+ if val.get("_chunks") == "contiguous":
202
199
  dset = fid.create_dataset(
203
200
  key,
204
201
  ds_shape,
@@ -208,6 +205,9 @@ class H5Yaml:
208
205
  fillvalue=fillvalue,
209
206
  )
210
207
  else:
208
+ ds_chunk = val.get("_chunks")
209
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
210
+ ds_chunk = tuple(ds_chunk)
211
211
  compression = None
212
212
  shuffle = False
213
213
  # currently only gzip compression is supported
@@ -225,14 +225,12 @@ class H5Yaml:
225
225
  fid[ds_name] = h5py.vlen_dtype(ds_dtype)
226
226
  ds_dtype = fid[ds_name]
227
227
  fillvalue = None
228
- if ds_maxshape == (None,):
229
- ds_chunk = (16,)
230
228
 
231
229
  dset = fid.create_dataset(
232
230
  key,
233
231
  ds_shape,
234
232
  dtype=ds_dtype,
235
- chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
233
+ chunks=ds_chunk,
236
234
  maxshape=ds_maxshape,
237
235
  fillvalue=fillvalue,
238
236
  compression=compression,
h5yaml/yaml_nc.py CHANGED
@@ -35,7 +35,6 @@ from netCDF4 import Dataset
35
35
 
36
36
  from .conf_from_yaml import conf_from_yaml
37
37
  from .lib.adjust_attr import adjust_attr
38
- from .lib.chunksizes import guess_chunks
39
38
 
40
39
  if TYPE_CHECKING:
41
40
  from pathlib import Path
@@ -43,9 +42,16 @@ if TYPE_CHECKING:
43
42
 
44
43
  # - class definition -----------------------------------
45
44
  class NcYaml:
46
- """Class to create a HDF5/netCDF4 formated file from a YAML configuration file."""
45
+ """Class to create a HDF5/netCDF4 formated file from a YAML configuration file.
47
46
 
48
- def __init__(self: NcYaml, nc_yaml_fl: Path) -> None:
47
+ Parameters
48
+ ----------
49
+ nc_yaml_fl : Path | str
50
+ YAML file with the netCDF4 format definition
51
+
52
+ """
53
+
54
+ def __init__(self: NcYaml, nc_yaml_fl: Path | str) -> None:
49
55
  """Construct a NcYaml instance."""
50
56
  self.logger = logging.getLogger("h5yaml.NcYaml")
51
57
 
@@ -172,10 +178,8 @@ class NcYaml:
172
178
 
173
179
  if val["_dtype"] in fid.cmptypes:
174
180
  ds_dtype = fid.cmptypes[val["_dtype"]].dtype
175
- sz_dtype = ds_dtype.itemsize
176
181
  else:
177
182
  ds_dtype = val["_dtype"]
178
- sz_dtype = np.dtype(val["_dtype"]).itemsize
179
183
 
180
184
  fillvalue = None
181
185
  if "_FillValue" in val:
@@ -226,16 +230,16 @@ class NcYaml:
226
230
  if n_udim > 1:
227
231
  raise ValueError("more than one unlimited dimension")
228
232
 
229
- # obtain chunk-size settings
230
- ds_chunk = (
231
- val["_chunks"] if "_chunks" in val else guess_chunks(ds_shape, sz_dtype)
232
- )
233
+ if None in ds_maxshape and val.get("_chunks") == "contiguous":
234
+ raise KeyError(
235
+ "you can not create a contiguous dataset with unlimited dimensions."
236
+ )
233
237
 
234
238
  if val["_dtype"] in fid.cmptypes:
235
239
  val["_dtype"] = fid.cmptypes[val["_dtype"]]
236
240
 
237
241
  # create the variable
238
- if ds_chunk == "contiguous":
242
+ if val.get("_chunks") == "contiguous":
239
243
  dset = var_grp.createVariable(
240
244
  var_name,
241
245
  val["_dtype"],
@@ -244,25 +248,24 @@ class NcYaml:
244
248
  contiguous=True,
245
249
  )
246
250
  else:
251
+ ds_chunk = val.get("_chunks")
252
+ if ds_chunk is not None and not isinstance(ds_chunk, bool):
253
+ ds_chunk = tuple(ds_chunk)
247
254
  if val.get("_vlen"):
248
255
  if val["_dtype"] in fid.cmptypes:
249
256
  raise ValueError("can not have vlen with compounds")
250
257
  val["_dtype"] = fid.createVLType(ds_dtype, val["_dtype"])
251
258
  fillvalue = None
252
- if ds_maxshape == (None,):
253
- ds_chunk = (16,)
254
259
 
255
260
  dset = var_grp.createVariable(
256
261
  var_name,
257
262
  str if val["_dtype"] == "str" else val["_dtype"],
258
263
  dimensions=var_dims,
259
264
  fill_value=fillvalue,
260
- contiguous=False,
261
265
  compression=compression,
262
266
  complevel=complevel,
263
- chunksizes=(
264
- ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk)
265
- ),
267
+ chunksizes=ds_chunk,
268
+ contiguous=False,
266
269
  )
267
270
  dset.setncatts(
268
271
  {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: h5yaml
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Use YAML configuration file to generate HDF5/netCDF4 formatted files.
5
5
  Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
6
6
  Project-URL: Source, https://github.com/rmvanhees/h5_yaml
@@ -61,7 +61,19 @@ However, package `netCDF4` has some limitations, which `h5py` has not, for examp
61
61
  not allow variable-length variables to have a compound data-type.
62
62
 
63
63
  ## Installation
64
- Releases of the code, starting from version 0.1, will be made available via PyPI.
64
+ The package `h5yaml` is available from PyPI. To install it use `pip`:
65
+
66
+ > $ pip install [--user] h5yaml
67
+
68
+ The module `h5yaml` requires Python 3.10+ and the Python modules h5py (v3.14+), netCDF4 (v1.7+) and numpy (v2.0+).
69
+
70
+ **Note**: the packages `h5py` and `netCDF4` come with their own HDF5 libraries. If these differ, they may
71
+ collide and result in an *"HDF5 error"*.
72
+ If this is the case, you have to install the development packages of HDF5 and netCDF4 (or compile them from source),
73
+ and then reinstall `h5py` and `netCDF4` using the commands:
74
+
75
+ > $ pip uninstall h5py; pip install --no-binary=h5py h5py
76
+ > $ pip uninstall netCDF4; pip install --no-binary=netCDF4 netCDF4
65
77
 
66
78
  ## Usage
67
79
 
@@ -176,7 +188,7 @@ The YAML file should be structured as follows:
176
188
 
177
189
  ## Support [TBW]
178
190
 
179
- ## Roadmap
191
+ ## Road map
180
192
 
181
193
  * Release v0.1 : stable API to read your YAML files and generate the HDF5/netCDF4 file
182
194
 
@@ -0,0 +1,14 @@
1
+ h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
2
+ h5yaml/conf_from_yaml.py,sha256=GVbWR-I0_sKRxrXmgLxbnTJvAXz5OtFtNYu4Pp3LaaI,1607
3
+ h5yaml/yaml_h5.py,sha256=K9KkdHNDadrrwlR5EJWuzf1yFZcycJLm3EaiM4nLHkw,10112
4
+ h5yaml/yaml_nc.py,sha256=0xJ0dYnIXEhUVZGEkv9GZZu4Xo7K4zXPGiG49j8RHKs,10977
5
+ h5yaml/Data/h5_compound.yaml,sha256=z3dMCJDRAw14boRp0zT74bz_oFi21yu8coUoKOW-d2Q,1131
6
+ h5yaml/Data/h5_testing.yaml,sha256=BSrjenq_L5g8GGoldtU_NbWzLSymwAcvFOh26jtoynM,6247
7
+ h5yaml/Data/h5_unsupported.yaml,sha256=v4HYhiTikFt6UoEUJBnmSse_WeHbmBgqF2e1bCJEfLw,1502
8
+ h5yaml/Data/nc_testing.yaml,sha256=zKDRkYpWVM3_vDpsu-ZxHAGDlITICb_nwjDKsFzPgcQ,5807
9
+ h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
10
+ h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
11
+ h5yaml-0.2.1.dist-info/METADATA,sha256=Z-be-4WMz4o1RY4WiY3o-pnfEq72cdvTkDnmTQUnaJY,7876
12
+ h5yaml-0.2.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
13
+ h5yaml-0.2.1.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
14
+ h5yaml-0.2.1.dist-info/RECORD,,
h5yaml/lib/chunksizes.py DELETED
@@ -1,73 +0,0 @@
1
- #
2
- # This file is part of Python package: `h5yaml`
3
- #
4
- # https://github.com/rmvanhees/pyxarr.git
5
- #
6
- # Copyright (c) 2025 - R.M. van Hees (SRON)
7
- # All rights reserved.
8
- #
9
- # Licensed under the Apache License, Version 2.0 (the "License");
10
- # you may not use this file except in compliance with the License.
11
- # You may obtain a copy of the License at
12
- #
13
- # http://www.apache.org/licenses/LICENSE-2.0
14
- #
15
- # Unless required by applicable law or agreed to in writing, software
16
- # distributed under the License is distributed on an "AS IS" BASIS,
17
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
- # See the License for the specific language governing permissions and
19
- # limitations under the License.
20
- #
21
- """Obtain chunksizes for HDF5 datasets."""
22
-
23
- from __future__ import annotations
24
-
25
- __all__ = ["guess_chunks"]
26
-
27
- from typing import TYPE_CHECKING
28
-
29
- if TYPE_CHECKING:
30
- from numpy.typing import ArrayLike
31
-
32
-
33
- def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int]:
34
- """Perform an educated guess for the dataset chunk sizes.
35
-
36
- Parameters
37
- ----------
38
- dims : ArrayLike[int]
39
- Dimensions of the variable
40
- dtype_sz : int
41
- The element size of the data-type of the variable
42
-
43
- Returns
44
- -------
45
- "contiguous" or tuple with chunk-sizes
46
-
47
- """
48
- fixed_size = dtype_sz
49
- if len(dims) > 1:
50
- for val in [x for x in dims[1:] if x > 0]:
51
- fixed_size *= val
52
-
53
- # first variables without an unlimited dimension
54
- if 0 not in dims:
55
- if fixed_size < 400000:
56
- return "contiguous"
57
-
58
- res = list(dims)
59
- res[0] = max(1, 2048000 // fixed_size)
60
- return tuple(res)
61
-
62
- # then variables with an unlimited dimension
63
- if len(dims) == 1:
64
- return (1024,)
65
-
66
- udim = dims.index(0)
67
- res = list(dims)
68
- if fixed_size < 400000:
69
- res[udim] = 1024
70
- else:
71
- res[udim] = max(1, 2048000 // fixed_size)
72
-
73
- return tuple(res)
@@ -1,15 +0,0 @@
1
- h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
2
- h5yaml/conf_from_yaml.py,sha256=-2ar_gUmc5qvD1KlcctnpPY8G5c4TTXOF2tfrgcT9m4,1560
3
- h5yaml/yaml_h5.py,sha256=jVZAL5Cu6dqgdn25cgVBP3g7yx2wdE-cmCbkaOmQKZ4,10153
4
- h5yaml/yaml_nc.py,sha256=M6g4ZTPMlPmGZjyn3mLFnoQpmynGwgA0HtlUXSGNvvw,10963
5
- h5yaml/Data/h5_compound.yaml,sha256=z3dMCJDRAw14boRp0zT74bz_oFi21yu8coUoKOW-d2Q,1131
6
- h5yaml/Data/h5_testing.yaml,sha256=NhXeXjdblh3bv1hPjCl5DvIhEXo3EpD4mlgaZDElsJc,5626
7
- h5yaml/Data/h5_unsupported.yaml,sha256=v4HYhiTikFt6UoEUJBnmSse_WeHbmBgqF2e1bCJEfLw,1502
8
- h5yaml/Data/nc_testing.yaml,sha256=zuXcYrcuCankndt5e4nRPj2-xed97IA9yvfpn89XQgw,5451
9
- h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
10
- h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
11
- h5yaml/lib/chunksizes.py,sha256=R1kdaKWF0Hol_maZ0tPDoUyWIH5RatP7d2J1yBA8bkk,1949
12
- h5yaml-0.2.0.dist-info/METADATA,sha256=IZxpXl9fI3Z7pa1DoRumjxPZXn8N_ykkJEdaUNnFJlw,7280
13
- h5yaml-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
14
- h5yaml-0.2.0.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
15
- h5yaml-0.2.0.dist-info/RECORD,,
File without changes