h5yaml 0.0.3__tar.gz → 0.0.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {h5yaml-0.0.3 → h5yaml-0.0.5}/PKG-INFO +34 -14
- {h5yaml-0.0.3 → h5yaml-0.0.5}/README.md +29 -12
- {h5yaml-0.0.3 → h5yaml-0.0.5}/pyproject.toml +37 -35
- h5yaml-0.0.5/src/h5yaml/Data/h5_testing.yaml +103 -0
- h5yaml-0.0.3/src/h5yaml/Data/h5_testing.yaml → h5yaml-0.0.5/src/h5yaml/Data/nc_testing.yaml +0 -5
- {h5yaml-0.0.3 → h5yaml-0.0.5}/src/h5yaml/lib/chunksizes.py +17 -9
- h5yaml-0.0.3/src/h5yaml/yaml_h5py.py → h5yaml-0.0.5/src/h5yaml/yaml_h5.py +102 -25
- {h5yaml-0.0.3 → h5yaml-0.0.5}/src/h5yaml/yaml_nc.py +47 -20
- {h5yaml-0.0.3 → h5yaml-0.0.5}/.gitignore +0 -0
- {h5yaml-0.0.3 → h5yaml-0.0.5}/LICENSE +0 -0
- {h5yaml-0.0.3 → h5yaml-0.0.5}/MANIFEST.in +0 -0
- {h5yaml-0.0.3 → h5yaml-0.0.5}/src/h5yaml/conf_from_yaml.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: h5yaml
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.5
|
|
4
4
|
Summary: Use YAML configuration file to generate HDF5/netCDF4 formatted files.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
|
|
6
6
|
Project-URL: Source, https://github.com/rmvanhees/h5_yaml
|
|
@@ -14,28 +14,51 @@ Classifier: Intended Audience :: Developers
|
|
|
14
14
|
Classifier: Intended Audience :: Science/Research
|
|
15
15
|
Classifier: Operating System :: OS Independent
|
|
16
16
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
20
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
21
|
Classifier: Programming Language :: Python :: 3.13
|
|
19
22
|
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
20
|
-
Requires-Python: >=3.
|
|
23
|
+
Requires-Python: >=3.9
|
|
21
24
|
Requires-Dist: h5py>=3.13
|
|
22
25
|
Requires-Dist: netcdf4>=1.7
|
|
23
26
|
Requires-Dist: numpy>=2.2
|
|
24
27
|
Requires-Dist: pyyaml>=6.0
|
|
25
28
|
Description-Content-Type: text/markdown
|
|
26
29
|
|
|
27
|
-
#
|
|
30
|
+
# H5YAML
|
|
31
|
+
[](https://github.com/rmvanhees/h5yaml/)
|
|
32
|
+
[](https://github.com/rmvanhees/h5yaml/LICENSE)
|
|
33
|
+
[](https://pypi.org/project/h5yaml/)
|
|
34
|
+
[](https://pypi.org/project/h5yaml/)
|
|
28
35
|
|
|
29
36
|
## Description
|
|
30
|
-
|
|
37
|
+
This package lets you generate [HDF5](https://docs.h5py.org/en/stable/)/[netCDF4](https://unidata.github.io/netcdf4-python/)
|
|
38
|
+
formatted files as defined in a [YAML](https://yaml.org/) configuration file. This has several advantages:
|
|
31
39
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
40
|
+
* you define the layout of your HDF5/netCDF4 file using YAML which is human-readable and has intuitive syntax.
|
|
41
|
+
* you can reuse the YAML configuration file to give all your products a consistent layout.
|
|
42
|
+
* you can make updates by only changing the YAML configuration file
|
|
43
|
+
* you can have the layout of your HDF5/netCDF4 file as a python dictionary, thus without accessing any HDF5/netCDF4 file
|
|
44
|
+
|
|
45
|
+
The `H5YAML` package has two classes to generate a HDF5/netCDF4 formatted file.
|
|
46
|
+
|
|
47
|
+
1. The class `H5Yaml` uses the [h5py](https://pypi.org/project/h5py/) package, which is a Pythonic interface to
|
|
48
|
+
the HDF5 binary data format.
|
|
49
|
+
Let 'h5_def.yaml' be your YAML configuration file then ```H5Yaml("h5_def.yaml").create("foo.h5")``` will create
|
|
50
|
+
the HDF5 file 'foo.h5'. This file can be read by netCDF4 software, because dimension scales are attached to each dataset.
|
|
51
|
+
2. The class `NcYaml` uses the [netCDF4](https://pypi.org/project/netCDF4/) package, which provides an object-oriented
|
|
52
|
+
python interface to the netCDF version 4 library.
|
|
53
|
+
Let 'nc_def.yaml' be your YAML configuration file then ```NcYaml("nc_def.yaml").create("foo.nc")``` will create
|
|
54
|
+
the netCDF4/HDF5 file 'foo.nc'
|
|
55
|
+
|
|
56
|
+
The class `NcYaml` must be used when strict conformance to the netCDF4 format is required.
|
|
57
|
+
However, the `netCDF4` package has some limitations that `h5py` does not have; for example, it does
|
|
58
|
+
not allow variable-length variables to have a compound data-type.
|
|
36
59
|
|
|
37
60
|
## Installation
|
|
38
|
-
|
|
61
|
+
Releases of the code, starting from version 0.1, will be made available via PyPI.
|
|
39
62
|
|
|
40
63
|
## Usage
|
|
41
64
|
|
|
@@ -54,7 +77,7 @@ The YAML file should be structured as follows:
|
|
|
54
77
|
- science_data
|
|
55
78
|
```
|
|
56
79
|
|
|
57
|
-
* The section 'dimensions' is obligatory, you
|
|
80
|
+
* The section 'dimensions' is obligatory, you should define the dimensions for each
|
|
58
81
|
variable in your file. The 'dimensions' section may look like this:
|
|
59
82
|
|
|
60
83
|
```
|
|
@@ -144,7 +167,7 @@ The YAML file should be structured as follows:
|
|
|
144
167
|
### Notes and ToDo:
|
|
145
168
|
|
|
146
169
|
* The usage of older versions of h5py may result in broken netCDF4 files
|
|
147
|
-
* Explain usage of parameter '_chunks', which is currently not
|
|
170
|
+
* Explain usage of parameter '_chunks', which is currently not correctly implemented.
|
|
148
171
|
* Explain that the usage of variable length data-sets may break netCDF4 compatibility
|
|
149
172
|
|
|
150
173
|
## Support [TBW]
|
|
@@ -161,6 +184,3 @@ The code is developed by R.M. van Hees (SRON)
|
|
|
161
184
|
|
|
162
185
|
* Copyright: SRON (https://www.sron.nl).
|
|
163
186
|
* License: BSD-3-clause
|
|
164
|
-
|
|
165
|
-
## Project status
|
|
166
|
-
Beta
|
|
@@ -1,15 +1,35 @@
|
|
|
1
|
-
#
|
|
1
|
+
# H5YAML
|
|
2
|
+
[](https://github.com/rmvanhees/h5yaml/)
|
|
3
|
+
[](https://github.com/rmvanhees/h5yaml/LICENSE)
|
|
4
|
+
[](https://pypi.org/project/h5yaml/)
|
|
5
|
+
[](https://pypi.org/project/h5yaml/)
|
|
2
6
|
|
|
3
7
|
## Description
|
|
4
|
-
|
|
8
|
+
This package lets you generate [HDF5](https://docs.h5py.org/en/stable/)/[netCDF4](https://unidata.github.io/netcdf4-python/)
|
|
9
|
+
formatted files as defined in a [YAML](https://yaml.org/) configuration file. This has several advantages:
|
|
5
10
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
11
|
+
* you define the layout of your HDF5/netCDF4 file using YAML which is human-readable and has intuitive syntax.
|
|
12
|
+
* you can reuse the YAML configuration file to give all your products a consistent layout.
|
|
13
|
+
* you can make updates by only changing the YAML configuration file
|
|
14
|
+
* you can have the layout of your HDF5/netCDF4 file as a python dictionary, thus without accessing any HDF5/netCDF4 file
|
|
15
|
+
|
|
16
|
+
The `H5YAML` package has two classes to generate a HDF5/netCDF4 formatted file.
|
|
17
|
+
|
|
18
|
+
1. The class `H5Yaml` uses the [h5py](https://pypi.org/project/h5py/) package, which is a Pythonic interface to
|
|
19
|
+
the HDF5 binary data format.
|
|
20
|
+
Let 'h5_def.yaml' be your YAML configuration file then ```H5Yaml("h5_def.yaml").create("foo.h5")``` will create
|
|
21
|
+
the HDF5 file 'foo.h5'. This file can be read by netCDF4 software, because dimension scales are attached to each dataset.
|
|
22
|
+
2. The class `NcYaml` uses the [netCDF4](https://pypi.org/project/netCDF4/) package, which provides an object-oriented
|
|
23
|
+
python interface to the netCDF version 4 library.
|
|
24
|
+
Let 'nc_def.yaml' be your YAML configuration file then ```NcYaml("nc_def.yaml").create("foo.nc")``` will create
|
|
25
|
+
the netCDF4/HDF5 file 'foo.nc'
|
|
26
|
+
|
|
27
|
+
The class `NcYaml` must be used when strict conformance to the netCDF4 format is required.
|
|
28
|
+
However, the `netCDF4` package has some limitations that `h5py` does not have; for example, it does
|
|
29
|
+
not allow variable-length variables to have a compound data-type.
|
|
10
30
|
|
|
11
31
|
## Installation
|
|
12
|
-
|
|
32
|
+
Releases of the code, starting from version 0.1, will be made available via PyPI.
|
|
13
33
|
|
|
14
34
|
## Usage
|
|
15
35
|
|
|
@@ -28,7 +48,7 @@ The YAML file should be structured as follows:
|
|
|
28
48
|
- science_data
|
|
29
49
|
```
|
|
30
50
|
|
|
31
|
-
* The section 'dimensions' is obligatory, you
|
|
51
|
+
* The section 'dimensions' is obligatory, you should define the dimensions for each
|
|
32
52
|
variable in your file. The 'dimensions' section may look like this:
|
|
33
53
|
|
|
34
54
|
```
|
|
@@ -118,7 +138,7 @@ The YAML file should be structured as follows:
|
|
|
118
138
|
### Notes and ToDo:
|
|
119
139
|
|
|
120
140
|
* The usage of older versions of h5py may result in broken netCDF4 files
|
|
121
|
-
* Explain usage of parameter '_chunks', which is currently not
|
|
141
|
+
* Explain usage of parameter '_chunks', which is currently not correctly implemented.
|
|
122
142
|
* Explain that the usage of variable length data-sets may break netCDF4 compatibility
|
|
123
143
|
|
|
124
144
|
## Support [TBW]
|
|
@@ -135,6 +155,3 @@ The code is developed by R.M. van Hees (SRON)
|
|
|
135
155
|
|
|
136
156
|
* Copyright: SRON (https://www.sron.nl).
|
|
137
157
|
* License: BSD-3-clause
|
|
138
|
-
|
|
139
|
-
## Project status
|
|
140
|
-
Beta
|
|
@@ -14,33 +14,35 @@ license = "BSD-3-Clause"
|
|
|
14
14
|
authors = [
|
|
15
15
|
{name = "Richard van Hees", email = "r.m.van.hees@sron.nl"}
|
|
16
16
|
]
|
|
17
|
-
requires-python = ">=3.
|
|
17
|
+
requires-python = ">=3.9"
|
|
18
18
|
classifiers = [
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
19
|
+
"Development Status :: 4 - Beta",
|
|
20
|
+
"Intended Audience :: Developers",
|
|
21
|
+
"Intended Audience :: Science/Research",
|
|
22
|
+
"Operating System :: OS Independent",
|
|
23
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
24
|
+
"Programming Language :: Python :: 3.9",
|
|
25
|
+
"Programming Language :: Python :: 3.10",
|
|
26
|
+
"Programming Language :: Python :: 3.11",
|
|
27
|
+
"Programming Language :: Python :: 3.12",
|
|
28
|
+
"Programming Language :: Python :: 3.13",
|
|
29
|
+
"Topic :: Scientific/Engineering :: Atmospheric Science",
|
|
27
30
|
]
|
|
28
31
|
keywords = [
|
|
29
|
-
|
|
32
|
+
"HDF5", "netCDF4", "YAML"
|
|
30
33
|
]
|
|
31
34
|
dynamic = [
|
|
32
|
-
|
|
35
|
+
"version",
|
|
33
36
|
]
|
|
34
37
|
dependencies = [
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
"h5py>=3.13",
|
|
39
|
+
"netCDF4>=1.7",
|
|
40
|
+
"numpy>=2.2",
|
|
41
|
+
"pyYAML>=6.0",
|
|
39
42
|
]
|
|
40
43
|
|
|
41
44
|
[project.scripts]
|
|
42
45
|
|
|
43
|
-
|
|
44
46
|
[project.urls]
|
|
45
47
|
Homepage = "https://github.com/rmvanhees/h5_yaml"
|
|
46
48
|
Source = "https://github.com/rmvanhees/h5_yaml"
|
|
@@ -72,25 +74,25 @@ target-version = "py312"
|
|
|
72
74
|
|
|
73
75
|
[tool.ruff.lint]
|
|
74
76
|
select = [
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
77
|
+
"D", # pydocstyle
|
|
78
|
+
"E", # pycodestyle
|
|
79
|
+
"F", # pyflakes
|
|
80
|
+
"I", # isort
|
|
81
|
+
"N", # pep8-naming
|
|
82
|
+
"W", # pycodestyle
|
|
83
|
+
"ANN", # flake8-annotations
|
|
84
|
+
"B", # flake8-bugbear
|
|
85
|
+
"ISC", # flake8-implicit-str-concat
|
|
86
|
+
"PGH", # pygrep-hooks
|
|
87
|
+
"PYI", # flake8-pyi
|
|
88
|
+
"Q", # flake8-quotes
|
|
89
|
+
"SIM", # flake8-simplify
|
|
90
|
+
"TID", # flake8-tidy-imports
|
|
91
|
+
"TCH", # flake8-type-checking
|
|
92
|
+
"NPY", # NumPy-specific
|
|
93
|
+
"PERF", # Perflint
|
|
94
|
+
"RUF", # Ruff Specific
|
|
95
|
+
"UP", # pyupgrade
|
|
94
96
|
]
|
|
95
97
|
ignore = ["D203", "D213"]
|
|
96
98
|
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# YAML
|
|
2
|
+
#
|
|
3
|
+
# Configuration file to test the implementation of classes H5Yaml and NcYaml
|
|
4
|
+
#
|
|
5
|
+
# This file is part of h5_yaml:
|
|
6
|
+
# https://github.com/rmvanhees/h5_yaml.git
|
|
7
|
+
#
|
|
8
|
+
# Copyright (c) 2025 SRON
|
|
9
|
+
# All Rights Reserved
|
|
10
|
+
#
|
|
11
|
+
# License: BSD-3-Clause
|
|
12
|
+
#
|
|
13
|
+
# Define groups
|
|
14
|
+
groups:
|
|
15
|
+
- group_00
|
|
16
|
+
- group_01
|
|
17
|
+
- group_02
|
|
18
|
+
|
|
19
|
+
# Define dimensions
|
|
20
|
+
# Note dimensions with an attribute 'long_name' will also be generated as variable
|
|
21
|
+
dimensions:
|
|
22
|
+
number_of_images:
|
|
23
|
+
_dtype: u2
|
|
24
|
+
_size: 0
|
|
25
|
+
samples_per_image:
|
|
26
|
+
_dtype: u4
|
|
27
|
+
_size: 203500
|
|
28
|
+
column:
|
|
29
|
+
_dtype: u2
|
|
30
|
+
_size: 640
|
|
31
|
+
row:
|
|
32
|
+
_dtype: u2
|
|
33
|
+
_size: 512
|
|
34
|
+
time:
|
|
35
|
+
_dtype: f8
|
|
36
|
+
_size: 0
|
|
37
|
+
_FillValue: -32767
|
|
38
|
+
long_name: Attitude sample time (seconds of day)
|
|
39
|
+
calendar: proleptic_gregorian
|
|
40
|
+
units: seconds since %Y-%m-%d %H:%M:%S
|
|
41
|
+
valid_min: 0
|
|
42
|
+
valid_max: 92400
|
|
43
|
+
|
|
44
|
+
# Define compound types
|
|
45
|
+
# - compound elements must have a data-type, and can have a unit and long_name
|
|
46
|
+
compounds:
|
|
47
|
+
stats_dtype:
|
|
48
|
+
time: [u8, seconds since 1970-01-01T00:00:00, timestamp]
|
|
49
|
+
index: [u2, '1', index]
|
|
50
|
+
tbl_id: [u1, '1', binning id]
|
|
51
|
+
saa: [u1, '1', saa-flag]
|
|
52
|
+
coad: [u1, '1', co-addings]
|
|
53
|
+
texp: [f4, ms, exposure time]
|
|
54
|
+
lat: [f4, degree, latitude]
|
|
55
|
+
lon: [f4, degree, longitude]
|
|
56
|
+
avg: [f4, '1', '$S - S_{ref}$']
|
|
57
|
+
unc: [f4, '1', '\u03c3($S - S_{ref}$)']
|
|
58
|
+
dark_offs: [f4, '1', dark-offset]
|
|
59
|
+
|
|
60
|
+
geo_dtype:
|
|
61
|
+
lat: [f4, latitude]
|
|
62
|
+
lon: [f4, longitude]
|
|
63
|
+
|
|
64
|
+
# Define variables
|
|
65
|
+
variables:
|
|
66
|
+
/group_00/detector_images:
|
|
67
|
+
_dtype: u2
|
|
68
|
+
_dims: [number_of_images, column, row]
|
|
69
|
+
_FillValue: 65535
|
|
70
|
+
long_name: Detector pixel values
|
|
71
|
+
comment: unbinned full-frame data
|
|
72
|
+
units: '1'
|
|
73
|
+
valid_min: 0
|
|
74
|
+
valid_max: 65534
|
|
75
|
+
/group_01/detector_images:
|
|
76
|
+
_dtype: u2
|
|
77
|
+
_dims: [number_of_images, samples_per_image]
|
|
78
|
+
_FillValue: 65535
|
|
79
|
+
_compression: 1
|
|
80
|
+
long_name: Detector pixel values
|
|
81
|
+
comment: variable binned data (filled to the largest samples_per_image)
|
|
82
|
+
units: '1'
|
|
83
|
+
valid_min: 0
|
|
84
|
+
valid_max: 65534
|
|
85
|
+
/group_01/stats:
|
|
86
|
+
_dtype: stats_dtype
|
|
87
|
+
_dims: [time]
|
|
88
|
+
comment: detector map statistics
|
|
89
|
+
/group_02/detector_images:
|
|
90
|
+
_dtype: u2
|
|
91
|
+
_dims: [number_of_images]
|
|
92
|
+
_vlen: True
|
|
93
|
+
_FillValue: 65535
|
|
94
|
+
long_name: Detector pixel values
|
|
95
|
+
comment: variable binned (vlen) data
|
|
96
|
+
units: '1'
|
|
97
|
+
valid_min: 0
|
|
98
|
+
valid_max: 65534
|
|
99
|
+
/group_02/stats:
|
|
100
|
+
_dtype: stats_dtype
|
|
101
|
+
_vlen: True
|
|
102
|
+
_dims: [time]
|
|
103
|
+
comment: detector map statistics (vlen)
|
|
@@ -57,10 +57,6 @@ compounds:
|
|
|
57
57
|
unc: [f4, '1', '\u03c3($S - S_{ref}$)']
|
|
58
58
|
dark_offs: [f4, '1', dark-offset]
|
|
59
59
|
|
|
60
|
-
geo_dtype:
|
|
61
|
-
lat: [f4, latitude]
|
|
62
|
-
lon: [f4, longitude]
|
|
63
|
-
|
|
64
60
|
# Define variables
|
|
65
61
|
variables:
|
|
66
62
|
/group_00/detector_images:
|
|
@@ -84,7 +80,6 @@ variables:
|
|
|
84
80
|
valid_max: 65534
|
|
85
81
|
/group_01/stats:
|
|
86
82
|
_dtype: stats_dtype
|
|
87
|
-
_vlen: True
|
|
88
83
|
_dims: [time]
|
|
89
84
|
comment: detector map statistics
|
|
90
85
|
/group_02/detector_images:
|
|
@@ -35,20 +35,28 @@ def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int]:
|
|
|
35
35
|
|
|
36
36
|
"""
|
|
37
37
|
fixed_size = dtype_sz
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if fixed_size < 65536:
|
|
38
|
+
if len(dims) > 1:
|
|
39
|
+
for val in [x for x in dims[1:] if x > 0]:
|
|
40
|
+
fixed_size *= val
|
|
41
|
+
|
|
42
|
+
# first variables without an unlimited dimension
|
|
43
|
+
if 0 not in dims:
|
|
44
|
+
if fixed_size < 400000:
|
|
46
45
|
return "contiguous"
|
|
47
46
|
|
|
47
|
+
res = list(dims)
|
|
48
|
+
res[0] = max(1, 2048000 // fixed_size)
|
|
49
|
+
return tuple(res)
|
|
50
|
+
|
|
51
|
+
# then variables with an unlimited dimension
|
|
48
52
|
if len(dims) == 1:
|
|
49
53
|
return (1024,)
|
|
50
54
|
|
|
55
|
+
udim = dims.index(0)
|
|
51
56
|
res = list(dims)
|
|
52
|
-
|
|
57
|
+
if fixed_size < 400000:
|
|
58
|
+
res[udim] = 1024
|
|
59
|
+
else:
|
|
60
|
+
res[udim] = max(1, 2048000 // fixed_size)
|
|
53
61
|
|
|
54
62
|
return tuple(res)
|
|
@@ -23,6 +23,8 @@ import numpy as np
|
|
|
23
23
|
from h5yaml.conf_from_yaml import conf_from_yaml
|
|
24
24
|
from h5yaml.lib.chunksizes import guess_chunks
|
|
25
25
|
|
|
26
|
+
# - helper function ------------------------------------
|
|
27
|
+
|
|
26
28
|
|
|
27
29
|
# - class definition -----------------------------------
|
|
28
30
|
class H5Yaml:
|
|
@@ -53,22 +55,20 @@ class H5Yaml:
|
|
|
53
55
|
|
|
54
56
|
def __dimensions(self: H5Yaml, fid: h5py.File) -> None:
|
|
55
57
|
"""Add dimensions to HDF5 product."""
|
|
56
|
-
for key,
|
|
58
|
+
for key, val in self.h5_def["dimensions"].items():
|
|
57
59
|
fillvalue = None
|
|
58
|
-
if "_FillValue" in
|
|
60
|
+
if "_FillValue" in val:
|
|
59
61
|
fillvalue = (
|
|
60
|
-
np.nan if
|
|
62
|
+
np.nan if val["_FillValue"] == "NaN" else int(val["_FillValue"])
|
|
61
63
|
)
|
|
62
64
|
|
|
63
|
-
if
|
|
64
|
-
ds_chunk =
|
|
65
|
+
if val["_size"] == 0:
|
|
66
|
+
ds_chunk = val.get("_chunks", (50,))
|
|
65
67
|
dset = fid.create_dataset(
|
|
66
68
|
key,
|
|
67
69
|
shape=(0,),
|
|
68
70
|
dtype=(
|
|
69
|
-
h5py.string_dtype()
|
|
70
|
-
if value["_dtype"] == "str"
|
|
71
|
-
else value["_dtype"]
|
|
71
|
+
h5py.string_dtype() if val["_dtype"] == "str" else val["_dtype"]
|
|
72
72
|
),
|
|
73
73
|
chunks=ds_chunk if isinstance(ds_chunk, tuple) else tuple(ds_chunk),
|
|
74
74
|
maxshape=(None,),
|
|
@@ -77,21 +77,48 @@ class H5Yaml:
|
|
|
77
77
|
else:
|
|
78
78
|
dset = fid.create_dataset(
|
|
79
79
|
key,
|
|
80
|
-
shape=(
|
|
81
|
-
dtype=
|
|
80
|
+
shape=(val["_size"],),
|
|
81
|
+
dtype=val["_dtype"],
|
|
82
82
|
)
|
|
83
|
-
if "_values" in
|
|
84
|
-
dset[:] =
|
|
83
|
+
if "_values" in val:
|
|
84
|
+
dset[:] = val["_values"]
|
|
85
85
|
|
|
86
86
|
dset.make_scale(
|
|
87
87
|
Path(key).name
|
|
88
|
-
if "long_name" in
|
|
88
|
+
if "long_name" in val
|
|
89
89
|
else "This is a netCDF dimension but not a netCDF variable."
|
|
90
90
|
)
|
|
91
|
-
for attr, attr_val in
|
|
91
|
+
for attr, attr_val in val.items():
|
|
92
92
|
if attr.startswith("_"):
|
|
93
93
|
continue
|
|
94
|
-
|
|
94
|
+
if attr in ("valid_min", "valid_max"):
|
|
95
|
+
match val["_dtype"]:
|
|
96
|
+
case "i1":
|
|
97
|
+
dset.attrs[attr] = np.int8(attr_val)
|
|
98
|
+
case "i2":
|
|
99
|
+
dset.attrs[attr] = np.int16(attr_val)
|
|
100
|
+
case "i4":
|
|
101
|
+
dset.attrs[attr] = np.int32(attr_val)
|
|
102
|
+
case "i8":
|
|
103
|
+
dset.attrs[attr] = np.int64(attr_val)
|
|
104
|
+
case "u1":
|
|
105
|
+
dset.attrs[attr] = np.uint8(attr_val)
|
|
106
|
+
case "u2":
|
|
107
|
+
dset.attrs[attr] = np.uint16(attr_val)
|
|
108
|
+
case "u4":
|
|
109
|
+
dset.attrs[attr] = np.uint32(attr_val)
|
|
110
|
+
case "u8":
|
|
111
|
+
dset.attrs[attr] = np.uint64(attr_val)
|
|
112
|
+
case "f2":
|
|
113
|
+
dset.attrs[attr] = np.float16(attr_val)
|
|
114
|
+
case "f4":
|
|
115
|
+
dset.attrs[attr] = np.float32(attr_val)
|
|
116
|
+
case "f8":
|
|
117
|
+
dset.attrs[attr] = np.float64(attr_val)
|
|
118
|
+
case _:
|
|
119
|
+
dset.attrs[attr] = attr_val
|
|
120
|
+
else:
|
|
121
|
+
dset.attrs[attr] = attr_val
|
|
95
122
|
|
|
96
123
|
def __compounds(self: H5Yaml, fid: h5py.File) -> dict[str, str | int | float]:
|
|
97
124
|
"""Add compound datatypes to HDF5 product."""
|
|
@@ -112,14 +139,14 @@ class H5Yaml:
|
|
|
112
139
|
for key, value in res.items():
|
|
113
140
|
self.h5_def["compounds"][key] = value
|
|
114
141
|
|
|
115
|
-
for key,
|
|
142
|
+
for key, val in self.h5_def["compounds"].items():
|
|
116
143
|
compounds[key] = {
|
|
117
144
|
"dtype": [],
|
|
118
145
|
"units": [],
|
|
119
146
|
"names": [],
|
|
120
147
|
}
|
|
121
148
|
|
|
122
|
-
for _key, _val in
|
|
149
|
+
for _key, _val in val.items():
|
|
123
150
|
compounds[key]["dtype"].append((_key, _val[0]))
|
|
124
151
|
if len(_val) == 3:
|
|
125
152
|
compounds[key]["units"].append(_val[1])
|
|
@@ -156,12 +183,19 @@ class H5Yaml:
|
|
|
156
183
|
np.nan if val["_FillValue"] == "NaN" else int(val["_FillValue"])
|
|
157
184
|
)
|
|
158
185
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
186
|
+
# check for scalar dataset
|
|
187
|
+
if val["_dims"][0] == "scalar":
|
|
188
|
+
dset = fid.create_dataset(
|
|
189
|
+
key,
|
|
190
|
+
(),
|
|
191
|
+
dtype=ds_dtype,
|
|
192
|
+
fillvalue=fillvalue,
|
|
193
|
+
)
|
|
194
|
+
for attr, attr_val in val.items():
|
|
195
|
+
if attr.startswith("_"):
|
|
196
|
+
continue
|
|
197
|
+
dset.attrs[attr] = attr_val
|
|
198
|
+
continue
|
|
165
199
|
|
|
166
200
|
n_udim = 0
|
|
167
201
|
ds_shape = ()
|
|
@@ -194,8 +228,22 @@ class H5Yaml:
|
|
|
194
228
|
fillvalue=fillvalue,
|
|
195
229
|
)
|
|
196
230
|
else:
|
|
231
|
+
compression = None
|
|
232
|
+
shuffle = False
|
|
233
|
+
# currently only gzip compression is supported
|
|
234
|
+
if "_compression" in val:
|
|
235
|
+
compression = val["_compression"]
|
|
236
|
+
shuffle = True
|
|
237
|
+
|
|
197
238
|
if val.get("_vlen"):
|
|
198
|
-
|
|
239
|
+
ds_name = (
|
|
240
|
+
val["_dtype"].split("_")[0]
|
|
241
|
+
if "_" in val["_dtype"]
|
|
242
|
+
else val["_dtype"]
|
|
243
|
+
) + "_vlen"
|
|
244
|
+
if ds_name not in fid:
|
|
245
|
+
fid[ds_name] = h5py.vlen_dtype(ds_dtype)
|
|
246
|
+
ds_dtype = fid[ds_name]
|
|
199
247
|
fillvalue = None
|
|
200
248
|
if ds_maxshape == (None,):
|
|
201
249
|
ds_chunk = (16,)
|
|
@@ -217,7 +265,36 @@ class H5Yaml:
|
|
|
217
265
|
for attr, attr_val in val.items():
|
|
218
266
|
if attr.startswith("_"):
|
|
219
267
|
continue
|
|
220
|
-
|
|
268
|
+
if attr in ("valid_min", "valid_max"):
|
|
269
|
+
match val["_dtype"]:
|
|
270
|
+
case "i1":
|
|
271
|
+
dset.attrs[attr] = np.int8(attr_val)
|
|
272
|
+
case "i2":
|
|
273
|
+
dset.attrs[attr] = np.int16(attr_val)
|
|
274
|
+
case "i4":
|
|
275
|
+
dset.attrs[attr] = np.int32(attr_val)
|
|
276
|
+
case "i8":
|
|
277
|
+
dset.attrs[attr] = np.int64(attr_val)
|
|
278
|
+
case "u1":
|
|
279
|
+
dset.attrs[attr] = np.uint8(attr_val)
|
|
280
|
+
case "u2":
|
|
281
|
+
dset.attrs[attr] = np.uint16(attr_val)
|
|
282
|
+
case "u4":
|
|
283
|
+
dset.attrs[attr] = np.uint32(attr_val)
|
|
284
|
+
case "u8":
|
|
285
|
+
dset.attrs[attr] = np.uint64(attr_val)
|
|
286
|
+
case "f2":
|
|
287
|
+
dset.attrs[attr] = np.float16(attr_val)
|
|
288
|
+
case "f4":
|
|
289
|
+
dset.attrs[attr] = np.float32(attr_val)
|
|
290
|
+
case "f8":
|
|
291
|
+
dset.attrs[attr] = np.float64(attr_val)
|
|
292
|
+
case _:
|
|
293
|
+
dset.attrs[attr] = attr_val
|
|
294
|
+
elif attr == "flag_values":
|
|
295
|
+
dset.attrs[attr] = np.array(attr_val, dtype="u1")
|
|
296
|
+
else:
|
|
297
|
+
dset.attrs[attr] = attr_val
|
|
221
298
|
|
|
222
299
|
if compounds is not None and val["_dtype"] in compounds:
|
|
223
300
|
if compounds[val["_dtype"]]["units"]:
|
|
@@ -15,6 +15,7 @@ __all__ = ["NcYaml"]
|
|
|
15
15
|
|
|
16
16
|
import logging
|
|
17
17
|
from importlib.resources import files
|
|
18
|
+
from pathlib import PurePosixPath
|
|
18
19
|
from typing import TYPE_CHECKING
|
|
19
20
|
|
|
20
21
|
import numpy as np
|
|
@@ -47,12 +48,20 @@ class NcYaml:
|
|
|
47
48
|
def __groups(self: NcYaml, fid: Dataset) -> None:
|
|
48
49
|
"""Create groups in HDF5 product."""
|
|
49
50
|
for key in self.h5_def["groups"]:
|
|
50
|
-
|
|
51
|
+
pkey = PurePosixPath(key)
|
|
52
|
+
if pkey.is_absolute():
|
|
53
|
+
_ = fid[pkey.parent].createGroup(pkey.name)
|
|
54
|
+
else:
|
|
55
|
+
_ = fid.createGroup(key)
|
|
51
56
|
|
|
52
57
|
def __dimensions(self: NcYaml, fid: Dataset) -> None:
|
|
53
58
|
"""Add dimensions to HDF5 product."""
|
|
54
59
|
for key, value in self.h5_def["dimensions"].items():
|
|
55
|
-
|
|
60
|
+
pkey = PurePosixPath(key)
|
|
61
|
+
if pkey.is_absolute():
|
|
62
|
+
_ = fid[pkey.parent].createDimension(pkey.name, value["_size"])
|
|
63
|
+
else:
|
|
64
|
+
_ = fid.createDimension(key, value["_size"])
|
|
56
65
|
|
|
57
66
|
if "long_name" not in value:
|
|
58
67
|
continue
|
|
@@ -63,13 +72,22 @@ class NcYaml:
|
|
|
63
72
|
np.nan if value["_FillValue"] == "NaN" else int(value["_FillValue"])
|
|
64
73
|
)
|
|
65
74
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
75
|
+
if pkey.is_absolute():
|
|
76
|
+
dset = fid[pkey.parent].createVariable(
|
|
77
|
+
pkey.name,
|
|
78
|
+
value["_dtype"],
|
|
79
|
+
dimensions=(pkey.name,),
|
|
80
|
+
fill_value=fillvalue,
|
|
81
|
+
contiguous=value["_size"] != 0,
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
dset = fid.createVariable(
|
|
85
|
+
key,
|
|
86
|
+
value["_dtype"],
|
|
87
|
+
dimensions=(key,),
|
|
88
|
+
fill_value=fillvalue,
|
|
89
|
+
contiguous=value["_size"] != 0,
|
|
90
|
+
)
|
|
73
91
|
dset.setncatts({k: v for k, v in value.items() if not k.startswith("_")})
|
|
74
92
|
|
|
75
93
|
def __compounds(self: NcYaml, fid: Dataset) -> dict[str, str | int | float]:
|
|
@@ -145,11 +163,17 @@ class NcYaml:
|
|
|
145
163
|
compression = "zlib"
|
|
146
164
|
complevel = val["_compression"]
|
|
147
165
|
|
|
166
|
+
var_dims = []
|
|
148
167
|
n_udim = 0
|
|
149
168
|
ds_shape = ()
|
|
150
169
|
ds_maxshape = ()
|
|
151
170
|
for coord in val["_dims"]:
|
|
152
|
-
|
|
171
|
+
pcoord = PurePosixPath(coord)
|
|
172
|
+
var_dims.append(pcoord.name if pcoord.is_absolute() else coord)
|
|
173
|
+
if pcoord.is_absolute():
|
|
174
|
+
dim_sz = fid[pcoord.parent].dimensions[pcoord.name].size
|
|
175
|
+
else:
|
|
176
|
+
dim_sz = fid.dimensions[coord].size
|
|
153
177
|
n_udim += int(dim_sz == 0)
|
|
154
178
|
ds_shape += (dim_sz,)
|
|
155
179
|
ds_maxshape += (dim_sz if dim_sz > 0 else None,)
|
|
@@ -163,12 +187,18 @@ class NcYaml:
|
|
|
163
187
|
val["_chunks"] if "_chunks" in val else guess_chunks(ds_shape, sz_dtype)
|
|
164
188
|
)
|
|
165
189
|
|
|
190
|
+
pkey = PurePosixPath(key)
|
|
191
|
+
var_grp = fid[pkey.parent] if pkey.is_absolute() else fid
|
|
192
|
+
var_name = pkey.name if pkey.is_absolute() else key
|
|
193
|
+
if val["_dtype"] in fid.cmptypes:
|
|
194
|
+
val["_dtype"] = fid.cmptypes[val["_dtype"]]
|
|
195
|
+
|
|
166
196
|
# create the variable
|
|
167
197
|
if ds_chunk == "contiguous":
|
|
168
|
-
dset =
|
|
169
|
-
|
|
198
|
+
dset = var_grp.createVariable(
|
|
199
|
+
var_name,
|
|
170
200
|
val["_dtype"],
|
|
171
|
-
dimensions=
|
|
201
|
+
dimensions=var_dims,
|
|
172
202
|
fill_value=fillvalue,
|
|
173
203
|
contiguous=True,
|
|
174
204
|
)
|
|
@@ -181,13 +211,10 @@ class NcYaml:
|
|
|
181
211
|
if ds_maxshape == (None,):
|
|
182
212
|
ds_chunk = (16,)
|
|
183
213
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
dset = fid.createVariable(
|
|
188
|
-
key,
|
|
214
|
+
dset = var_grp.createVariable(
|
|
215
|
+
var_name,
|
|
189
216
|
val["_dtype"],
|
|
190
|
-
dimensions=
|
|
217
|
+
dimensions=var_dims,
|
|
191
218
|
fill_value=fillvalue,
|
|
192
219
|
contiguous=False,
|
|
193
220
|
compression=compression,
|
|
@@ -231,7 +258,7 @@ class NcYaml:
|
|
|
231
258
|
def tests() -> None:
|
|
232
259
|
"""..."""
|
|
233
260
|
print("Calling NcYaml")
|
|
234
|
-
NcYaml(files("h5yaml.Data") / "
|
|
261
|
+
NcYaml(files("h5yaml.Data") / "nc_testing.yaml").create("test_yaml.nc")
|
|
235
262
|
|
|
236
263
|
|
|
237
264
|
if __name__ == "__main__":
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|