h5yaml 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h5yaml/Data/h5_compound.yaml +16 -15
- h5yaml/Data/h5_testing.yaml +23 -1
- h5yaml/Data/h5_unsupported.yaml +0 -6
- h5yaml/Data/nc_testing.yaml +13 -1
- h5yaml/conf_from_yaml.py +3 -3
- h5yaml/yaml_h5.py +99 -80
- h5yaml/yaml_nc.py +115 -89
- {h5yaml-0.2.0.dist-info → h5yaml-0.3.0.dist-info}/METADATA +35 -19
- h5yaml-0.3.0.dist-info/RECORD +14 -0
- {h5yaml-0.2.0.dist-info → h5yaml-0.3.0.dist-info}/WHEEL +1 -1
- h5yaml/lib/chunksizes.py +0 -73
- h5yaml-0.2.0.dist-info/RECORD +0 -15
- {h5yaml-0.2.0.dist-info → h5yaml-0.3.0.dist-info}/licenses/LICENSE +0 -0
h5yaml/Data/h5_compound.yaml
CHANGED
|
@@ -19,18 +19,19 @@
|
|
|
19
19
|
# See the License for the specific language governing permissions and
|
|
20
20
|
# limitations under the License.
|
|
21
21
|
#
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
22
|
+
compounds:
|
|
23
|
+
stats_dtype:
|
|
24
|
+
time:
|
|
25
|
+
- u8
|
|
26
|
+
- seconds since 1970-01-01T00:00:00
|
|
27
|
+
- timestamp
|
|
28
|
+
index: [u2, '1', index]
|
|
29
|
+
tbl_id: [u1, '1', binning id]
|
|
30
|
+
saa: [u1, '1', saa-flag]
|
|
31
|
+
coad: [u1, '1', co-addings]
|
|
32
|
+
texp: [f4, ms, exposure time]
|
|
33
|
+
lat: [f4, degree, latitude]
|
|
34
|
+
lon: [f4, degree, longitude]
|
|
35
|
+
avg: [f4, '1', '$S - S_{ref}$']
|
|
36
|
+
unc: [f4, '1', '\u03c3($S - S_{ref}$)']
|
|
37
|
+
dark_offs: [f4, '1', dark-offset]
|
h5yaml/Data/h5_testing.yaml
CHANGED
|
@@ -25,6 +25,8 @@ groups:
|
|
|
25
25
|
- group_01
|
|
26
26
|
- group_02
|
|
27
27
|
- group_03
|
|
28
|
+
- processing_control
|
|
29
|
+
- processing_control/input_parameters
|
|
28
30
|
|
|
29
31
|
# Define dimensions
|
|
30
32
|
# Note dimensions with an attribute 'long_name' will also be generated as variable
|
|
@@ -91,6 +93,26 @@ variables:
|
|
|
91
93
|
units: '1'
|
|
92
94
|
valid_min: 0
|
|
93
95
|
valid_max: 65534
|
|
96
|
+
/group_00/detector_images_chunked:
|
|
97
|
+
_dtype: u2
|
|
98
|
+
_dims: [number_of_images, column, row]
|
|
99
|
+
_FillValue: 65535
|
|
100
|
+
_chunks: [1, 640, 512]
|
|
101
|
+
long_name: Detector pixel values
|
|
102
|
+
comment: unbinned full-frame data
|
|
103
|
+
units: '1'
|
|
104
|
+
valid_min: 0
|
|
105
|
+
valid_max: 65534
|
|
106
|
+
/group_00/detector_images_autochunk:
|
|
107
|
+
_dtype: u2
|
|
108
|
+
_dims: [number_of_images, column, row]
|
|
109
|
+
_chunks: True
|
|
110
|
+
_FillValue: 65535
|
|
111
|
+
long_name: Detector pixel values
|
|
112
|
+
comment: unbinned full-frame data
|
|
113
|
+
units: '1'
|
|
114
|
+
valid_min: 0
|
|
115
|
+
valid_max: 65534
|
|
94
116
|
# ---------- GROUP 01 ----------
|
|
95
117
|
/group_01/detector_images:
|
|
96
118
|
_dtype: u2
|
|
@@ -199,7 +221,7 @@ variables:
|
|
|
199
221
|
valid_max: 999.9
|
|
200
222
|
/group_03/ds_10:
|
|
201
223
|
_dtype: f4
|
|
202
|
-
_dims: [number_of_images]
|
|
224
|
+
_dims: [number_of_images, /group_03/viewport]
|
|
203
225
|
long_name: float dataset
|
|
204
226
|
units: '1'
|
|
205
227
|
valid_min: -999.9
|
h5yaml/Data/h5_unsupported.yaml
CHANGED
|
@@ -39,12 +39,6 @@ dimensions:
|
|
|
39
39
|
valid_min: 0
|
|
40
40
|
valid_max: 92400
|
|
41
41
|
|
|
42
|
-
# Define compound types
|
|
43
|
-
# - compound elements must have a data-type, and can have a unit and long_name
|
|
44
|
-
compounds:
|
|
45
|
-
- h5_compound.yaml
|
|
46
|
-
- h5_nocompound.yaml
|
|
47
|
-
|
|
48
42
|
# Define variables
|
|
49
43
|
variables:
|
|
50
44
|
ds_01:
|
h5yaml/Data/nc_testing.yaml
CHANGED
|
@@ -25,6 +25,8 @@ groups:
|
|
|
25
25
|
- group_01
|
|
26
26
|
- group_02
|
|
27
27
|
- group_03
|
|
28
|
+
- processing_control
|
|
29
|
+
- processing_control/input_parameters
|
|
28
30
|
|
|
29
31
|
# Define dimensions
|
|
30
32
|
# Note dimensions with an attribute 'long_name' will also be generated as variable
|
|
@@ -87,6 +89,16 @@ variables:
|
|
|
87
89
|
units: '1'
|
|
88
90
|
valid_min: 0
|
|
89
91
|
valid_max: 65534
|
|
92
|
+
/group_00/detector_images_chunked:
|
|
93
|
+
_dtype: u2
|
|
94
|
+
_dims: [number_of_images, column, row]
|
|
95
|
+
_FillValue: 65535
|
|
96
|
+
_chunks: [1, 640, 512]
|
|
97
|
+
long_name: Detector pixel values
|
|
98
|
+
comment: unbinned full-frame data
|
|
99
|
+
units: '1'
|
|
100
|
+
valid_min: 0
|
|
101
|
+
valid_max: 65534
|
|
90
102
|
# ---------- GROUP 01 ----------
|
|
91
103
|
/group_01/detector_images:
|
|
92
104
|
_dtype: u2
|
|
@@ -190,7 +202,7 @@ variables:
|
|
|
190
202
|
# valid_max: 999.9
|
|
191
203
|
/group_03/ds_10:
|
|
192
204
|
_dtype: f4
|
|
193
|
-
_dims: [number_of_images]
|
|
205
|
+
_dims: [number_of_images, /group_03/viewport]
|
|
194
206
|
long_name: float dataset
|
|
195
207
|
units: '1'
|
|
196
208
|
valid_min: -999.9
|
h5yaml/conf_from_yaml.py
CHANGED
|
@@ -45,7 +45,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
|
|
|
45
45
|
|
|
46
46
|
"""
|
|
47
47
|
if isinstance(file_path, str):
|
|
48
|
-
file_path = Path(
|
|
48
|
+
file_path = Path(file_path)
|
|
49
49
|
|
|
50
50
|
if not file_path.is_file():
|
|
51
51
|
raise FileNotFoundError(f"{file_path} not found")
|
|
@@ -53,7 +53,7 @@ def conf_from_yaml(file_path: Path | str) -> dict:
|
|
|
53
53
|
with file_path.open("r", encoding="ascii") as fid:
|
|
54
54
|
try:
|
|
55
55
|
settings = yaml.safe_load(fid)
|
|
56
|
-
except yaml.
|
|
57
|
-
raise RuntimeError from exc
|
|
56
|
+
except yaml.parser.ParserError as exc:
|
|
57
|
+
raise RuntimeError(f"Failed to parse {file_path}") from exc
|
|
58
58
|
|
|
59
59
|
return settings
|
h5yaml/yaml_h5.py
CHANGED
|
@@ -32,7 +32,6 @@ import numpy as np
|
|
|
32
32
|
|
|
33
33
|
from .conf_from_yaml import conf_from_yaml
|
|
34
34
|
from .lib.adjust_attr import adjust_attr
|
|
35
|
-
from .lib.chunksizes import guess_chunks
|
|
36
35
|
|
|
37
36
|
|
|
38
37
|
# - class definition -----------------------------------
|
|
@@ -41,30 +40,74 @@ class H5Yaml:
|
|
|
41
40
|
|
|
42
41
|
Parameters
|
|
43
42
|
----------
|
|
44
|
-
h5_yaml_fl : Path
|
|
43
|
+
h5_yaml_fl : Path | str | list[Path | str]
|
|
45
44
|
YAML files with the HDF5 format definition
|
|
46
45
|
|
|
47
46
|
"""
|
|
48
47
|
|
|
49
|
-
def __init__(self: H5Yaml, h5_yaml_fl: Path) -> None:
|
|
48
|
+
def __init__(self: H5Yaml, h5_yaml_fl: Path | str | list[Path | str]) -> None:
|
|
50
49
|
"""Construct a H5Yaml instance."""
|
|
51
50
|
self.logger = logging.getLogger("h5yaml.H5Yaml")
|
|
51
|
+
self._h5_def = {
|
|
52
|
+
"groups": set(),
|
|
53
|
+
"attrs_global": {},
|
|
54
|
+
"attrs_groups": {},
|
|
55
|
+
"compounds": {},
|
|
56
|
+
"dimensions": {},
|
|
57
|
+
"variables": {},
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
for yaml_fl in h5_yaml_fl if isinstance(h5_yaml_fl, list) else [h5_yaml_fl]:
|
|
61
|
+
try:
|
|
62
|
+
config = conf_from_yaml(yaml_fl)
|
|
63
|
+
except RuntimeError as exc:
|
|
64
|
+
raise RuntimeError from exc
|
|
65
|
+
|
|
66
|
+
for key in self._h5_def:
|
|
67
|
+
if key in config:
|
|
68
|
+
self._h5_def[key] |= (
|
|
69
|
+
set(config[key]) if key == "groups" else config[key]
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
def __attrs(self: H5Yaml, fid: h5py.File) -> None:
|
|
73
|
+
"""Create global and group attributes.
|
|
52
74
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
75
|
+
Parameters
|
|
76
|
+
----------
|
|
77
|
+
fid : h5py.File
|
|
78
|
+
HDF5 file pointer (mode 'r+')
|
|
79
|
+
|
|
80
|
+
"""
|
|
81
|
+
for key, value in self._h5_def["attrs_global"].items():
|
|
82
|
+
if key not in fid.attrs and value != "TBW":
|
|
83
|
+
fid.attrs[key] = value
|
|
57
84
|
|
|
58
|
-
|
|
85
|
+
for key, value in self._h5_def["attrs_groups"].items():
|
|
86
|
+
if key not in fid.attrs and value != "TBW":
|
|
87
|
+
fid[str(Path(key).parent)].attrs[Path(key).name] = value
|
|
59
88
|
|
|
60
89
|
def __groups(self: H5Yaml, fid: h5py.File) -> None:
|
|
61
|
-
"""Create groups in HDF5 product.
|
|
62
|
-
|
|
63
|
-
|
|
90
|
+
"""Create groups in HDF5 product.
|
|
91
|
+
|
|
92
|
+
Parameters
|
|
93
|
+
----------
|
|
94
|
+
fid : h5py.File
|
|
95
|
+
HDF5 file pointer (mode 'r+')
|
|
96
|
+
|
|
97
|
+
"""
|
|
98
|
+
for key in self._h5_def["groups"]:
|
|
99
|
+
_ = fid.require_group(key)
|
|
64
100
|
|
|
65
101
|
def __dimensions(self: H5Yaml, fid: h5py.File) -> None:
|
|
66
|
-
"""Add dimensions to HDF5 product.
|
|
67
|
-
|
|
102
|
+
"""Add dimensions to HDF5 product.
|
|
103
|
+
|
|
104
|
+
Parameters
|
|
105
|
+
----------
|
|
106
|
+
fid : h5py.File
|
|
107
|
+
HDF5 file pointer (mode 'r+')
|
|
108
|
+
|
|
109
|
+
"""
|
|
110
|
+
for key, val in self._h5_def["dimensions"].items():
|
|
68
111
|
fillvalue = None
|
|
69
112
|
if "_FillValue" in val:
|
|
70
113
|
fillvalue = (
|
|
@@ -72,12 +115,14 @@ class H5Yaml:
|
|
|
72
115
|
)
|
|
73
116
|
|
|
74
117
|
if val["_size"] == 0:
|
|
75
|
-
ds_chunk = val.get("_chunks"
|
|
118
|
+
ds_chunk = val.get("_chunks")
|
|
119
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
120
|
+
ds_chunk = tuple(ds_chunk)
|
|
76
121
|
dset = fid.create_dataset(
|
|
77
122
|
key,
|
|
78
123
|
shape=(0,),
|
|
79
124
|
dtype="T" if val["_dtype"] == "str" else val["_dtype"],
|
|
80
|
-
chunks=ds_chunk
|
|
125
|
+
chunks=ds_chunk,
|
|
81
126
|
maxshape=(None,),
|
|
82
127
|
fillvalue=fillvalue,
|
|
83
128
|
)
|
|
@@ -101,62 +146,32 @@ class H5Yaml:
|
|
|
101
146
|
if not attr.startswith("_"):
|
|
102
147
|
dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
|
|
103
148
|
|
|
104
|
-
def __compounds(self: H5Yaml, fid: h5py.File) ->
|
|
105
|
-
"""Add compound datatypes to HDF5 product.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
res = conf_from_yaml(yaml_fl)
|
|
118
|
-
except RuntimeError as exc:
|
|
119
|
-
raise RuntimeError from exc
|
|
120
|
-
for key, value in res.items():
|
|
121
|
-
self.h5_def["compounds"][key] = value
|
|
122
|
-
|
|
123
|
-
for key, val in self.h5_def["compounds"].items():
|
|
124
|
-
compounds[key] = {
|
|
125
|
-
"dtype": [],
|
|
126
|
-
"units": [],
|
|
127
|
-
"names": [],
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
for _key, _val in val.items():
|
|
131
|
-
compounds[key]["dtype"].append((_key, _val[0]))
|
|
132
|
-
if len(_val) == 3:
|
|
133
|
-
compounds[key]["units"].append(_val[1])
|
|
134
|
-
compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])
|
|
135
|
-
|
|
136
|
-
fid[key] = np.dtype(compounds[key]["dtype"])
|
|
137
|
-
|
|
138
|
-
return compounds
|
|
139
|
-
|
|
140
|
-
def __variables(
|
|
141
|
-
self: H5Yaml, fid: h5py.File, compounds: dict[str, str | int | float] | None
|
|
142
|
-
) -> None:
|
|
149
|
+
def __compounds(self: H5Yaml, fid: h5py.File) -> None:
|
|
150
|
+
"""Add compound datatypes to HDF5 product.
|
|
151
|
+
|
|
152
|
+
Parameters
|
|
153
|
+
----------
|
|
154
|
+
fid : h5py.File
|
|
155
|
+
HDF5 file pointer (mode 'r+')
|
|
156
|
+
|
|
157
|
+
"""
|
|
158
|
+
for key, val in self._h5_def["compounds"].items():
|
|
159
|
+
fid[key] = np.dtype([(k, v[0]) for k, v in val.items()])
|
|
160
|
+
|
|
161
|
+
def __variables(self: H5Yaml, fid: h5py.File) -> None:
|
|
143
162
|
"""Add datasets to HDF5 product.
|
|
144
163
|
|
|
145
164
|
Parameters
|
|
146
165
|
----------
|
|
147
166
|
fid : h5py.File
|
|
148
167
|
HDF5 file pointer (mode 'r+')
|
|
149
|
-
compounds : dict[str, str | int | float]
|
|
150
|
-
Definition of the compound(s) in the product
|
|
151
168
|
|
|
152
169
|
"""
|
|
153
|
-
for key, val in self.
|
|
170
|
+
for key, val in self._h5_def["variables"].items():
|
|
154
171
|
if val["_dtype"] in fid:
|
|
155
172
|
ds_dtype = fid[val["_dtype"]]
|
|
156
|
-
dtype_size = fid[val["_dtype"]].dtype.itemsize
|
|
157
173
|
else:
|
|
158
174
|
ds_dtype = "T" if val["_dtype"] == "str" else val["_dtype"]
|
|
159
|
-
dtype_size = np.dtype(val["_dtype"]).itemsize
|
|
160
175
|
|
|
161
176
|
fillvalue = None
|
|
162
177
|
if "_FillValue" in val:
|
|
@@ -190,15 +205,13 @@ class H5Yaml:
|
|
|
190
205
|
if n_udim > 1:
|
|
191
206
|
raise ValueError(f"{key} has more than one unlimited dimension")
|
|
192
207
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
else guess_chunks(ds_shape, dtype_size)
|
|
198
|
-
)
|
|
208
|
+
if None in ds_maxshape and val.get("_chunks") == "contiguous":
|
|
209
|
+
raise KeyError(
|
|
210
|
+
"you can not create a contiguous dataset with unlimited dimensions."
|
|
211
|
+
)
|
|
199
212
|
|
|
200
213
|
# create the variable
|
|
201
|
-
if
|
|
214
|
+
if val.get("_chunks") == "contiguous":
|
|
202
215
|
dset = fid.create_dataset(
|
|
203
216
|
key,
|
|
204
217
|
ds_shape,
|
|
@@ -208,6 +221,9 @@ class H5Yaml:
|
|
|
208
221
|
fillvalue=fillvalue,
|
|
209
222
|
)
|
|
210
223
|
else:
|
|
224
|
+
ds_chunk = val.get("_chunks")
|
|
225
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
226
|
+
ds_chunk = tuple(ds_chunk)
|
|
211
227
|
compression = None
|
|
212
228
|
shuffle = False
|
|
213
229
|
# currently only gzip compression is supported
|
|
@@ -225,14 +241,12 @@ class H5Yaml:
|
|
|
225
241
|
fid[ds_name] = h5py.vlen_dtype(ds_dtype)
|
|
226
242
|
ds_dtype = fid[ds_name]
|
|
227
243
|
fillvalue = None
|
|
228
|
-
if ds_maxshape == (None,):
|
|
229
|
-
ds_chunk = (16,)
|
|
230
244
|
|
|
231
245
|
dset = fid.create_dataset(
|
|
232
246
|
key,
|
|
233
247
|
ds_shape,
|
|
234
248
|
dtype=ds_dtype,
|
|
235
|
-
chunks=ds_chunk
|
|
249
|
+
chunks=ds_chunk,
|
|
236
250
|
maxshape=ds_maxshape,
|
|
237
251
|
fillvalue=fillvalue,
|
|
238
252
|
compression=compression,
|
|
@@ -246,11 +260,14 @@ class H5Yaml:
|
|
|
246
260
|
if not attr.startswith("_"):
|
|
247
261
|
dset.attrs[attr] = adjust_attr(val["_dtype"], attr, attr_val)
|
|
248
262
|
|
|
249
|
-
if
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
if
|
|
253
|
-
dset.attrs["
|
|
263
|
+
if val["_dtype"] in self._h5_def["compounds"]:
|
|
264
|
+
compound = self._h5_def["compounds"][val["_dtype"]]
|
|
265
|
+
res = [v[2] for k, v in compound.items() if len(v) == 3]
|
|
266
|
+
if res:
|
|
267
|
+
dset.attrs["units"] = [v[1] for k, v in compound.items()]
|
|
268
|
+
dset.attrs["long_name"] = res
|
|
269
|
+
else:
|
|
270
|
+
dset.attrs["long_name"] = [v[1] for k, v in compound.items()]
|
|
254
271
|
|
|
255
272
|
@property
|
|
256
273
|
def h5_def(self: H5Yaml) -> dict:
|
|
@@ -260,10 +277,11 @@ class H5Yaml:
|
|
|
260
277
|
def diskless(self: H5Yaml) -> h5py.File:
|
|
261
278
|
"""Create a HDF5/netCDF4 file in memory."""
|
|
262
279
|
fid = h5py.File.in_memory()
|
|
263
|
-
|
|
264
|
-
self.__groups(fid)
|
|
280
|
+
self.__groups(fid)
|
|
265
281
|
self.__dimensions(fid)
|
|
266
|
-
self.
|
|
282
|
+
self.__compounds(fid)
|
|
283
|
+
self.__variables(fid)
|
|
284
|
+
self.__attrs(fid)
|
|
267
285
|
return fid
|
|
268
286
|
|
|
269
287
|
def create(self: H5Yaml, l1a_name: Path | str) -> None:
|
|
@@ -277,9 +295,10 @@ class H5Yaml:
|
|
|
277
295
|
"""
|
|
278
296
|
try:
|
|
279
297
|
with h5py.File(l1a_name, "w") as fid:
|
|
280
|
-
|
|
281
|
-
self.__groups(fid)
|
|
298
|
+
self.__groups(fid)
|
|
282
299
|
self.__dimensions(fid)
|
|
283
|
-
self.
|
|
300
|
+
self.__compounds(fid)
|
|
301
|
+
self.__variables(fid)
|
|
302
|
+
self.__attrs(fid)
|
|
284
303
|
except PermissionError as exc:
|
|
285
304
|
raise RuntimeError(f"failed create {l1a_name}") from exc
|
h5yaml/yaml_nc.py
CHANGED
|
@@ -18,15 +18,14 @@
|
|
|
18
18
|
# See the License for the specific language governing permissions and
|
|
19
19
|
# limitations under the License.
|
|
20
20
|
#
|
|
21
|
-
"""Create
|
|
21
|
+
"""Create netCDF4 formatted file from a YAML configuration file using netCDF4."""
|
|
22
22
|
|
|
23
23
|
from __future__ import annotations
|
|
24
24
|
|
|
25
25
|
__all__ = ["NcYaml"]
|
|
26
26
|
|
|
27
27
|
import logging
|
|
28
|
-
from pathlib import PurePosixPath
|
|
29
|
-
from typing import TYPE_CHECKING
|
|
28
|
+
from pathlib import Path, PurePosixPath
|
|
30
29
|
|
|
31
30
|
import numpy as np
|
|
32
31
|
|
|
@@ -35,38 +34,91 @@ from netCDF4 import Dataset
|
|
|
35
34
|
|
|
36
35
|
from .conf_from_yaml import conf_from_yaml
|
|
37
36
|
from .lib.adjust_attr import adjust_attr
|
|
38
|
-
from .lib.chunksizes import guess_chunks
|
|
39
|
-
|
|
40
|
-
if TYPE_CHECKING:
|
|
41
|
-
from pathlib import Path
|
|
42
37
|
|
|
43
38
|
|
|
44
39
|
# - class definition -----------------------------------
|
|
45
40
|
class NcYaml:
|
|
46
|
-
"""Class to create a
|
|
41
|
+
"""Class to create a netCDF4 formated file from a YAML configuration file.
|
|
42
|
+
|
|
43
|
+
Parameters
|
|
44
|
+
----------
|
|
45
|
+
nc_yaml_fl : Path | str | list[Path | str]
|
|
46
|
+
YAML files with the netCDF4 format definition
|
|
47
|
+
|
|
48
|
+
"""
|
|
47
49
|
|
|
48
|
-
def __init__(self: NcYaml, nc_yaml_fl: Path) -> None:
|
|
50
|
+
def __init__(self: NcYaml, nc_yaml_fl: Path | str | list[Path | str]) -> None:
|
|
49
51
|
"""Construct a NcYaml instance."""
|
|
50
52
|
self.logger = logging.getLogger("h5yaml.NcYaml")
|
|
53
|
+
self._nc_def = {
|
|
54
|
+
"groups": set(),
|
|
55
|
+
"attrs_global": {},
|
|
56
|
+
"attrs_groups": {},
|
|
57
|
+
"compounds": {},
|
|
58
|
+
"dimensions": {},
|
|
59
|
+
"variables": {},
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
for yaml_fl in nc_yaml_fl if isinstance(nc_yaml_fl, list) else [nc_yaml_fl]:
|
|
63
|
+
try:
|
|
64
|
+
config = conf_from_yaml(yaml_fl)
|
|
65
|
+
except RuntimeError as exc:
|
|
66
|
+
raise RuntimeError from exc
|
|
67
|
+
|
|
68
|
+
for key in self._nc_def:
|
|
69
|
+
if key in config:
|
|
70
|
+
self._nc_def[key] |= (
|
|
71
|
+
set(config[key]) if key == "groups" else config[key]
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
def __attrs(self: NcYaml, fid: Dataset) -> None:
|
|
75
|
+
"""Create global and group attributes.
|
|
51
76
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
77
|
+
Parameters
|
|
78
|
+
----------
|
|
79
|
+
fid : netCDF4.Dataset
|
|
80
|
+
netCDF4 Dataset (mode 'r+')
|
|
56
81
|
|
|
57
|
-
|
|
82
|
+
"""
|
|
83
|
+
for key, val in self.nc_def["attrs_global"].items():
|
|
84
|
+
if val == "TBW":
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
if isinstance(val, str):
|
|
88
|
+
fid.setncattr_string(key, val)
|
|
89
|
+
else:
|
|
90
|
+
fid.setncattr(key, val)
|
|
91
|
+
|
|
92
|
+
for key, val in self.nc_def["attrs_groups"].items():
|
|
93
|
+
if val == "TBW":
|
|
94
|
+
continue
|
|
95
|
+
|
|
96
|
+
if isinstance(val, str):
|
|
97
|
+
fid[str(Path(key).parent)].setncattr_string(Path(key).name, val)
|
|
98
|
+
else:
|
|
99
|
+
fid[str(Path(key).parent)].setncattr(Path(key).name, val)
|
|
58
100
|
|
|
59
101
|
def __groups(self: NcYaml, fid: Dataset) -> None:
|
|
60
|
-
"""Create groups in
|
|
102
|
+
"""Create groups in a netCDF4 product.
|
|
103
|
+
|
|
104
|
+
Parameters
|
|
105
|
+
----------
|
|
106
|
+
fid : netCDF4.Dataset
|
|
107
|
+
netCDF4 Dataset (mode 'r+')
|
|
108
|
+
|
|
109
|
+
"""
|
|
61
110
|
for key in self.nc_def["groups"]:
|
|
62
|
-
|
|
63
|
-
if pkey.is_absolute():
|
|
64
|
-
_ = fid[pkey.parent].createGroup(pkey.name)
|
|
65
|
-
else:
|
|
66
|
-
_ = fid.createGroup(key)
|
|
111
|
+
_ = fid.createGroup(key)
|
|
67
112
|
|
|
68
113
|
def __dimensions(self: NcYaml, fid: Dataset) -> None:
|
|
69
|
-
"""Add dimensions to
|
|
114
|
+
"""Add dimensions to a netCDF4 product.
|
|
115
|
+
|
|
116
|
+
Parameters
|
|
117
|
+
----------
|
|
118
|
+
fid : netCDF4.Dataset
|
|
119
|
+
netCDF4 Dataset (mode 'r+')
|
|
120
|
+
|
|
121
|
+
"""
|
|
70
122
|
for key, value in self.nc_def["dimensions"].items():
|
|
71
123
|
pkey = PurePosixPath(key)
|
|
72
124
|
if pkey.is_absolute():
|
|
@@ -113,56 +165,26 @@ class NcYaml:
|
|
|
113
165
|
}
|
|
114
166
|
)
|
|
115
167
|
|
|
116
|
-
def __compounds(self: NcYaml, fid: Dataset) ->
|
|
117
|
-
"""Add compound datatypes to
|
|
118
|
-
if "compounds" not in self.nc_def:
|
|
119
|
-
return {}
|
|
120
|
-
|
|
121
|
-
compounds = {}
|
|
122
|
-
if isinstance(self.nc_def["compounds"], list):
|
|
123
|
-
file_list = self.nc_def["compounds"].copy()
|
|
124
|
-
self.nc_def["compounds"] = {}
|
|
125
|
-
for name in file_list:
|
|
126
|
-
if not (yaml_fl := self.yaml_dir / name).is_file():
|
|
127
|
-
continue
|
|
128
|
-
try:
|
|
129
|
-
res = conf_from_yaml(yaml_fl)
|
|
130
|
-
except RuntimeError as exc:
|
|
131
|
-
raise RuntimeError from exc
|
|
132
|
-
for key, value in res.items():
|
|
133
|
-
self.nc_def["compounds"][key] = value
|
|
134
|
-
|
|
135
|
-
for key, value in self.nc_def["compounds"].items():
|
|
136
|
-
compounds[key] = {
|
|
137
|
-
"dtype": [],
|
|
138
|
-
"units": [],
|
|
139
|
-
"names": [],
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
for _key, _val in value.items():
|
|
143
|
-
compounds[key]["dtype"].append((_key, _val[0]))
|
|
144
|
-
if len(_val) == 3:
|
|
145
|
-
compounds[key]["units"].append(_val[1])
|
|
146
|
-
compounds[key]["names"].append(_val[2] if len(_val) == 3 else _val[1])
|
|
147
|
-
|
|
148
|
-
comp_t = np.dtype(compounds[key]["dtype"])
|
|
149
|
-
_ = fid.createCompoundType(comp_t, key)
|
|
168
|
+
def __compounds(self: NcYaml, fid: Dataset) -> None:
|
|
169
|
+
"""Add compound datatypes to a netCDF4 product.
|
|
150
170
|
|
|
151
|
-
|
|
171
|
+
Parameters
|
|
172
|
+
----------
|
|
173
|
+
fid : netCDF4.Dataset
|
|
174
|
+
netCDF4 Dataset (mode 'r+')
|
|
175
|
+
|
|
176
|
+
"""
|
|
177
|
+
for key, val in self.nc_def["compounds"].items():
|
|
178
|
+
comp_t = np.dtype([(k, v[0]) for k, v in val.items()])
|
|
179
|
+
_ = fid.createCompoundType(comp_t, key)
|
|
152
180
|
|
|
153
|
-
def __variables(
|
|
154
|
-
|
|
155
|
-
fid: Dataset,
|
|
156
|
-
compounds: dict[str, str | int | float] | None,
|
|
157
|
-
) -> None:
|
|
158
|
-
"""Add datasets to HDF5 product.
|
|
181
|
+
def __variables(self: NcYaml, fid: Dataset) -> None:
|
|
182
|
+
"""Add datasets to a netCDF4 product.
|
|
159
183
|
|
|
160
184
|
Parameters
|
|
161
185
|
----------
|
|
162
186
|
fid : netCDF4.Dataset
|
|
163
|
-
|
|
164
|
-
compounds : dict[str, str | int | float]
|
|
165
|
-
Definition of the compound(s) in the product
|
|
187
|
+
netCDF4 Dataset (mode 'r+')
|
|
166
188
|
|
|
167
189
|
"""
|
|
168
190
|
for key, val in self.nc_def["variables"].items():
|
|
@@ -172,10 +194,8 @@ class NcYaml:
|
|
|
172
194
|
|
|
173
195
|
if val["_dtype"] in fid.cmptypes:
|
|
174
196
|
ds_dtype = fid.cmptypes[val["_dtype"]].dtype
|
|
175
|
-
sz_dtype = ds_dtype.itemsize
|
|
176
197
|
else:
|
|
177
198
|
ds_dtype = val["_dtype"]
|
|
178
|
-
sz_dtype = np.dtype(val["_dtype"]).itemsize
|
|
179
199
|
|
|
180
200
|
fillvalue = None
|
|
181
201
|
if "_FillValue" in val:
|
|
@@ -226,16 +246,16 @@ class NcYaml:
|
|
|
226
246
|
if n_udim > 1:
|
|
227
247
|
raise ValueError("more than one unlimited dimension")
|
|
228
248
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
249
|
+
if None in ds_maxshape and val.get("_chunks") == "contiguous":
|
|
250
|
+
raise KeyError(
|
|
251
|
+
"you can not create a contiguous dataset with unlimited dimensions."
|
|
252
|
+
)
|
|
233
253
|
|
|
234
254
|
if val["_dtype"] in fid.cmptypes:
|
|
235
255
|
val["_dtype"] = fid.cmptypes[val["_dtype"]]
|
|
236
256
|
|
|
237
257
|
# create the variable
|
|
238
|
-
if
|
|
258
|
+
if val.get("_chunks") == "contiguous":
|
|
239
259
|
dset = var_grp.createVariable(
|
|
240
260
|
var_name,
|
|
241
261
|
val["_dtype"],
|
|
@@ -244,25 +264,24 @@ class NcYaml:
|
|
|
244
264
|
contiguous=True,
|
|
245
265
|
)
|
|
246
266
|
else:
|
|
267
|
+
ds_chunk = val.get("_chunks")
|
|
268
|
+
if ds_chunk is not None and not isinstance(ds_chunk, bool):
|
|
269
|
+
ds_chunk = tuple(ds_chunk)
|
|
247
270
|
if val.get("_vlen"):
|
|
248
271
|
if val["_dtype"] in fid.cmptypes:
|
|
249
272
|
raise ValueError("can not have vlen with compounds")
|
|
250
273
|
val["_dtype"] = fid.createVLType(ds_dtype, val["_dtype"])
|
|
251
274
|
fillvalue = None
|
|
252
|
-
if ds_maxshape == (None,):
|
|
253
|
-
ds_chunk = (16,)
|
|
254
275
|
|
|
255
276
|
dset = var_grp.createVariable(
|
|
256
277
|
var_name,
|
|
257
278
|
str if val["_dtype"] == "str" else val["_dtype"],
|
|
258
279
|
dimensions=var_dims,
|
|
259
280
|
fill_value=fillvalue,
|
|
260
|
-
contiguous=False,
|
|
261
281
|
compression=compression,
|
|
262
282
|
complevel=complevel,
|
|
263
|
-
chunksizes=
|
|
264
|
-
|
|
265
|
-
),
|
|
283
|
+
chunksizes=ds_chunk,
|
|
284
|
+
contiguous=False,
|
|
266
285
|
)
|
|
267
286
|
dset.setncatts(
|
|
268
287
|
{
|
|
@@ -272,38 +291,45 @@ class NcYaml:
|
|
|
272
291
|
}
|
|
273
292
|
)
|
|
274
293
|
|
|
275
|
-
if
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
if
|
|
279
|
-
dset.attrs["
|
|
294
|
+
if val["_dtype"] in self._nc_def["compounds"]:
|
|
295
|
+
compound = self._nc_def["compounds"][val["_dtype"]]
|
|
296
|
+
res = [v[2] for k, v in compound.items() if len(v) == 3]
|
|
297
|
+
if res:
|
|
298
|
+
dset.attrs["units"] = [v[1] for k, v in compound.items()]
|
|
299
|
+
dset.attrs["long_name"] = res
|
|
300
|
+
else:
|
|
301
|
+
dset.attrs["long_name"] = [v[1] for k, v in compound.items()]
|
|
280
302
|
|
|
281
303
|
@property
|
|
282
304
|
def nc_def(self: NcYaml) -> dict:
|
|
283
|
-
"""Return definition of the
|
|
305
|
+
"""Return definition of the netCDF4 product."""
|
|
284
306
|
return self._nc_def
|
|
285
307
|
|
|
286
308
|
def diskless(self: NcYaml) -> Dataset:
|
|
287
|
-
"""Create a
|
|
309
|
+
"""Create a netCDF4 file in memory."""
|
|
288
310
|
fid = Dataset("diskless_test.nc", "w", diskless=True, persistent=False)
|
|
289
311
|
self.__groups(fid)
|
|
290
312
|
self.__dimensions(fid)
|
|
291
|
-
self.
|
|
313
|
+
self.__compounds(fid)
|
|
314
|
+
self.__variables(fid)
|
|
315
|
+
self.__attrs(fid)
|
|
292
316
|
return fid
|
|
293
317
|
|
|
294
318
|
def create(self: NcYaml, l1a_name: Path | str) -> None:
|
|
295
|
-
"""Create a
|
|
319
|
+
"""Create a netCDF4 file (overwrite if exist).
|
|
296
320
|
|
|
297
321
|
Parameters
|
|
298
322
|
----------
|
|
299
323
|
l1a_name : Path | str
|
|
300
|
-
Full name of the
|
|
324
|
+
Full name of the netCDF4 file to be generated
|
|
301
325
|
|
|
302
326
|
"""
|
|
303
327
|
try:
|
|
304
328
|
with Dataset(l1a_name, "w") as fid:
|
|
305
329
|
self.__groups(fid)
|
|
306
330
|
self.__dimensions(fid)
|
|
307
|
-
self.
|
|
331
|
+
self.__compounds(fid)
|
|
332
|
+
self.__variables(fid)
|
|
333
|
+
self.__attrs(fid)
|
|
308
334
|
except PermissionError as exc:
|
|
309
335
|
raise RuntimeError(f"failed to create {l1a_name}") from exc
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: h5yaml
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Use YAML configuration file to generate HDF5/netCDF4 formatted files.
|
|
5
5
|
Project-URL: Homepage, https://github.com/rmvanhees/h5_yaml
|
|
6
6
|
Project-URL: Source, https://github.com/rmvanhees/h5_yaml
|
|
@@ -61,23 +61,39 @@ However, package `netCDF4` has some limitations, which `h5py` has not, for examp
|
|
|
61
61
|
not allow variable-length variables to have a compound data-type.
|
|
62
62
|
|
|
63
63
|
## Installation
|
|
64
|
-
|
|
64
|
+
The package `h5yaml` is available from PyPI. To install it use `pip`:
|
|
65
|
+
|
|
66
|
+
> $ pip install [--user] h5yaml
|
|
67
|
+
|
|
68
|
+
The module `h5yaml` requires Python3.10+ and Python modules: h5py (v3.14+), netCDF4 (v1.7+) and numpy (v2.0+).
|
|
69
|
+
|
|
70
|
+
**Note**: the packages `h5py` and `netCDF4` come with their own HDF5 libraries. If these are different then they may
|
|
71
|
+
collide and result in a *''HDF5 error''*.
|
|
72
|
+
If this is the case then you have to install the development packages of HDF5 and netCDF4 (or compile them from source).
|
|
73
|
+
And reinstall `h5py` and `netCDF4` using the commands:
|
|
74
|
+
|
|
75
|
+
> $ pip uninstall h5py; pip install --no-binary=h5py h5py
|
|
76
|
+
> $ pip uninstall netCDF4; pip install --no-binary=netCDF4 netCDF4
|
|
65
77
|
|
|
66
78
|
## Usage
|
|
67
79
|
|
|
68
80
|
The YAML file should be structured as follows:
|
|
69
81
|
|
|
70
|
-
* The top level are: 'groups', 'dimensions', 'compounds' and '
|
|
82
|
+
* The top-level sections are: 'groups', 'dimensions', 'compounds', 'variables', 'attrs\_global' and 'attrs\_groups'.
|
|
83
|
+
* > 'attrs\_global' and 'attrs\_groups' are added in version 0.3.0
|
|
84
|
+
* The names of the attributes, groups, dimensions, compounds and variables should be specified as PosixPaths, however:
|
|
85
|
+
* The names of groups should never start with a slash (always relative to root);
|
|
86
|
+
* All other elements which are stored in root should also not start with a slash;
|
|
87
|
+
* But these elements require a starting slash (absolute paths) when they are not stored in the root.
|
|
71
88
|
* The section 'groups' is optional, but you should provide each group you want to use
|
|
72
89
|
in your file. The 'groups' section in the YAML file may look like this:
|
|
73
|
-
|
|
74
90
|
```
|
|
75
91
|
groups:
|
|
76
92
|
- engineering_data
|
|
77
93
|
- image_attributes
|
|
78
94
|
- navigation_data
|
|
79
|
-
- processing_control
|
|
80
95
|
- science_data
|
|
96
|
+
- processing_control/input_data
|
|
81
97
|
```
|
|
82
98
|
|
|
83
99
|
* The section 'dimensions' is obligatory, you should define the dimensions for each
|
|
@@ -133,14 +149,6 @@ The YAML file should be structured as follows:
|
|
|
133
149
|
dark_offs: [f4, '1', dark-offset]
|
|
134
150
|
```
|
|
135
151
|
|
|
136
|
-
Alternatively, provide a list with names of YAML files which contain the definitions
|
|
137
|
-
of the compounds.
|
|
138
|
-
|
|
139
|
-
```
|
|
140
|
-
compounds:
|
|
141
|
-
- h5_nomhk_tm.yaml
|
|
142
|
-
- h5_science_hk.yaml
|
|
143
|
-
```
|
|
144
152
|
* The 'variables' are defined by their data-type ('_dtype') and dimensions ('_dims'),
|
|
145
153
|
and optionally chunk sizes ('_chunks'), compression ('_compression'), variable length
|
|
146
154
|
('_vlen'). In addition, each variable can have as many attributes as you like,
|
|
@@ -148,6 +156,16 @@ The YAML file should be structured as follows:
|
|
|
148
156
|
|
|
149
157
|
```
|
|
150
158
|
variables:
|
|
159
|
+
/science_data/detector_images:
|
|
160
|
+
_dtype: u2
|
|
161
|
+
_dims: [number_of_images, samples_per_image]
|
|
162
|
+
_compression: 3
|
|
163
|
+
_FillValue: 65535
|
|
164
|
+
long_name: Detector pixel values
|
|
165
|
+
coverage_content_type: image
|
|
166
|
+
units: '1'
|
|
167
|
+
valid_min: 0
|
|
168
|
+
valid_max: 65534
|
|
151
169
|
/image_attributes/nr_coadditions:
|
|
152
170
|
_dtype: u2
|
|
153
171
|
_dims: [number_of_images]
|
|
@@ -163,20 +181,18 @@ The YAML file should be structured as follows:
|
|
|
163
181
|
units: seconds
|
|
164
182
|
stats_163:
|
|
165
183
|
_dtype: stats_dtype
|
|
166
|
-
_vlen: True
|
|
167
184
|
_dims: [days]
|
|
185
|
+
_vlen: True
|
|
168
186
|
comment: detector map statistics (MPS=163)
|
|
169
187
|
```
|
|
170
188
|
|
|
171
|
-
### Notes and ToDo
|
|
189
|
+
### Notes and ToDo
|
|
172
190
|
|
|
173
|
-
|
|
174
|
-
* Explain usage of parameter '_chunks', which is currently not correctly implemented.
|
|
175
|
-
* Explain that the usage of variable length data-sets may break netCDF4 compatibility
|
|
191
|
+
* The layout of a HDF5 or netCDF4 file can be complex. From version 0.3.0, you can split the file definition over several YAML files and provide a list with the names of YAML files as input to H5Yaml and NcYaml.
|
|
176
192
|
|
|
177
193
|
## Support [TBW]
|
|
178
194
|
|
|
179
|
-
##
|
|
195
|
+
## Road map
|
|
180
196
|
|
|
181
197
|
* Release v0.1 : stable API to read your YAML files and generate the HDF5/netCDF4 file
|
|
182
198
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
2
|
+
h5yaml/conf_from_yaml.py,sha256=GVbWR-I0_sKRxrXmgLxbnTJvAXz5OtFtNYu4Pp3LaaI,1607
|
|
3
|
+
h5yaml/yaml_h5.py,sha256=R_WqvK2korrR_nCY7MlmycRcD5Hc4yeJC6B4rvuwulk,10399
|
|
4
|
+
h5yaml/yaml_nc.py,sha256=G4kvn_Ec72ND8phXgNbFWY7EML-pgiMcBVwZroPtrQY,11330
|
|
5
|
+
h5yaml/Data/h5_compound.yaml,sha256=pAVGyhGpbbFgsb1NoTQZsttPLK1zktTZRufFdSaX78U,1172
|
|
6
|
+
h5yaml/Data/h5_testing.yaml,sha256=s-kUjHiXKr4IOVf2vqz8mUr1vcU61wxKwZFuQCUbemA,6246
|
|
7
|
+
h5yaml/Data/h5_unsupported.yaml,sha256=EfFztuUpuXDl_7wgwIqelwE_gdvu35zKT-YtsUfGQeM,1342
|
|
8
|
+
h5yaml/Data/nc_testing.yaml,sha256=C30hXo73GG4BifIr7oymkbJ4Bh8hxKKDDMnFOO5VED8,5806
|
|
9
|
+
h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
10
|
+
h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
|
|
11
|
+
h5yaml-0.3.0.dist-info/METADATA,sha256=jFRcsa-2kTu8QIFtKmKWaURD3DcQwrfyiXzNvzsQXnU,8446
|
|
12
|
+
h5yaml-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
13
|
+
h5yaml-0.3.0.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
|
|
14
|
+
h5yaml-0.3.0.dist-info/RECORD,,
|
h5yaml/lib/chunksizes.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# This file is part of Python package: `h5yaml`
|
|
3
|
-
#
|
|
4
|
-
# https://github.com/rmvanhees/pyxarr.git
|
|
5
|
-
#
|
|
6
|
-
# Copyright (c) 2025 - R.M. van Hees (SRON)
|
|
7
|
-
# All rights reserved.
|
|
8
|
-
#
|
|
9
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
10
|
-
# you may not use this file except in compliance with the License.
|
|
11
|
-
# You may obtain a copy of the License at
|
|
12
|
-
#
|
|
13
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
14
|
-
#
|
|
15
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
16
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
-
# See the License for the specific language governing permissions and
|
|
19
|
-
# limitations under the License.
|
|
20
|
-
#
|
|
21
|
-
"""Obtain chunksizes for HDF5 datasets."""
|
|
22
|
-
|
|
23
|
-
from __future__ import annotations
|
|
24
|
-
|
|
25
|
-
__all__ = ["guess_chunks"]
|
|
26
|
-
|
|
27
|
-
from typing import TYPE_CHECKING
|
|
28
|
-
|
|
29
|
-
if TYPE_CHECKING:
|
|
30
|
-
from numpy.typing import ArrayLike
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def guess_chunks(dims: ArrayLike[int], dtype_sz: int) -> str | tuple[int]:
|
|
34
|
-
"""Perform an educated guess for the dataset chunk sizes.
|
|
35
|
-
|
|
36
|
-
Parameters
|
|
37
|
-
----------
|
|
38
|
-
dims : ArrayLike[int]
|
|
39
|
-
Dimensions of the variable
|
|
40
|
-
dtype_sz : int
|
|
41
|
-
The element size of the data-type of the variable
|
|
42
|
-
|
|
43
|
-
Returns
|
|
44
|
-
-------
|
|
45
|
-
"contiguous" or tuple with chunk-sizes
|
|
46
|
-
|
|
47
|
-
"""
|
|
48
|
-
fixed_size = dtype_sz
|
|
49
|
-
if len(dims) > 1:
|
|
50
|
-
for val in [x for x in dims[1:] if x > 0]:
|
|
51
|
-
fixed_size *= val
|
|
52
|
-
|
|
53
|
-
# first variables without an unlimited dimension
|
|
54
|
-
if 0 not in dims:
|
|
55
|
-
if fixed_size < 400000:
|
|
56
|
-
return "contiguous"
|
|
57
|
-
|
|
58
|
-
res = list(dims)
|
|
59
|
-
res[0] = max(1, 2048000 // fixed_size)
|
|
60
|
-
return tuple(res)
|
|
61
|
-
|
|
62
|
-
# then variables with an unlimited dimension
|
|
63
|
-
if len(dims) == 1:
|
|
64
|
-
return (1024,)
|
|
65
|
-
|
|
66
|
-
udim = dims.index(0)
|
|
67
|
-
res = list(dims)
|
|
68
|
-
if fixed_size < 400000:
|
|
69
|
-
res[udim] = 1024
|
|
70
|
-
else:
|
|
71
|
-
res[udim] = max(1, 2048000 // fixed_size)
|
|
72
|
-
|
|
73
|
-
return tuple(res)
|
h5yaml-0.2.0.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
h5yaml/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
2
|
-
h5yaml/conf_from_yaml.py,sha256=-2ar_gUmc5qvD1KlcctnpPY8G5c4TTXOF2tfrgcT9m4,1560
|
|
3
|
-
h5yaml/yaml_h5.py,sha256=jVZAL5Cu6dqgdn25cgVBP3g7yx2wdE-cmCbkaOmQKZ4,10153
|
|
4
|
-
h5yaml/yaml_nc.py,sha256=M6g4ZTPMlPmGZjyn3mLFnoQpmynGwgA0HtlUXSGNvvw,10963
|
|
5
|
-
h5yaml/Data/h5_compound.yaml,sha256=z3dMCJDRAw14boRp0zT74bz_oFi21yu8coUoKOW-d2Q,1131
|
|
6
|
-
h5yaml/Data/h5_testing.yaml,sha256=NhXeXjdblh3bv1hPjCl5DvIhEXo3EpD4mlgaZDElsJc,5626
|
|
7
|
-
h5yaml/Data/h5_unsupported.yaml,sha256=v4HYhiTikFt6UoEUJBnmSse_WeHbmBgqF2e1bCJEfLw,1502
|
|
8
|
-
h5yaml/Data/nc_testing.yaml,sha256=zuXcYrcuCankndt5e4nRPj2-xed97IA9yvfpn89XQgw,5451
|
|
9
|
-
h5yaml/lib/__init__.py,sha256=NdNciPgYnbq-aVM6QqNGNZtdL72rTGLAMrDy0Yw7ckk,751
|
|
10
|
-
h5yaml/lib/adjust_attr.py,sha256=4dHEGwwIa3a3hihyuSX8jCsC08fYcz_9XWA1pBwiwfc,2284
|
|
11
|
-
h5yaml/lib/chunksizes.py,sha256=R1kdaKWF0Hol_maZ0tPDoUyWIH5RatP7d2J1yBA8bkk,1949
|
|
12
|
-
h5yaml-0.2.0.dist-info/METADATA,sha256=IZxpXl9fI3Z7pa1DoRumjxPZXn8N_ykkJEdaUNnFJlw,7280
|
|
13
|
-
h5yaml-0.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
-
h5yaml-0.2.0.dist-info/licenses/LICENSE,sha256=rLarIZOYK5jHuUjMnFbgdI_Tb_4_HAAKSOOIhwiWlE4,11356
|
|
15
|
-
h5yaml-0.2.0.dist-info/RECORD,,
|
|
File without changes
|