pyTEMlib 0.2020.11.1__py3-none-any.whl → 0.2024.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyTEMlib might be problematic. Click here for more details.
- pyTEMlib/__init__.py +11 -11
- pyTEMlib/animation.py +631 -0
- pyTEMlib/atom_tools.py +240 -245
- pyTEMlib/config_dir.py +57 -33
- pyTEMlib/core_loss_widget.py +658 -0
- pyTEMlib/crystal_tools.py +1255 -0
- pyTEMlib/diffraction_plot.py +756 -0
- pyTEMlib/dynamic_scattering.py +293 -0
- pyTEMlib/eds_tools.py +609 -0
- pyTEMlib/eels_dialog.py +749 -491
- pyTEMlib/{interactive_eels.py → eels_dialog_utilities.py} +1199 -1177
- pyTEMlib/eels_tools.py +2031 -1698
- pyTEMlib/file_tools.py +1276 -560
- pyTEMlib/file_tools_qt.py +193 -0
- pyTEMlib/graph_tools.py +1166 -450
- pyTEMlib/graph_viz.py +449 -0
- pyTEMlib/image_dialog.py +158 -0
- pyTEMlib/image_dlg.py +146 -232
- pyTEMlib/image_tools.py +1399 -1028
- pyTEMlib/info_widget.py +933 -0
- pyTEMlib/interactive_image.py +1 -226
- pyTEMlib/kinematic_scattering.py +1196 -0
- pyTEMlib/low_loss_widget.py +176 -0
- pyTEMlib/microscope.py +61 -81
- pyTEMlib/peak_dialog.py +1047 -410
- pyTEMlib/peak_dlg.py +286 -242
- pyTEMlib/probe_tools.py +653 -207
- pyTEMlib/sidpy_tools.py +153 -136
- pyTEMlib/simulation_tools.py +104 -87
- pyTEMlib/version.py +6 -3
- pyTEMlib/xrpa_x_sections.py +20972 -0
- {pyTEMlib-0.2020.11.1.dist-info → pyTEMlib-0.2024.9.0.dist-info}/LICENSE +21 -21
- pyTEMlib-0.2024.9.0.dist-info/METADATA +92 -0
- pyTEMlib-0.2024.9.0.dist-info/RECORD +37 -0
- {pyTEMlib-0.2020.11.1.dist-info → pyTEMlib-0.2024.9.0.dist-info}/WHEEL +5 -5
- {pyTEMlib-0.2020.11.1.dist-info → pyTEMlib-0.2024.9.0.dist-info}/entry_points.txt +0 -1
- pyTEMlib/KinsCat.py +0 -2758
- pyTEMlib/__version__.py +0 -2
- pyTEMlib/data/TEMlibrc +0 -68
- pyTEMlib/data/edges_db.csv +0 -189
- pyTEMlib/data/edges_db.pkl +0 -0
- pyTEMlib/data/fparam.txt +0 -103
- pyTEMlib/data/microscopes.csv +0 -7
- pyTEMlib/data/microscopes.xml +0 -167
- pyTEMlib/data/path.txt +0 -1
- pyTEMlib/defaults_parser.py +0 -90
- pyTEMlib/dm3_reader.py +0 -613
- pyTEMlib/edges_db.py +0 -76
- pyTEMlib/eels_dlg.py +0 -224
- pyTEMlib/hdf_utils.py +0 -483
- pyTEMlib/image_tools1.py +0 -2194
- pyTEMlib/info_dialog.py +0 -237
- pyTEMlib/info_dlg.py +0 -202
- pyTEMlib/nion_reader.py +0 -297
- pyTEMlib/nsi_reader.py +0 -170
- pyTEMlib/structure_tools.py +0 -316
- pyTEMlib/test.py +0 -2072
- pyTEMlib-0.2020.11.1.dist-info/METADATA +0 -20
- pyTEMlib-0.2020.11.1.dist-info/RECORD +0 -45
- {pyTEMlib-0.2020.11.1.dist-info → pyTEMlib-0.2024.9.0.dist-info}/top_level.txt +0 -0
pyTEMlib/hdf_utils.py
DELETED
|
@@ -1,483 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
Lower-level and simpler NSID-specific HDF5 utilities that facilitate
|
|
4
|
-
higher-level data operations
|
|
5
|
-
|
|
6
|
-
goes back to sidpy
|
|
7
|
-
|
|
8
|
-
Created on Tue Aug 3 21:14:25 2020
|
|
9
|
-
|
|
10
|
-
@author: Gerd Duscher, and Suhas Somnath
|
|
11
|
-
"""
|
|
12
|
-
from __future__ import division, print_function, absolute_import, unicode_literals
|
|
13
|
-
import sys
|
|
14
|
-
import h5py
|
|
15
|
-
import numpy as np
|
|
16
|
-
import dask.array as da
|
|
17
|
-
|
|
18
|
-
import sys
|
|
19
|
-
|
|
20
|
-
import sidpy
|
|
21
|
-
from sidpy.base.num_utils import contains_integers
|
|
22
|
-
from sidpy.hdf.hdf_utils import get_attr, copy_dataset
|
|
23
|
-
from sidpy.hdf import hdf_utils as hut
|
|
24
|
-
from sidpy import Dimension
|
|
25
|
-
|
|
26
|
-
# Python 3 has no separate ``unicode`` type; alias the old name to ``str`` so
# the ``isinstance(val, (str, unicode))`` check in this module keeps working.
if sys.version_info[0] == 3:
    unicode = str
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def get_all_main(parent, verbose=False):
    """
    Collect every dataset below ``parent`` that qualifies as a `Main` dataset

    Parameters
    ----------
    parent : :class:`h5py.Group` or :class:`h5py.File`
        HDF5 object whose contents are walked with ``visititems``
    verbose : bool, optional. Default = False
        If True, progress messages are printed while walking the tree

    Returns
    -------
    main_list : list of h5py.Dataset
        Datasets for which :func:`check_if_main` returned a truthy value

    Raises
    ------
    TypeError
        If ``parent`` is neither a h5py.File nor a h5py.Group
    """
    if not isinstance(parent, (h5py.Group, h5py.File)):
        raise TypeError('parent should be a h5py.File or h5py.Group object')

    found = []

    def _collect(path, node):
        # Callback for ``visititems``. It always returns None; per the h5py
        # documentation any other return value would stop the traversal.
        if verbose:
            print(path, node)
        if not isinstance(node, h5py.Dataset):
            return
        if verbose:
            print(path, 'is an HDF5 Dataset.')
        if check_if_main(node):
            if verbose:
                print(path, 'is a `Main` dataset.')
            found.append(node)

    if verbose:
        print('Checking the group {} for `Main` datasets.'.format(parent.name))
    parent.visititems(_collect)

    return found
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def find_dataset(h5_group, dset_name):
    """
    Find all datasets called ``dset_name`` below ``h5_group``

    Thin wrapper around ``sidpy.hdf.hdf_utils.find_dataset`` that always hands
    back a new list holding whatever that function yields.

    Parameters
    ----------
    h5_group : :class:`h5py.Group`
        Group to search within for the Dataset
    dset_name : str
        Name of the dataset to search for

    Returns
    -------
    datasets : list
        Matches reported by ``sidpy.hdf.hdf_utils.find_dataset``, in the
        order they were reported.
    """
    return list(hut.find_dataset(h5_group, dset_name))
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def validate_main_dset(h5_main, must_be_h5):
    """
    Checks to make sure that the provided object is a NSID main dataset

    Errors in parameters will result in Exceptions

    Parameters
    ----------
    h5_main : h5py.Dataset or numpy.ndarray or Dask.array.core.array
        object that represents the NSID main data
    must_be_h5 : bool
        Set to True if the expecting an h5py.Dataset object.
        Set to False if expecting a numpy.ndarray or Dask.array.core.array

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``h5_main`` is not of the type selected by ``must_be_h5``
    ValueError
        If the object exposes dimension scales (a ``dims`` attribute) and
        their number differs from the number of axes in ``shape``
    """
    # Check that h5_main is of the requested kind
    if must_be_h5:
        if not isinstance(h5_main, h5py.Dataset):
            raise TypeError('{} is not an HDF5 Dataset object.'.format(h5_main))
    elif not (isinstance(h5_main, np.ndarray)
              or isinstance(h5_main, da.core.Array)):
        raise TypeError('raw_data should either be a np.ndarray or a '
                        'da.core.Array')

    # In-memory arrays (numpy / dask) have no ``dims`` attribute. Reading it
    # unconditionally made every valid ``must_be_h5=False`` call fail with an
    # AttributeError, so the dimensionality check only runs when scales exist.
    dims = getattr(h5_main, 'dims', None)
    if dims is None:
        return

    # Check dimensionality
    # NOTE(review): for h5py datasets ``len(dims)`` presumably always equals
    # the rank, which would make this check a formality - confirm intent.
    if len(h5_main.shape) != len(dims):
        raise ValueError('Main data does not have full set of dimensional '
                         'scales. Provided object has shape: {} but only {} '
                         'dimensional scales'
                         ''.format(h5_main.shape, len(dims)))
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def validate_anc_h5_dsets(h5_inds, h5_vals, main_shape, is_spectroscopic=True):
    """
    Compare a pair of ancillary HDF5 datasets with the shape of a main dataset.

    Nothing is returned; every violation is reported by raising.

    Parameters
    ----------
    h5_inds : h5py.Dataset
        HDF5 dataset corresponding to the ancillary Indices dataset
    h5_vals : h5py.Dataset
        HDF5 dataset corresponding to the ancillary Values dataset
    main_shape : list or tuple
        Two-element shape of the main dataset, integers >= 1
    is_spectroscopic : bool, Optional. Default = True
        Also used as the axis index that is compared: True selects axis 1,
        False selects axis 0.

    Raises
    ------
    TypeError
        If ``h5_inds`` / ``h5_vals`` are not h5py.Dataset objects or
        ``main_shape`` is neither a list nor a tuple
    ValueError
        If the two ancillary shapes differ, ``main_shape`` is not a valid
        two-element shape, or the compared axis lengths disagree
    """
    if not isinstance(h5_inds, h5py.Dataset):
        raise TypeError('h5_inds must be a h5py.Dataset object')
    if not isinstance(h5_vals, h5py.Dataset):
        raise TypeError('h5_vals must be a h5py.Dataset object')

    inds_shape = h5_inds.shape
    if inds_shape != h5_vals.shape:
        raise ValueError('h5_inds: {} and h5_vals: {} should be of the same '
                         'shape'.format(inds_shape, h5_vals.shape))

    if not isinstance(main_shape, (list, tuple)):
        raise TypeError('main_shape should be of the following types:'
                        'h5py.Dataset, tuple, or list. {} provided'
                        ''.format(type(main_shape)))
    if not (contains_integers(main_shape, min_val=1) and len(main_shape) == 2):
        raise ValueError("'main_shape' must be a valid HDF5 dataset shape")

    # The flag doubles as the axis to compare (bool indexes as 0 / 1)
    axis = is_spectroscopic
    if h5_inds.shape[axis] != main_shape[axis]:
        raise ValueError('index {} in shape of h5_inds: {} and main_data: {} '
                         'should be equal'.format(int(is_spectroscopic),
                                                  h5_inds.shape, main_shape))
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
def validate_dims_against_main(main_shape, dims, is_spectroscopic=True):
    """
    Checks Dimension objects against a given shape for main datasets.

    Errors in parameters will result in Exceptions

    Parameters
    ----------
    main_shape : list or tuple
        Two-element shape of the main data, integers >= 1
    dims : Dimension or list / tuple of Dimension
        Dimension object(s) whose value counts are multiplied together
    is_spectroscopic : bool, Optional. Default = True
        set to True if ``dims`` correspond to Spectroscopic Dimensions
        (compared with ``main_shape[1]``). False compares with
        ``main_shape[0]``.

    Raises
    ------
    TypeError
        If ``main_shape`` is not a list / tuple, or ``dims`` is not a
        Dimension or a list / tuple consisting only of Dimension objects
    ValueError
        If ``main_shape`` does not hold exactly two integers >= 1, or the
        product of the dimension lengths differs from the selected axis
    """
    if not isinstance(main_shape, (list, tuple)):
        raise TypeError('main_shape should be a list or tuple. Provided object'
                        ' was of type: {}'.format(type(main_shape)))
    if len(main_shape) != 2:
        raise ValueError('"main_shape" should be of length 2')
    # The result of this check used to be thrown away, so invalid shapes
    # slipped through silently.
    if not contains_integers(main_shape, min_val=1):
        raise ValueError('"main_shape" should only contain integers >= 1')

    if isinstance(dims, Dimension):
        dims = [dims]
    elif not isinstance(dims, (list, tuple)):
        raise TypeError('"dims" must be a list or tuple of nsid.Dimension '
                        'objects. Provided object was of type: {}'
                        ''.format(type(dims)))
    if not all(isinstance(obj, Dimension) for obj in dims):
        raise TypeError('One or more objects in "dims" was not nsid.Dimension')

    if is_spectroscopic:
        main_dim, dim_category = 1, 'Spectroscopic'
    else:
        main_dim, dim_category = 0, 'Position'

    # TODO: This is where the dimension type will need to be taken into account
    lhs = main_shape[main_dim]
    # np.prod instead of np.product: the latter was removed in NumPy 2.0
    rhs = np.prod([len(x.values) for x in dims])
    if lhs != rhs:
        raise ValueError(dim_category +
                         ' dimensions in main data of size: {} do not match '
                         'with product of values in provided Dimension objects'
                         ': {}'.format(lhs, rhs))
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
def check_if_main(h5_main, verbose=False):
    """
    Checks the input dataset to see if it has all the necessary
    features to be considered a Main dataset.

    The dataset must pass :func:`validate_main_dset` and carry the following
    string attributes:

    * quantity
    * units
    * main_data_name
    * data_type
    * modality
    * source

    In addition, every axis must have a label that names a 1D dataset in the
    parent group. That dataset must have the same length as the axis and
    carry the attributes ``dimension_type``, ``name``, ``nsid_version``,
    ``quantity`` and ``units``.

    Parameters
    ----------
    h5_main : HDF5 Dataset
        Dataset of interest
    verbose : Boolean (Optional. Default = False)
        Whether or not to print statements

    Returns
    -------
    success : bool
        True if all tests pass
    """
    try:
        validate_main_dset(h5_main, True)
    except (TypeError, ValueError) as exep:
        if verbose:
            print(exep)
        return False

    h5_group = h5_main.parent

    # Check for all required attributes in dataset
    main_attrs_names = ['quantity', 'units', 'main_data_name', 'data_type', 'modality', 'source']
    main_attr_success = all(att in h5_main.attrs for att in main_attrs_names)
    if verbose:
        print('All Attributes in dataset: ', main_attr_success)
    if not main_attr_success:
        if verbose:
            print('{} does not have the mandatory attributes'.format(h5_main.name))
        return False

    for attr_name in main_attrs_names:
        val = get_attr(h5_main, attr_name)
        if not isinstance(val, (str, unicode)):
            if verbose:
                print('Attribute {} of {} found to be {}. Expected a string'.format(attr_name, h5_main.name, val))
            return False

    # Check for Validity of Dimensional Scales
    attrs_names = ['dimension_type', 'name', 'nsid_version', 'quantity', 'units']
    attr_success = []
    length_success = []
    dset_success = []
    for i, dimension in enumerate(h5_main.dims):
        label = dimension.label
        # An unlabeled axis, or a label that names nothing in the parent
        # group, is a failed check rather than a crash on the lookup.
        h5_dim_dset = h5_group[label] if label and label in h5_group else None
        is_dataset = isinstance(h5_dim_dset, h5py.Dataset)
        dset_success.append(is_dataset)
        if not is_dataset:
            attr_success.append(False)
            length_success.append(False)
            continue
        # check for all required attributes
        attr_success.append(all(att in h5_dim_dset.attrs for att in attrs_names))
        # dimensional scale has to be 1D and as long as the matching axis
        length_success.append(len(h5_dim_dset.shape) == 1
                              and h5_main.shape[i] == h5_dim_dset.shape[0])

    if all(attr_success) and all(length_success) and all(dset_success):
        if verbose:
            print('Dimensions: All Attributes: ', True)
            print('Dimensions: All Correct Length: ', True)
            print('Dimensions: All h5 Datasets: ', True)
        return True

    if verbose:
        # Report every failing scale by index
        for i, (dset_ok, attr_ok, length_ok) in enumerate(zip(dset_success, attr_success, length_success)):
            if not dset_ok:
                print('dimension scale {} is not a dataset'.format(i))
                continue
            if not attr_ok:
                print('attributes in dimension scale {} are wrong'.format(i))
            if not length_ok:
                print('length of dimension scale {} is wrong'.format(i))
    return False
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def link_as_main(h5_main, dim_dict):
    """
    Attaches datasets as h5 Dimensional Scales to `h5_main`

    Parameters
    ----------
    h5_main : h5py.Dataset
        N-dimensional Dataset which will have the references added as h5 Dimensional Scales
    dim_dict : dict
        Maps each axis index (0 .. N-1) to the h5py.Dataset used as the
        h5 Dimensional Scale of that axis. Each dataset must pass
        :func:`validate_dimensions`.

    Returns
    -------
    h5py.Dataset
        ``h5_main`` itself, after the scales have been attached

    Raises
    ------
    TypeError
        If ``h5_main`` or any value of ``dim_dict`` is not a h5py.Dataset,
        a dimension fails validation, or two dimensions share a name
    ValueError
        If the number of entries in ``dim_dict`` differs from the rank of
        ``h5_main``
    KeyError
        If the keys of ``dim_dict`` are not exactly the axis indices
    """
    if not isinstance(h5_main, h5py.Dataset):
        raise TypeError('h5_main should be a h5py.Dataset object')

    h5_parent_group = h5_main.parent
    main_shape = h5_main.shape

    ######################
    # Validate Dimensions
    ######################
    # An N dimensional dataset should have N items in the dimension dictionary
    if len(dim_dict) != len(main_shape):
        raise ValueError('Incorrect number of dimensions: {} provided to support main data, of shape: {}'
                         .format(len(dim_dict), main_shape))
    if set(range(len(main_shape))) != set(dim_dict.keys()):
        raise KeyError('Keys of dim_dict must be the axis indices 0 to {}; got {}'
                       .format(len(main_shape) - 1, list(dim_dict.keys())))

    # Scales that will actually be attached; kept separate so the caller's
    # dictionary is never modified.
    scales = {}
    dim_names = []
    for index, dim_exp_size in enumerate(main_shape):
        this_dim = dim_dict[index]
        if not isinstance(this_dim, h5py.Dataset):
            raise TypeError('Items in dictionary must all be h5py.Datasets !')

        error_message = validate_dimensions(this_dim, dim_exp_size)
        if error_message:
            raise TypeError('Dimension {} has the following error_message:\n{}'
                            .format(index, error_message))

        if this_dim.name in dim_names:  # names must be unique
            raise TypeError('All dimension names must be unique, found {} twice'.format(this_dim.name))
        dim_names.append(this_dim.name)

        if this_dim.file != h5_parent_group.file:
            # Attach the copy that lives next to h5_main, not the dataset from
            # the other file.
            # NOTE(review): assumes sidpy's copy_dataset returns the new
            # dataset - confirm; otherwise the original is used as before.
            copied = copy_dataset(this_dim, h5_parent_group, verbose=False)
            if isinstance(copied, h5py.Dataset):
                this_dim = copied
        scales[index] = this_dim

    ################
    # Attach Scales
    ################
    for index, this_dim_dset in scales.items():
        scale_name = this_dim_dset.attrs['name']
        this_dim_dset.make_scale(scale_name)
        h5_main.dims[index].label = scale_name
        h5_main.dims[index].attach_scale(this_dim_dset)

    return h5_main
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
def get_source_dataset(h5_group):
    """
    Look up the dataset a results group was derived from.

    The last component of the group's name is expected to look like
    ``SourceDataset-ProcessName_000``; the part before the single ``-`` is
    looked up in the group's parent.

    Parameters
    ----------
    h5_group : :class:`h5py.Group`
        Child group whose source dataset will be returned

    Returns
    -------
    h5_source : h5py.Dataset
        Dataset in the parent group named by the part before the ``-``

    Raises
    ------
    TypeError
        If ``h5_group`` is not a h5py.Group
    ValueError
        If the group name does not contain exactly one ``-`` or the object
        found under the source name is not a dataset
    """
    if not isinstance(h5_group, h5py.Group):
        raise TypeError('h5_group should be a h5py.Group object')

    parent = h5_group.parent
    # Only the last path component carries the "Source-Process" naming
    basename = h5_group.name.rsplit('/', 1)[-1]
    pieces = basename.split('-')
    if len(pieces) != 2:
        raise ValueError("The provided group's name could not be split by '-' as expected in "
                         "SourceDataset-ProcessName_000")

    candidate = parent[pieces[0]]
    if isinstance(candidate, h5py.Dataset):
        return candidate
    raise ValueError('Source object was not a dataset!')
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
def validate_dimensions(this_dim, dim_shape):
    """
    Checks if the provided object is a h5 dataset usable as a dimension.

    A valid dimension dataset is 1D, has ``dim_shape`` entries and carries
    the attributes 'name', 'quantity', 'units', and 'dimension_type', each
    stored as a str.
    Problems are reported through the returned message, not by raising.

    Parameters
    ----------
    this_dim : h5 dataset
        with attributes 'name', 'quantity', 'units', and 'dimension_type'
    dim_shape : int
        required length of dataset

    Returns
    -------
    error_message: string, empty if ok.
    """
    if not isinstance(this_dim, h5py.Dataset):
        return 'this Dimension must be a h5 Dataset'

    problems = []
    # ``shape`` can be falsy for datasets without extent; treat those as
    # zero-dimensional so the checks below never call len() on them.
    shape = this_dim.shape or ()

    # Is it 1D?
    if len(shape) != 1:
        problems.append(' High dimensional datasets are not allowed as dimensions;\n')
    # is the shape matching with the main dataset?
    # The first axis is compared, as len(dataset) did. Scalar datasets have
    # no length and previously raised TypeError here instead of reporting.
    if len(shape) > 0 and shape[0] != dim_shape:
        problems.append(' Dimension has wrong length;\n')

    # Does it contain the ancillary attributes, all stored as strings?
    for key in ('name', 'quantity', 'units', 'dimension_type'):
        if key not in this_dim.attrs:
            problems.append('Missing {} attribute in dimension;\n '.format(key))
        elif not isinstance(this_dim.attrs[key], str):
            problems.append('{} attribute in dimension should be string;\n '.format(key))

    return ''.join(problems)
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
def validate_main_dimensions(main_shape, dim_dict, h5_parent_group):
    """
    Check the dimensions supplied for a main dataset, axis by axis.

    Each value of ``dim_dict`` may be a h5py.Dataset or a Dimension object.
    HDF5 datasets are checked with :func:`validate_dimensions`. A dataset
    that passes and lives in a different file than ``h5_parent_group`` is
    copied into that group.

    Parameters
    ----------
    main_shape : sequence of int
        Shape of the main dataset
    dim_dict : dict
        Maps every axis index of ``main_shape`` to a h5py.Dataset or a
        Dimension
    h5_parent_group : h5py.Group
        Group the main dataset lives in

    Returns
    -------
    dimensions_correct : list of bool
        One entry per axis, True if the dimension passed the checks

    Raises
    ------
    TypeError
        If two HDF5 dimension datasets share a name, or a value of
        ``dim_dict`` is neither a h5py.Dataset nor a Dimension
    """
    # Names collected so far; they have to be unique
    dim_names = []
    dimensions_correct = []

    for index, dim_exp_size in enumerate(main_shape):
        this_dim = dim_dict[index]

        if isinstance(this_dim, h5py.Dataset):
            error_message = validate_dimensions(this_dim, dim_exp_size)
            if error_message:
                print('Dimension {} has the following error_message:\n'.format(index), error_message)
                # Record the failure so the caller sees it in the result;
                # it used to be printed only.
                dimensions_correct.append(False)
                continue

            if this_dim.name in dim_names:  # names must be unique
                raise TypeError('All dimension names must be unique, found'
                                ' {} twice'.format(this_dim.name))
            dim_names.append(this_dim.name)

            # are all datasets in the same file?
            if this_dim.file != h5_parent_group.file:
                copy_dataset(this_dim, h5_parent_group, verbose=True)
            dimensions_correct.append(True)

        elif isinstance(this_dim, Dimension):
            # is the shape matching with the main dataset?
            dimensions_correct.append(len(this_dim.values) == dim_exp_size)
            # Is there a HDF5 object with the same name already in the
            # group where this dimension would be created?
            if this_dim.name in h5_parent_group:
                if isinstance(h5_parent_group[this_dim.name], h5py.Dataset):
                    print('needs more checking')
                    dimensions_correct[-1] = False
                else:
                    # NOTE(review): this overrides the length check above
                    # with True whenever a non-dataset object has the same
                    # name - kept as in the original, confirm intent.
                    dimensions_correct[-1] = True
            # Otherwise, just record the dimension name for the uniqueness test
            elif this_dim.name not in dim_names:
                dim_names.append(this_dim.name)
            else:
                dimensions_correct[-1] = False

        else:
            raise TypeError('Values of dim_dict should either be h5py.Dataset '
                            'objects or Dimension. Object at index: {} was of '
                            'type: {}'.format(index, type(this_dim)))

    return dimensions_correct
|