pytme 0.2.9__cp311-cp311-macosx_15_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.2.9.data/scripts/estimate_ram_usage.py +97 -0
- pytme-0.2.9.data/scripts/match_template.py +1135 -0
- pytme-0.2.9.data/scripts/postprocess.py +622 -0
- pytme-0.2.9.data/scripts/preprocess.py +209 -0
- pytme-0.2.9.data/scripts/preprocessor_gui.py +1227 -0
- pytme-0.2.9.dist-info/METADATA +95 -0
- pytme-0.2.9.dist-info/RECORD +119 -0
- pytme-0.2.9.dist-info/WHEEL +5 -0
- pytme-0.2.9.dist-info/entry_points.txt +6 -0
- pytme-0.2.9.dist-info/licenses/LICENSE +153 -0
- pytme-0.2.9.dist-info/top_level.txt +3 -0
- scripts/__init__.py +0 -0
- scripts/estimate_ram_usage.py +97 -0
- scripts/match_template.py +1135 -0
- scripts/postprocess.py +622 -0
- scripts/preprocess.py +209 -0
- scripts/preprocessor_gui.py +1227 -0
- tests/__init__.py +0 -0
- tests/data/Blurring/blob_width18.npy +0 -0
- tests/data/Blurring/edgegaussian_sigma3.npy +0 -0
- tests/data/Blurring/gaussian_sigma2.npy +0 -0
- tests/data/Blurring/hamming_width6.npy +0 -0
- tests/data/Blurring/kaiserb_width18.npy +0 -0
- tests/data/Blurring/localgaussian_sigma0510.npy +0 -0
- tests/data/Blurring/mean_size5.npy +0 -0
- tests/data/Blurring/ntree_sigma0510.npy +0 -0
- tests/data/Blurring/rank_rank3.npy +0 -0
- tests/data/Maps/.DS_Store +0 -0
- tests/data/Maps/emd_8621.mrc.gz +0 -0
- tests/data/README.md +2 -0
- tests/data/Raw/em_map.map +0 -0
- tests/data/Structures/.DS_Store +0 -0
- tests/data/Structures/1pdj.cif +3339 -0
- tests/data/Structures/1pdj.pdb +1429 -0
- tests/data/Structures/5khe.cif +3685 -0
- tests/data/Structures/5khe.ent +2210 -0
- tests/data/Structures/5khe.pdb +2210 -0
- tests/data/Structures/5uz4.cif +70548 -0
- tests/preprocessing/__init__.py +0 -0
- tests/preprocessing/test_compose.py +76 -0
- tests/preprocessing/test_frequency_filters.py +178 -0
- tests/preprocessing/test_preprocessor.py +136 -0
- tests/preprocessing/test_utils.py +79 -0
- tests/test_analyzer.py +216 -0
- tests/test_backends.py +446 -0
- tests/test_density.py +503 -0
- tests/test_extensions.py +130 -0
- tests/test_matching_cli.py +283 -0
- tests/test_matching_data.py +162 -0
- tests/test_matching_exhaustive.py +124 -0
- tests/test_matching_memory.py +30 -0
- tests/test_matching_optimization.py +226 -0
- tests/test_matching_utils.py +189 -0
- tests/test_orientations.py +175 -0
- tests/test_parser.py +33 -0
- tests/test_rotations.py +153 -0
- tests/test_structure.py +247 -0
- tme/__init__.py +6 -0
- tme/__version__.py +1 -0
- tme/analyzer/__init__.py +2 -0
- tme/analyzer/_utils.py +186 -0
- tme/analyzer/aggregation.py +577 -0
- tme/analyzer/peaks.py +953 -0
- tme/backends/__init__.py +171 -0
- tme/backends/_cupy_utils.py +734 -0
- tme/backends/_jax_utils.py +188 -0
- tme/backends/cupy_backend.py +294 -0
- tme/backends/jax_backend.py +314 -0
- tme/backends/matching_backend.py +1270 -0
- tme/backends/mlx_backend.py +241 -0
- tme/backends/npfftw_backend.py +583 -0
- tme/backends/pytorch_backend.py +430 -0
- tme/data/__init__.py +0 -0
- tme/data/c48n309.npy +0 -0
- tme/data/c48n527.npy +0 -0
- tme/data/c48n9.npy +0 -0
- tme/data/c48u1.npy +0 -0
- tme/data/c48u1153.npy +0 -0
- tme/data/c48u1201.npy +0 -0
- tme/data/c48u1641.npy +0 -0
- tme/data/c48u181.npy +0 -0
- tme/data/c48u2219.npy +0 -0
- tme/data/c48u27.npy +0 -0
- tme/data/c48u2947.npy +0 -0
- tme/data/c48u3733.npy +0 -0
- tme/data/c48u4749.npy +0 -0
- tme/data/c48u5879.npy +0 -0
- tme/data/c48u7111.npy +0 -0
- tme/data/c48u815.npy +0 -0
- tme/data/c48u83.npy +0 -0
- tme/data/c48u8649.npy +0 -0
- tme/data/c600v.npy +0 -0
- tme/data/c600vc.npy +0 -0
- tme/data/metadata.yaml +80 -0
- tme/data/quat_to_numpy.py +42 -0
- tme/data/scattering_factors.pickle +0 -0
- tme/density.py +2263 -0
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/external/bindings.cpp +332 -0
- tme/filters/__init__.py +6 -0
- tme/filters/_utils.py +311 -0
- tme/filters/bandpass.py +230 -0
- tme/filters/compose.py +81 -0
- tme/filters/ctf.py +393 -0
- tme/filters/reconstruction.py +160 -0
- tme/filters/wedge.py +542 -0
- tme/filters/whitening.py +191 -0
- tme/matching_data.py +863 -0
- tme/matching_exhaustive.py +497 -0
- tme/matching_optimization.py +1311 -0
- tme/matching_scores.py +1183 -0
- tme/matching_utils.py +1188 -0
- tme/memory.py +337 -0
- tme/orientations.py +598 -0
- tme/parser.py +685 -0
- tme/preprocessor.py +1329 -0
- tme/rotations.py +350 -0
- tme/structure.py +1864 -0
- tme/types.py +13 -0
tme/density.py
ADDED
@@ -0,0 +1,2263 @@
|
|
1
|
+
""" Representation of N-dimensional densities
|
2
|
+
|
3
|
+
Copyright (c) 2023 European Molecular Biology Laboratory
|
4
|
+
|
5
|
+
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
6
|
+
"""
|
7
|
+
|
8
|
+
import warnings
|
9
|
+
from io import BytesIO
|
10
|
+
from copy import deepcopy
|
11
|
+
from gzip import open as gzip_open
|
12
|
+
from typing import Tuple, Dict, Set
|
13
|
+
from os.path import splitext, basename
|
14
|
+
|
15
|
+
import h5py
|
16
|
+
import mrcfile
|
17
|
+
import numpy as np
|
18
|
+
import skimage.io as skio
|
19
|
+
|
20
|
+
from scipy.ndimage import (
|
21
|
+
zoom,
|
22
|
+
laplace,
|
23
|
+
sobel,
|
24
|
+
minimum_filter,
|
25
|
+
binary_erosion,
|
26
|
+
generic_gradient_magnitude,
|
27
|
+
)
|
28
|
+
from scipy.spatial import ConvexHull
|
29
|
+
|
30
|
+
from .types import NDArray
|
31
|
+
from .rotations import align_to_axis
|
32
|
+
from .backends import NumpyFFTWBackend
|
33
|
+
from .structure import Structure
|
34
|
+
from .matching_utils import (
|
35
|
+
array_to_memmap,
|
36
|
+
memmap_to_array,
|
37
|
+
minimum_enclosing_box,
|
38
|
+
)
|
39
|
+
|
40
|
+
__all__ = ["Density"]
|
41
|
+
|
42
|
+
|
43
|
+
class Density:
|
44
|
+
"""
|
45
|
+
Abstract representation of N-dimensional densities.
|
46
|
+
|
47
|
+
Parameters
|
48
|
+
----------
|
49
|
+
data : array_like
|
50
|
+
Array of densities.
|
51
|
+
origin : array_like, optional
|
52
|
+
Origin of the coordinate system, zero by default.
|
53
|
+
sampling_rate : array_like, optional
|
54
|
+
Sampling rate along data axis, one by default.
|
55
|
+
metadata : dict, optional
|
56
|
+
Dictionary with metadata information, empty by default.
|
57
|
+
|
58
|
+
Raises
|
59
|
+
------
|
60
|
+
ValueError
|
61
|
+
If metadata is not a dictionary.
|
62
|
+
|
63
|
+
If sampling rate / origin is not defined for a single or all axes.
|
64
|
+
|
65
|
+
Examples
|
66
|
+
--------
|
67
|
+
The following achieves the minimal definition of a :py:class:`Density` instance
|
68
|
+
|
69
|
+
>>> import numpy as np
|
70
|
+
>>> from tme import Density
|
71
|
+
>>> data = np.random.rand(50,70,40)
|
72
|
+
>>> Density(data=data)
|
73
|
+
|
74
|
+
Optional parameters ``origin`` correspond to the coordinate system reference,
|
75
|
+
``sampling_rate`` to the spatial length per axis element, and ``metadata`` to
|
76
|
+
a dictionary with supplementary information. By default,
|
77
|
+
:py:attr:`Density.origin` is set to zero, :py:attr:`Density.sampling_rate`
|
78
|
+
to one, and :py:attr:`Density.metadata` is an empty dictionary. If provided,
|
79
|
+
``origin`` and ``sampling_rate`` either need to be a single value
|
80
|
+
|
81
|
+
>>> Density(data=data, origin=0, sampling_rate=1)
|
82
|
+
|
83
|
+
be specified along each data axis
|
84
|
+
|
85
|
+
>>> Density(data=data, origin=(0, 0, 0), sampling_rate=(1.5, 1.1, 1.2))
|
86
|
+
|
87
|
+
or be a combination of both
|
88
|
+
|
89
|
+
>>> Density(data=data, origin=0, sampling_rate=(1.5, 1.1, 1.2))
|
90
|
+
"""
|
91
|
+
|
92
|
+
def __init__(
|
93
|
+
self,
|
94
|
+
data: NDArray,
|
95
|
+
origin: NDArray = None,
|
96
|
+
sampling_rate: NDArray = None,
|
97
|
+
metadata: Dict = {},
|
98
|
+
):
|
99
|
+
origin = np.zeros(data.ndim) if origin is None else origin
|
100
|
+
sampling_rate = 1 if sampling_rate is None else sampling_rate
|
101
|
+
origin, sampling_rate = np.asarray(origin), np.asarray(sampling_rate)
|
102
|
+
origin = np.repeat(origin, data.ndim // origin.size)
|
103
|
+
sampling_rate = np.repeat(sampling_rate, data.ndim // sampling_rate.size)
|
104
|
+
|
105
|
+
if sampling_rate.size != data.ndim:
|
106
|
+
raise ValueError(
|
107
|
+
"sampling_rate size should be 1 or "
|
108
|
+
f"{data.ndim}, not {sampling_rate.size}."
|
109
|
+
)
|
110
|
+
if origin.size != data.ndim:
|
111
|
+
raise ValueError(f"Expected origin size : {data.ndim}, got {origin.size}.")
|
112
|
+
if not isinstance(metadata, dict):
|
113
|
+
raise ValueError("Argument metadata has to be of class dict.")
|
114
|
+
|
115
|
+
self.data, self.origin, self.sampling_rate = data, origin, sampling_rate
|
116
|
+
self.metadata = metadata
|
117
|
+
|
118
|
+
def __repr__(self):
|
119
|
+
response = "Density object at {}\nOrigin: {}, Sampling Rate: {}, Shape: {}"
|
120
|
+
return response.format(
|
121
|
+
hex(id(self)),
|
122
|
+
tuple(round(float(x), 3) for x in self.origin),
|
123
|
+
tuple(round(float(x), 3) for x in self.sampling_rate),
|
124
|
+
self.shape,
|
125
|
+
)
|
126
|
+
|
127
|
+
@classmethod
def from_file(
    cls, filename: str, subset: Tuple[slice] = None, use_memmap: bool = False
) -> "Density":
    """
    Create a :py:class:`Density` instance from a file on disk.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC, EM, HDF5 or a format supported by
        :obj:`skimage.io.imread`. The file can be gzip compressed.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Memory map the data contained in ``filename`` to save memory.

    Returns
    -------
    :py:class:`Density`
        Class instance representing the data in ``filename``.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005.
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    Reading a file in the CCP4/MRC format [1]_:

    >>> from tme import Density
    >>> Density.from_file("/path/to/file.mrc")

    Only a part of the data can be read by passing a tuple of slices as
    ``subset``, for instance the first 50 voxels along each dimension:

    >>> subset_slices = (slice(0, 50), slice(0, 50), slice(0, 50))
    >>> Density.from_file("/path/to/file.mrc", subset=subset_slices)

    Memory mapping avoids loading the file entirely into memory, which is
    useful for large datasets or limited memory resources:

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap=True)

    Note that use_memmap will be ignored if the file is gzip compressed.

    Files with an `.em` or `.em.gz` extension are parsed as EM files [2]_:

    >>> Density.from_file("/path/to/file.em")
    >>> Density.from_file("/path/to/file.em.gz")

    Any other format is passed to :obj:`skimage.io.imread` [3]_, which does
    not extract origin or sampling_rate information from the file:

    >>> Density.from_file("/path/to/other_format.tif")

    Notes
    -----
    The reader is chosen from the end of ``filename``: "em" / "em.gz" selects
    the EM reader, "h5" / "h5.gz" the HDF5 reader, anything else the CCP4/MRC
    reader. If the chosen reader raises a ValueError,
    :obj:`skimage.io.imread` is used regardless of extension. The latter does
    not extract origin or sampling_rate information from the file.

    See Also
    --------
    :py:meth:`Density.to_file`

    """
    # Pick the reader from the filename ending.
    if filename.endswith(("em", "em.gz")):
        loader = cls._load_em
    elif filename.endswith(("h5", "h5.gz")):
        loader = cls._load_hdf5
    else:
        loader = cls._load_mrc

    try:
        with warnings.catch_warnings():
            # RuntimeWarnings raised while reading are suppressed.
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            loaded = loader(filename=filename, subset=subset, use_memmap=use_memmap)
            data, origin, sampling_rate, meta = loaded
    except ValueError:
        # Fallback reader: loads everything, so the subset is applied afterwards.
        data, origin, sampling_rate, meta = cls._load_skio(filename=filename)
        if subset is not None:
            cls._validate_slices(slices=subset, shape=data.shape)
            data = data[subset].copy()

    return cls(data=data, origin=origin, sampling_rate=sampling_rate, metadata=meta)
|
222
|
+
|
223
|
+
@classmethod
def _load_mrc(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray, NDArray, NDArray, Dict]:
    """
    Read data, origin, sampling rate and statistics from a CCP4/MRC file.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC format.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the Density objects data attribute should be memory mapped.

    Returns
    -------
    Tuple[NDArray, NDArray, NDArray, Dict]
        File data, coordinate origin, sampling rate array and metadata dictionary.

    Raises
    ------
    ValueError
        If MAPC, MAPR and MAPS in the header are not a permutation of 1, 2, 3.

    Warns
    -----
    If the header uses a non standard MAPC, MAPR, MAPS order, or if memory
    mapping is requested for a gzip compressed file.

    References
    ----------
    .. [1] Burnley T, Palmer C & Winn M (2017) Recent developments in the
           CCP-EM software suite. Acta Cryst. D73:469–477.
           doi: 10.1107/S2059798317007859

    See Also
    --------
    :py:meth:`Density.from_file`

    """
    xyz_record = [("x", "<f4"), ("y", "<f4"), ("z", "<f4")]

    # First pass reads the header only; the data is loaded further below.
    with mrcfile.open(filename, header_only=True, permissive=True) as mrc:
        header = mrc.header
        data_shape = header.nz, header.ny, header.nx
        data_type = mrcfile.utils.data_dtype_from_header(header)

        # All map related parameters should be in zyx order
        origin = header["origin"].astype(xyz_record).view(("<f4", 3))[::-1]

        # nx := column; ny := row; nz := section
        start = np.array([header[key] for key in ("nzstart", "nystart", "nxstart")])
        # mapc := column; mapr := row; maps := section;
        crs_index = tuple(int(header[key]) - 1 for key in ("mapc", "mapr", "maps"))
        if not {0, 1, 2}.issubset(crs_index):
            raise ValueError(f"Malformatted CRS array in {filename}")

        voxel_size = mrc.voxel_size.astype(xyz_record).view(("<f4", 3))
        sampling_rate = np.array(voxel_size)[::-1]

        # A zero origin with non-zero start indices: derive the origin from
        # the start indices scaled by the sampling rate.
        if np.allclose(origin, 0) and not np.allclose(start, 0):
            origin = np.multiply(start, sampling_rate)

        extended_header = header.nsymbt

        metadata = {
            "min": float(header.dmin),
            "max": float(header.dmax),
            "mean": float(header.dmean),
            "std": float(header.rms),
        }

    non_standard_crs = crs_index != (0, 1, 2)
    if non_standard_crs:
        warnings.warn("Non standard MAPC, MAPR, MAPS, adapting data and origin.")

    if is_gzipped(filename):
        if use_memmap:
            warnings.warn(
                f"Cannot open gzipped file {filename} as memmap."
                f" Please run 'gunzip {filename}' to use memmap functionality."
            )
        use_memmap = False

    kwargs = {"header_only": False, "permissive": True}
    if subset is not None:
        # Format is zyx, but subsets are xyz
        subset = tuple(
            subset[axis] if axis < len(subset) else slice(0, data_shape[axis])
            for axis in crs_index[::-1]
        )
        subset_shape = tuple(part.stop - part.start for part in subset)
        if np.allclose(subset_shape, data_shape):
            # The subset spans the whole volume, use the regular readers.
            return cls._load_mrc(
                filename=filename, subset=None, use_memmap=use_memmap
            )

        data = cls._read_binary_subset(
            filename=filename,
            slices=subset,
            data_shape=data_shape,
            dtype=data_type,
            header_size=1024 + extended_header,
        )
    elif not use_memmap:
        with mrcfile.open(filename, **kwargs) as mrc:
            data = mrc.data
    else:
        with mrcfile.mrcmemmap.MrcMemmap(filename, **kwargs) as mrc:
            data = mrc.data

    if non_standard_crs:
        data = np.transpose(data, crs_index)
        origin = np.take(origin, crs_index)

    return data.T, origin[::-1], sampling_rate[::-1], metadata
|
335
|
+
|
336
|
+
@classmethod
def _load_em(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray, NDArray, NDArray, Dict]:
    """
    Extracts data from a EM file.

    Parameters
    ----------
    filename : str
        Path to a file in EM format.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the Density objects data attribute should be memory mapped.
        Ignored with a warning if the file is gzip compressed.

    Returns
    -------
    Tuple[NDArray, NDArray, NDArray, Dict]
        File data, coordinate origin, sampling rate array and metadata dictionary.

    Raises
    ------
    ValueError
        If the data type code in the header is not supported.

    References
    ----------
    .. [1] Nickell S. et al, Journal of Structural Biology, 2005.

    Warns
    -----
    If the sampling rate is zero, or if memory mapping is requested for a
    gzip compressed file.

    Notes
    -----
    A sampling rate of zero will be treated as missing value and changed to one. This
    function does not yet extract an origin like :py:meth:`Density._load_mrc`.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    DATA_TYPE_CODING = {
        1: np.byte,
        2: np.int16,
        3: np.int32,
        5: np.float32,
        6: np.float64,
        8: np.complex64,
        9: np.complex128,
    }

    gzipped = is_gzipped(filename)
    if gzipped and use_memmap:
        # Same handling as _load_mrc: the decompressed stream below is an
        # in-memory buffer and cannot be memory mapped.
        warnings.warn(
            f"Cannot open gzipped file {filename} as memmap."
            f" Please run 'gunzip {filename}' to use memmap functionality."
        )
        use_memmap = False

    opener = gzip_open if gzipped else open
    with opener(filename, mode="rb") as f:
        if gzipped:
            f = BytesIO(f.read())

        # Header layout as consumed here: 3 skipped bytes, 1 byte data type
        # code, 3 int32 dimensions, 80 skipped bytes, 40 int32 user
        # parameters, 256 skipped bytes (512 bytes in total).
        f.seek(3, 1)
        data_type_code = np.frombuffer(f.read(1), dtype="<i1")[0]
        data_type = DATA_TYPE_CODING.get(data_type_code)
        if data_type is None:
            # Without this check numpy would interpret ``None`` as float64
            # and silently misread the file.
            raise ValueError(
                f"Unsupported EM data type code {data_type_code} in {filename}."
            )

        data_shape = np.frombuffer(f.read(3 * 4), dtype="<i4")[::-1]

        f.seek(80, 1)
        user_params = np.frombuffer(f.read(40 * 4), dtype="<i4")

        pixel_size = user_params[6] / 1000.0
        f.seek(256, 1)

        if use_memmap and subset is None:
            data = np.memmap(f, dtype=data_type, mode="r", offset=f.tell()).reshape(
                data_shape
            )
        elif subset is None:
            data_size = np.prod(data_shape) * np.dtype(data_type).itemsize
            data = np.frombuffer(f.read(data_size), dtype=data_type).reshape(
                data_shape
            )
            data = data.astype(np.float32)
        else:
            subset_shape = [x.stop - x.start for x in subset]
            if np.allclose(subset_shape, data_shape):
                # The subset spans the whole volume, use the regular readers.
                return cls._load_em(
                    filename=filename, subset=None, use_memmap=use_memmap
                )

            data = cls._read_binary_subset(
                filename=filename,
                slices=subset,
                data_shape=data_shape,
                dtype=data_type(),
                header_size=f.tell(),
            )

    origin = np.zeros(3, dtype=data.dtype)

    if pixel_size == 0:
        warnings.warn(
            f"Got invalid sampling rate {pixel_size}, overwriting it to 1."
        )
        pixel_size = 1
    # NOTE(review): only the full in-memory path casts data to float32. On
    # the memmap and subset paths an integer data dtype truncates a
    # fractional pixel size here - confirm whether that is intended.
    sampling_rate = np.repeat(pixel_size, data.ndim).astype(data.dtype)

    return data, origin, sampling_rate, {}
|
436
|
+
|
437
|
+
@staticmethod
|
438
|
+
def _validate_slices(slices: Tuple[slice], shape: Tuple[int]):
|
439
|
+
"""
|
440
|
+
Validate whether the given slices fit within the provided data shape.
|
441
|
+
|
442
|
+
Parameters
|
443
|
+
----------
|
444
|
+
slices : Tuple[slice]
|
445
|
+
A tuple of slice objects, one per dimension of the data.
|
446
|
+
shape : Tuple[int]
|
447
|
+
The shape of the data being sliced, as a tuple of integers.
|
448
|
+
|
449
|
+
Raises
|
450
|
+
------
|
451
|
+
ValueError
|
452
|
+
- If the length of `slices` doesn't match the dimension of shape.
|
453
|
+
- If any slice has a stop value exceeding any dimension in shape.
|
454
|
+
- If any slice has a stop value that is negative.
|
455
|
+
"""
|
456
|
+
|
457
|
+
n_dims = len(shape)
|
458
|
+
if len(slices) != n_dims:
|
459
|
+
raise ValueError(
|
460
|
+
f"Expected length of slices : {n_dims}, got : {len(slices)}"
|
461
|
+
)
|
462
|
+
|
463
|
+
if any(
|
464
|
+
[
|
465
|
+
slices[i].stop > shape[i] or slices[i].start > shape[i]
|
466
|
+
for i in range(n_dims)
|
467
|
+
]
|
468
|
+
):
|
469
|
+
raise ValueError(f"Subset exceeds data dimensions ({shape}).")
|
470
|
+
|
471
|
+
if any([slices[i].stop < 0 or slices[i].start < 0 for i in range(n_dims)]):
|
472
|
+
raise ValueError("Subsets have to be non-negative.")
|
473
|
+
|
474
|
+
@classmethod
def _read_binary_subset(
    cls,
    filename: str,
    slices: Tuple[slice],
    data_shape: Tuple[int],
    dtype: type,
    header_size: int,
) -> NDArray:
    """
    Read a subset of data from a binary file with a header.

    The data is read one row at a time: for every selected index along the
    first two axes, the selected range along the last axis is read.

    Parameters
    ----------
    filename : str
        Path to the binary file. The file can be gzip compressed.
    slices : tuple of slice objects
        Slices representing the desired subset in each dimension.
    data_shape : tuple of ints
        Shape of the complete dataset in the file.
    dtype : numpy dtype
        Data type of the dataset in the file.
    header_size : int
        Size of the file's header in bytes.

    Returns
    -------
    NDArray
        Subset of the dataset as specified by the slices.

    Raises
    ------
    NotImplementedError
        If the data is not three dimensional.
    ValueError
        If the slices do not fit the data shape or the file ends before a
        requested row was read completely.

    See Also
    --------
    :py:meth:`Density._load_mrc`
    :py:meth:`Density._load_em`
    """
    if len(data_shape) != 3:
        raise NotImplementedError("Only 3-dimensional data can be subsetted.")

    cls._validate_slices(slices=slices, shape=data_shape)

    # Callers pass shapes and header sizes read from file headers as
    # fixed-width numpy integers. Convert to Python ints so the byte offsets
    # below cannot overflow for large files.
    _, n_rows, n_cols = (int(extent) for extent in data_shape)
    header_size = int(header_size)
    bytes_per_item = int(dtype.itemsize)
    (z_start, z_stop), (y_start, y_stop), (x_start, x_stop) = (
        (int(part.start), int(part.stop)) for part in slices
    )

    subset_shape = (z_stop - z_start, y_stop - y_start, x_stop - x_start)
    subset_data = np.empty(subset_shape, dtype=dtype)

    row_bytes = (x_stop - x_start) * bytes_per_item
    full_row_bytes = n_cols * bytes_per_item
    section_bytes = n_rows * full_row_bytes
    x_offset = x_start * bytes_per_item

    gzipped = is_gzipped(filename)
    opener = gzip_open if gzipped else open
    with opener(filename, mode="rb") as f:
        if gzipped:
            # Decompress once into memory so the seeks below act on a buffer.
            f = BytesIO(f.read())

        for z in range(z_start, z_stop):
            base_offset_z = header_size + z * section_bytes

            for y in range(y_start, y_stop):
                f.seek(base_offset_z + y * full_row_bytes + x_offset)
                buffer = f.read(row_bytes)
                if len(buffer) != row_bytes:
                    # A short read would otherwise fail with an unrelated
                    # shape error or, for a single element, be broadcast.
                    raise ValueError(
                        f"Unexpected end of data in {filename}: expected "
                        f"{row_bytes} bytes, got {len(buffer)}."
                    )
                row = np.frombuffer(buffer, dtype=dtype)
                subset_data[z - z_start, y - y_start] = row

    return subset_data
|
543
|
+
|
544
|
+
@staticmethod
def _load_skio(filename: str) -> Tuple[NDArray, NDArray, NDArray, Dict]:
    """
    Read ``filename`` with :obj:`skimage.io.imread` [1]_.

    Parameters
    ----------
    filename : str
        Path to a file whose format is supported by :obj:`skimage.io.imread`.
        Gzip compressed files are decompressed into memory before reading.

    Returns
    -------
    Tuple[NDArray, NDArray, NDArray, Dict]
        File data, coordinate origin, sampling rate array and metadata dictionary.
        The origin is all zeros, the sampling rate all ones and the metadata
        dictionary is empty.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html

    Warns
    -----
    Warns that origin and sampling_rate are not yet extracted from ``filename``.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    source = filename
    if is_gzipped(filename):
        with gzip_open(filename, "rb") as compressed:
            source = BytesIO(compressed.read())

    image = skio.imread(source)
    warnings.warn(
        "origin and sampling_rate are not yet extracted from non CCP4/MRC files."
    )
    n_axes = image.ndim
    return image, np.zeros(n_axes), np.ones(n_axes), {}
|
581
|
+
|
582
|
+
@staticmethod
def _load_hdf5(
    filename: str, subset: Tuple[slice] = None, use_memmap: bool = False, **kwargs
) -> Tuple[NDArray, NDArray, NDArray, Dict]:
    """
    Extracts data from an H5 file.

    The file is expected to contain the datasets ``data``, ``origin`` and
    ``sampling_rate``. File level attributes are returned as metadata.

    Parameters
    ----------
    filename : str
        Path to a file in HDF5 format.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the Density objects data attribute should be memory mapped.
        Ignored with a warning if the dataset has no file offset.
    **kwargs : dict, optional
        Accepted for signature compatibility and not used.

    Returns
    -------
    Tuple[NDArray, NDArray, NDArray, Dict]
        File data, coordinate origin, sampling rate array and metadata dictionary.

    See Also
    --------
    :py:meth:`Density._save_hdf5`
    """
    subset = ... if subset is None else subset

    with h5py.File(filename, mode="r") as infile:
        dataset = infile["data"]
        offset = dataset.id.get_offset()
        shape, dtype = dataset.shape, dataset.dtype

        origin = infile["origin"][...].copy()
        sampling_rate = infile["sampling_rate"][...].copy()
        metadata = dict(infile.attrs.items())

        if use_memmap and offset is None:
            # get_offset returns None when the dataset has no single offset
            # in the file, in which case it cannot be mapped directly.
            warnings.warn(
                f"Dataset 'data' in {filename} has no file offset and cannot "
                "be memory mapped. Loading it into memory instead."
            )
            use_memmap = False

        if not use_memmap:
            return dataset[subset], origin, sampling_rate, metadata

    # Map read-only: numpy's default mode would open the input file for
    # writing and propagate array modifications to disk.
    data = np.memmap(filename, dtype=dtype, mode="r", shape=shape, offset=offset)
    data = data[subset]

    return data, origin, sampling_rate, metadata
|
626
|
+
|
627
|
+
@classmethod
def from_structure(
    cls,
    filename_or_structure: str,
    shape: Tuple[int] = None,
    sampling_rate: NDArray = np.ones(1),
    origin: Tuple[float] = None,
    weight_type: str = "atomic_weight",
    weight_type_args: Dict = None,
    chain: str = None,
    filter_by_elements: Set = None,
    filter_by_residues: Set = None,
) -> "Density":
    """
    Reads in an atomic structure and converts it into a :py:class:`Density`
    instance.

    Parameters
    ----------
    filename_or_structure : str or :py:class:`tme.structure.Structure`
        Either :py:class:`tme.structure.Structure` instance or path to
        structure file that can be read by
        :py:meth:`tme.structure.Structure.from_file`.
    shape : tuple of int, optional
        Shape of the new :py:class:`Density` instance. By default,
        computes the minimum 3D box holding all atoms.
    sampling_rate : float, optional
        Sampling rate of the output array along each axis, in the same unit
        as the atoms in the structure. Defaults to one Ångstrom
        per axis unit.
    origin : tuple of float, optional
        Origin of the coordinate system. If provided, it is expected to be in
        z, y, x form in the same unit as the atoms in the structure.
        By default, computes origin as distance between minimal coordinate
        and coordinate system origin.
    weight_type : str, optional
        Which weight should be given to individual atoms. For valid values
        see :py:meth:`tme.structure.Structure.to_volume`.
    weight_type_args : dict, optional
        Additional arguments for atom weight computation. Defaults to an
        empty dictionary.
    chain : str, optional
        The chain that should be extracted from the structure. If multiple chains
        should be selected, they need to be a comma separated string,
        e.g. 'A,B,CE'. If chain None, all chains are returned. Default is None.
    filter_by_elements : set, optional
        Set of atomic elements to keep. Default is all atoms. Only used when
        ``filename_or_structure`` is a path.
    filter_by_residues : set, optional
        Set of residues to keep. Default is all residues. Only used when
        ``filename_or_structure`` is a path.

    Returns
    -------
    :py:class:`Density`
        Newly created :py:class:`Density` instance.

    References
    ----------
    .. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
        of atomic models into electron density maps. AIMS Biophysics 2, 8–20.

    Examples
    --------
    The following outlines the minimal parameters needed to read in an
    atomic structure and convert it into a :py:class:`Density` instance. For
    specification on supported formats refer to
    :py:meth:`tme.structure.Structure.from_file`.

    >>> path_to_structure = "/path/to/structure.cif"
    >>> density = Density.from_structure(path_to_structure)

    :py:meth:`Density.from_structure` will automatically determine the appropriate
    density dimensions based on the structure. The origin will be computed as
    minimal distance required to move the closest atom of the structure to the
    coordinate system origin. Furthermore, all chains will be used and the atom
    densities will be represented by their atomic weight and accumulated
    on a per-voxel basis.

    The following will read in chain A of an atomic structure and discretize
    it on a grid of dimension 100 x 100 x 100 using a sampling rate of
    2.5 Angstrom per voxel.

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    shape = (100, 100, 100),
    ...    sampling_rate = 2.5,
    ...    chain = "A"
    ... )

    We can restrict the generated :py:class:`Density` instance to only contain
    specific elements like carbon and nitrogen:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    filter_by_elements = {"C", "N"}
    ... )

    or specified residues such as polar amino acids:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    filter_by_residues = {"SER", "THR", "CYS", "ASN", "GLN", "TYR"}
    ... )

    In addition, :py:meth:`Density.from_structure` supports a variety of methods
    to convert atoms into densities, such as Gaussians

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    weight_type = "gaussian",
    ...    weight_type_args={"resolution": 20}
    ... )

    experimentally determined scattering factors

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    weight_type = "scattering_factors",
    ...    weight_type_args={"source": "dt1969"}
    ... )

    and their lowpass filtered representation introduced in [1]_:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    weight_type = "lowpass_scattering_factors",
    ...    weight_type_args={"source": "dt1969"}
    ... )

    See Also
    --------
    :py:meth:`tme.structure.Structure.from_file`
    :py:meth:`tme.structure.Structure.to_volume`
    """
    # Create the dictionary per call. A literal ``{}`` default would be one
    # object shared across calls and handed to ``to_volume`` every time.
    weight_type_args = {} if weight_type_args is None else weight_type_args

    structure = filename_or_structure
    if isinstance(filename_or_structure, str):
        # The element / residue filters are applied while parsing the file.
        structure = Structure.from_file(
            filename=filename_or_structure,
            filter_by_elements=filter_by_elements,
            filter_by_residues=filter_by_residues,
        )

    volume, origin, sampling_rate = structure.to_volume(
        shape=shape,
        sampling_rate=sampling_rate,
        origin=origin,
        chain=chain,
        weight_type=weight_type,
        weight_type_args=weight_type_args,
    )

    return cls(
        data=volume,
        origin=origin,
        sampling_rate=sampling_rate,
        metadata=structure.metadata.copy(),
    )
|
782
|
+
|
783
|
+
def to_file(self, filename: str, gzip: bool = False) -> None:
    """
    Writes class instance to disk.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        Gzip compress the output and add corresponding suffix to filename
        if not present. False by default.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    The following creates a :py:class:`Density` instance `dens` holding
    random data values and writes it to disk:

    >>> import numpy as np
    >>> from tme import Density
    >>> data = np.random.rand(50,50,50)
    >>> dens = Density(data=data, origin=(0, 0, 0), sampling_rate=(1, 1, 1))
    >>> dens.to_file("example.mrc")

    The output file can also be directly ``gzip`` compressed. The corresponding
    ".gz" extension will be automatically added if absent [1]_.

    >>> dens.to_file("example.mrc", gzip=True)

    The :py:meth:`Density.to_file` method also supports writing EM files [2]_:

    >>> dens.to_file("example.em")

    In addition, a variety of image file formats are supported [3]_:

    >>> data = np.random.rand(50,50)
    >>> dens = Density(data=data, origin=(0, 0), sampling_rate=(1, 1))
    >>> dens.to_file("example.tiff")

    Notes
    -----
    If ``filename`` ends with ".em" or ".h5" (optionally followed by ".gz"),
    an EM file or HDF5 file will be created. The default output format is
    CCP4/MRC. If the selected writer raises a ``ValueError``,
    :obj:`skimage.io.imsave` is used instead.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    if gzip and not filename.endswith(".gz"):
        filename = f"{filename}.gz"

    # Compare against the dotted extension so that names merely ending in the
    # letters "em" or "h5" (e.g. "problem") are not mistaken for EM/HDF5 files.
    if filename.endswith((".em", ".em.gz")):
        writer = self._save_em
    elif filename.endswith((".h5", ".h5.gz")):
        writer = self._save_hdf5
    else:
        writer = self._save_mrc

    try:
        writer(filename=filename, gzip=gzip)
    except ValueError:
        # Fall back to the generic image writer when the dedicated one fails.
        self._save_skio(filename=filename, gzip=gzip)
|
849
|
+
|
850
|
+
def _save_mrc(self, filename: str, gzip: bool = False) -> None:
    """
    Writes class instance to disk as mrc file.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        If True, the output will be gzip compressed.

    Notes
    -----
    The data is written with its first and third axis exchanged. Data types
    rejected by ``mrcfile.utils.mode_from_dtype`` trigger a warning and are
    converted to float32 before writing.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    """
    if gzip:
        compression = "gzip"
    else:
        compression = None

    volume = np.swapaxes(self.data, 0, 2)
    try:
        # Only used as a probe: raises ValueError for unsupported dtypes.
        mrcfile.utils.mode_from_dtype(volume.dtype)
    except ValueError:
        warnings.warn(
            "Current data type not supported by MRC format. Defaulting to float32."
        )
        volume = volume.astype(np.float32)

    with mrcfile.new(filename, overwrite=True, compression=compression) as mrc:
        mrc.set_data(volume)

        # Start indices are the origin expressed in voxels, rounded to integers.
        start_voxel = np.rint(np.divide(self.origin, self.sampling_rate))
        mrc.header.nxstart, mrc.header.nystart, mrc.header.nzstart = start_voxel

        mrc.header.mapc, mrc.header.mapr, mrc.header.maps = (1, 2, 3)
        mrc.header["origin"] = tuple(self.origin)
        mrc.voxel_size = tuple(self.sampling_rate)
|
883
|
+
|
884
|
+
def _save_em(self, filename: str, gzip: bool = False) -> None:
    """
    Writes data to disk as an .em file.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        If True, the output will be gzip compressed.

    Notes
    -----
    Data types without an entry in the internal type-code table are converted
    to float32 so that the type code stored in the header matches the bytes
    that follow it.

    References
    ----------
    .. [1] Nickell S. et al, Journal of Structural Biology, 2005.
    """
    DATA_TYPE_MAPPING = {
        np.dtype(np.int8): 1,
        np.dtype(np.int16): 2,
        np.dtype(np.int32): 3,
        np.dtype(np.float32): 5,
        np.dtype(np.float64): 6,
        np.dtype(np.complex64): 8,
        np.dtype(np.complex128): 9,
    }

    data = self.data
    data_type_code = DATA_TYPE_MAPPING.get(data.dtype)
    if data_type_code is None:
        # Without the cast the header would announce float32 (code 5) while
        # the payload kept its original item size, yielding an unreadable file.
        data = data.astype(np.float32)
        data_type_code = DATA_TYPE_MAPPING[data.dtype]

    func = gzip_open if gzip else open
    with func(filename, "wb") as f:
        # Four single-byte header fields; the last one holds the type code.
        f.write(np.array([0], dtype=np.int8).tobytes())
        f.write(np.array([0, 0, data_type_code], dtype=np.int8).tobytes())
        # Array dimensions as little-endian 32-bit integers.
        f.write(np.array(data.shape, dtype="<i4").tobytes())
        f.write(b" " * 80)
        # Forty 32-bit parameters; entry 6 stores sampling_rate[0] * 1000.
        user_params = np.zeros(40, dtype="<i4")
        user_params[6] = int(self.sampling_rate[0] * 1000)
        f.write(user_params.tobytes())
        f.write(b" " * 256)
        f.write(data.tobytes())
|
922
|
+
|
923
|
+
def _save_skio(self, filename: str, gzip: bool = False) -> None:
    """
    Uses :obj:`skimage.io.imsave` to write data to filename [1]_.

    The data is converted to float32 before being handed to the writer.

    Parameters
    ----------
    filename : str
        Path to write to with a format supported by :obj:`skimage.io.imsave`.
    gzip : bool, optional
        If True, the output will be gzip compressed.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html
    """
    if not gzip:
        skio.imsave(fname=filename, arr=self.data.astype("float32"))
        return None

    # For compressed output the image is rendered into memory first. The
    # format is passed explicitly, derived from the extension left after
    # removing ".gz" from the file name.
    buffer = BytesIO()
    uncompressed_name = basename(filename.replace(".gz", ""))
    extension = splitext(uncompressed_name)[1].replace(".", "")
    skio.imsave(fname=buffer, arr=self.data.astype("float32"), format=extension)

    with gzip_open(filename, "wb") as outfile:
        outfile.write(buffer.getvalue())
|
948
|
+
|
949
|
+
def _save_hdf5(self, filename: str, gzip: bool = False) -> None:
    """
    Saves the Density instance data to an HDF5 file, with optional compression.

    The file receives the datasets "data", "origin" and "sampling_rate"; every
    entry of :py:attr:`Density.metadata` is stored as a file attribute. As a
    side effect, the "mean", "std", "min" and "max" entries of
    :py:attr:`Density.metadata` are created (with value 0) if missing and
    recomputed from the data unless the data is a :obj:`numpy.memmap`.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        If True, the "data" dataset is written with gzip compression.

    See Also
    --------
    :py:meth:`Density._load_hdf5`
    """
    statistics = ("mean", "std", "min", "max")

    with h5py.File(filename, mode="w") as f:
        f.create_dataset(
            "data",
            data=self.data,
            shape=self.data.shape,
            dtype=self.data.dtype,
            compression="gzip" if gzip else None,
        )
        f.create_dataset("origin", data=self.origin)
        f.create_dataset("sampling_rate", data=self.sampling_rate)

        # Guarantee the statistics keys exist, keeping values already present.
        for name in statistics:
            self.metadata[name] = self.metadata.get(name, 0)

        # Memory-mapped data is not reduced here; its stored values are kept.
        if not isinstance(self.data, np.memmap):
            for name in statistics:
                self.metadata[name] = getattr(self.data, name)()

        for key, val in self.metadata.items():
            f.attrs[key] = val
|
988
|
+
|
989
|
+
@property
def empty(self) -> "Density":
    """
    Returns a zero-filled counterpart of the class instance.

    The returned object has a :py:attr:`Density.data` array of the same shape
    and data type filled with zeros, copies of :py:attr:`Density.origin` and
    :py:attr:`Density.sampling_rate`, and a :py:attr:`Density.metadata`
    dictionary whose "min", "max", "mean" and "std" entries are all zero.

    Returns
    -------
    :py:class:`Density`
        Empty class instance.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> empty_density = original_density.empty
    >>> np.all(empty_density.data == 0)
    True
    """
    blank = Density(
        data=np.zeros_like(self.data),
        origin=deepcopy(self.origin),
        sampling_rate=deepcopy(self.sampling_rate),
        metadata=dict.fromkeys(("min", "max", "mean", "std"), 0),
    )
    return blank
|
1017
|
+
|
1018
|
+
def copy(self) -> "Density":
    """
    Create a copy of the class instance.

    :py:attr:`Density.data` is copied, :py:attr:`Density.origin` and
    :py:attr:`Density.metadata` are deep-copied, and
    :py:attr:`Density.sampling_rate` is handed to the constructor as is.

    Returns
    -------
    :py:class:`Density`
        A copy of the class instance.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> copied_density = original_density.copy()
    >>> np.all(copied_density.data == original_density.data)
    True
    """
    duplicate = Density(
        data=self.data.copy(),
        origin=deepcopy(self.origin[:]),
        sampling_rate=self.sampling_rate,
        metadata=deepcopy(self.metadata),
    )
    return duplicate
|
1041
|
+
|
1042
|
+
def to_memmap(self) -> None:
    """
    Converts :py:attr:`Density.data` to a :obj:`numpy.memmap`.

    Nothing happens if :py:attr:`Density.data` already is a
    :obj:`numpy.memmap`.

    Examples
    --------
    The following outlines how to use the :py:meth:`Density.to_memmap` method.

    >>> from tme import Density
    >>> large_density = Density.from_file("/path/to/large_file.mrc")
    >>> large_density.to_memmap()

    A more efficient solution to achieve the result outlined above is to
    provide the ``use_memmap`` flag in :py:meth:`Density.from_file`.

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap = True)

    In practice, the :py:meth:`Density.to_memmap` method finds application, if a
    large number of :py:class:`Density` instances need to be in memory at once,
    without occupying the full physical memory required to store
    :py:attr:`Density.data`.

    See Also
    --------
    :py:meth:`Density.to_numpy`
    """
    if not isinstance(self.data, np.memmap):
        self.data = array_to_memmap(arr=self.data)
|
1072
|
+
|
1073
|
+
def to_numpy(self) -> None:
    """
    Converts :py:attr:`Density.data` to an in-memory :obj:`numpy.ndarray`.

    The conversion is delegated to ``memmap_to_array`` and the result replaces
    :py:attr:`Density.data`.

    Examples
    --------
    >>> from tme import Density
    >>> density = Density.from_file("/path/to/large_file.mrc")
    >>> density.to_memmap()  # Convert to memory-mapped array first
    >>> density.to_numpy()   # Now, convert back to an in-memory array

    See Also
    --------
    :py:meth:`Density.to_memmap`
    """
    in_memory = memmap_to_array(self.data)
    self.data = in_memory
|
1089
|
+
|
1090
|
+
@property
def shape(self) -> Tuple[int]:
    """
    Returns the dimensions of :py:attr:`Density.data`.

    Returns
    -------
    tuple
        The ``shape`` attribute of :py:attr:`Density.data`.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.shape
    (5,)
    """
    dimensions = self.data.shape
    return dimensions
|
1109
|
+
|
1110
|
+
@property
def data(self) -> NDArray:
    """
    Returns the value of :py:attr:`Density.data`.

    Returns
    -------
    NDArray
        The object currently stored as the instance's data.

    Examples
    --------
    The following outlines the usage of :py:attr:`Density.data`:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.data
    array([0, 1, 1, 1, 0])

    """
    stored = self._data
    return stored

@data.setter
def data(self, data: NDArray) -> None:
    """
    Sets the value of the instance's :py:attr:`Density.data` attribute.

    The passed object is stored as is, without copying or validation.
    """
    self._data = data
|
1139
|
+
|
1140
|
+
@property
def origin(self) -> NDArray:
    """
    Returns the value of the instance's :py:attr:`Density.origin`
    attribute.

    Returns
    -------
    NDArray
        Value of the instance's :py:attr:`Density.origin` attribute.

    Examples
    --------
    The following outlines the usage of :py:attr:`Density.origin`:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.origin
    array([0.])
    """
    stored = self._origin
    return stored

@origin.setter
def origin(self, origin: NDArray) -> None:
    """
    Sets the origin of the class instance.

    The value is converted to an array and each element is repeated
    ``data.ndim // origin.size`` times, so a single value is expanded to one
    entry per axis of :py:attr:`Density.data`.
    """
    values = np.asarray(origin)
    repeats = self.data.ndim // values.size
    self._origin = np.repeat(values, repeats)
|
1171
|
+
|
1172
|
+
@property
def sampling_rate(self) -> NDArray:
    """
    Returns the value of the instance's :py:attr:`Density.sampling_rate` attribute.

    Returns
    -------
    NDArray
        Sampling rate along axis.
    """
    stored = self._sampling_rate
    return stored

@sampling_rate.setter
def sampling_rate(self, sampling_rate: NDArray) -> None:
    """
    Sets the sampling rate of the class instance.

    The value is converted to an array and each element is repeated
    ``data.ndim // sampling_rate.size`` times, so a single value is expanded
    to one entry per axis of :py:attr:`Density.data`.
    """
    values = np.asarray(sampling_rate)
    repeats = self.data.ndim // values.size
    self._sampling_rate = np.repeat(values, repeats)
|
1192
|
+
|
1193
|
+
@property
def metadata(self) -> Dict:
    """
    Returns the instance's :py:attr:`Density.metadata` attribute.

    Returns
    -------
    Dict
        Metadata dictionary. Empty by default.
    """
    stored = self._metadata
    return stored

@metadata.setter
def metadata(self, metadata: Dict) -> None:
    """
    Sets the metadata of the class instance.

    The passed dictionary is stored as is, without copying.
    """
    self._metadata = metadata
|
1211
|
+
|
1212
|
+
def to_pointcloud(self, threshold: float = 0) -> NDArray:
    """
    Returns data indices that are larger than the given threshold.

    Parameters
    ----------
    threshold : float, optional
        The cutoff value to determine the indices. Default is 0.

    Returns
    -------
    NDArray
        Data indices that are larger than the given threshold with shape
        (dimensions, indices).

    Examples
    --------
    >>> density.to_pointcloud(0)
    """
    above_threshold = self.data > threshold
    # np.where on a mask alone yields one index array per axis.
    index_arrays = np.where(above_threshold)
    return np.array(index_arrays)
|
1232
|
+
|
1233
|
+
def _pad_slice(self, box: Tuple[slice], pad_kwargs: Dict = None) -> NDArray:
    """
    Pads the internal data array according to box.

    Negative slices indices will result in a left-hand padding, while
    slice indices larger than the box_size property of the class
    instance will result in a right-hand padding.

    Parameters
    ----------
    box : tuple of slice
        Tuple of slice objects that define the box dimensions.
    pad_kwargs: dict, optional
        Parameter dictionary passed to numpy pad. Defaults to no additional
        parameters.

    Returns
    -------
    NDArray
        The padded internal data array.
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    box_start = np.array([b.start for b in box])
    box_stop = np.array([b.stop for b in box])

    # Negative starts translate into padding in front of the data.
    left_pad = -np.minimum(box_start, 0)

    # Requested extent (counted from the start only when it is positive)
    # minus the current extent; negative results mean no trailing padding.
    right_pad = box_stop - box_start * (box_start > 0)
    right_pad -= np.array(self.shape, dtype=int)
    right_pad = np.maximum(right_pad, 0)

    padding = tuple(zip(left_pad, right_pad))
    return np.pad(self.data, padding, **pad_kwargs)
|
1264
|
+
|
1265
|
+
def adjust_box(self, box: Tuple[slice], pad_kwargs: Dict = None) -> None:
    """
    Adjusts :py:attr:`Density.data` and :py:attr:`Density.origin`
    according to the provided box.

    Parameters
    ----------
    box : tuple of slices
        Description of how each axis of :py:attr:`Density.data` should be sliced.
    pad_kwargs: dict, optional
        Parameter dictionary passed to :obj:`numpy.pad`. Defaults to no
        additional parameters.

    See Also
    --------
    :py:meth:`Density.trim_box`

    Examples
    --------
    The following demonstrates the ability of :py:meth:`Density.adjust_box`
    to extract a subdensity from the current :py:class:`Density` instance.
    :py:meth:`Density.adjust_box` not only operates on :py:attr:`Density.data`,
    but also modifies :py:attr:`Density.origin` according to ``box``.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5, 5)))
    >>> box = (slice(1, 4), slice(2, 5))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (1.0, 2.0), sampling_rate: (1, 1), Shape: (3, 3)

    :py:meth:`Density.adjust_box` can also extend the box of the current
    :py:class:`Density` instance. This is achieved by negative start or
    stops that exceed the dimension of the current :py:attr:`Density.data` array.

    >>> box = (slice(-1, 10), slice(2, 10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (0.0, 4.0), sampling_rate: (1, 1), Shape: (11, 8)

    However, do note that only the start coordinate of each slice in ``box``
    can be negative.

    >>> box = (slice(-1, 10), slice(2, -10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (-1.0, 6.0), sampling_rate: (1, 1), Shape: (11, 0)
    """
    # A None sentinel avoids sharing one mutable dict between calls.
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    # First restrict the box to the part that overlaps the current data.
    crop_box = tuple(
        slice(max(b.start, 0), min(b.stop, size))
        for b, size in zip(box, self.data.shape)
    )
    self.data = self.data[crop_box].copy()

    # In case the box is larger than the current map
    self.data = self._pad_slice(box, pad_kwargs=pad_kwargs)

    # Adjust the origin: shift it by the box start, scaled to sampling units.
    box_start = np.array([b.start for b in box])
    self.origin = self.origin + np.multiply(box_start, self.sampling_rate)
|
1325
|
+
|
1326
|
+
def trim_box(self, cutoff: float, margin: int = 0) -> Tuple[slice]:
    """
    Computes a rectangle with sufficient dimension that encloses all
    values of the internal data array larger than the specified cutoff,
    expanded by the specified margin.

    The output can be passed to :py:meth:`Density.adjust_box` to crop
    the internal data array.

    Parameters
    ----------
    cutoff : float
        The threshold value for determining the minimum enclosing box.
    margin : int, optional
        The margin to add to the box dimensions. Default is 0.

    Returns
    -------
    tuple
        A tuple containing slice objects representing the box. The slice
        boundaries are plain Python integers clipped to the data extent.

    Raises
    ------
    ValueError
        If the cutoff is larger than or equal to the maximum density value.

    Examples
    --------
    The following will compute the bounding box that encloses all values
    in the example array that are larger than zero:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0,1,1,1,0]))
    >>> dens.trim_box(0)
    (slice(1, 4, None),)

    The resulting tuple can be passed to :py:meth:`Density.adjust_box` to trim the
    current :py:class:`Density` instance:

    >>> dens.adjust_box(dens.trim_box(0))
    >>> dens.data.shape
    (3,)

    See Also
    --------
    :py:meth:`Density.adjust_box`
    """
    data_max = self.data.max()
    if cutoff >= data_max:
        raise ValueError(f"Cutoff exceeds data range ({cutoff} >= {data_max}).")

    ndim = self.data.ndim
    slices = []
    for axis in range(ndim):
        # Collapse every other axis so one maximum per position remains.
        other_axes = tuple(i for i in range(ndim) if i != axis)
        projected_max = np.max(self.data, axis=other_axes)
        valid = np.where(projected_max > cutoff)[0]

        # Convert to built-in ints so the slices do not carry NumPy scalars.
        start = int(max(0, valid[0] - margin))
        stop = int(min(self.data.shape[axis], valid[-1] + margin + 1))
        slices.append(slice(start, stop))

    return tuple(slices)
|
1388
|
+
|
1389
|
+
def minimum_enclosing_box(
    self,
    cutoff: float,
    use_geometric_center: bool = False,
) -> Tuple[slice]:
    """
    Compute the enclosing box that holds all possible rotations of the internal
    data array.

    Parameters
    ----------
    cutoff : float
        Above this value arr elements are considered.
    use_geometric_center : bool, optional
        Whether the box should accommodate the geometric or the coordinate
        center. Defaults to False.

    Returns
    -------
    tuple
        Tuple of slices corresponding to the minimum enclosing box.

    See Also
    --------
    :py:meth:`Density.adjust_box`
    :py:meth:`tme.matching_utils.minimum_enclosing_box`
    """
    points = self.to_pointcloud(threshold=cutoff)
    lower, upper = points.min(axis=1), points.max(axis=1)

    # Inside a method body this name resolves to the module-level helper,
    # not to this method.
    target_shape = minimum_enclosing_box(
        coordinates=points,
        use_geometric_center=use_geometric_center,
    )

    # Extent still missing per axis; never negative.
    extent = np.subtract(upper, lower)
    slack = np.maximum(np.subtract(target_shape, extent), 0)

    # Split the slack between both sides; an odd remainder goes to the stop.
    grow_lower = np.divide(slack, 2).astype(int)
    grow_upper = grow_lower + np.mod(slack, 2)

    lower = (lower - grow_lower).astype(int)
    upper = (upper + grow_upper).astype(int)

    return tuple(slice(start, stop) for start, stop in zip(lower, upper))
|
1434
|
+
|
1435
|
+
def pad(
    self, new_shape: Tuple[int], center: bool = True, padding_value: float = 0
) -> None:
    """
    :py:meth:`Density.pad` extends the internal :py:attr:`Density.data`
    array of the current :py:class:`Density` instance to ``new_shape`` and
    adapts :py:attr:`Density.origin` accordingly.

    Parameters
    ----------
    new_shape : tuple of int
        The desired shape for the new volume.
    center : bool, optional
        Whether the data should be centered in the new box. Default is True.
    padding_value : float, optional
        Value to pad the data array with. Default is zero.

    Raises
    ------
    ValueError
        If the length of ``new_shape`` does not match the dimensionality of
        :py:attr:`Density.data`.

    Examples
    --------
    The following demonstrates the functionality of :py:meth:`Density.pad` on
    a one-dimensional array:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True)
    >>> dens.data
    array([0, 1, 1, 1, 0])

    If ``center`` is set to False, the padding values will be appended:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = False)
    >>> dens.data
    array([1, 1, 1, 0, 0])

    It's also possible to pass a user-defined ``padding_value``:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True, padding_value = -1)
    >>> dens.data
    array([-1, 1, 1, 1, -1])
    """
    if len(new_shape) != self.data.ndim:
        raise ValueError(
            f"new_shape has dimension {len(new_shape)}"
            f" but expected was {self.data.ndim}."
        )

    if center:
        # Distribute the size difference over both ends; an odd remainder is
        # added to the trailing end.
        overhang = np.subtract(new_shape, self.shape).astype(int)
        half = overhang // 2
        stops = np.add(self.shape, half + overhang % 2)
        new_box = tuple(slice(lo, hi) for lo, hi in zip(-half, stops))
    else:
        new_box = tuple(slice(0, stop) for stop in new_shape)

    self.adjust_box(new_box, pad_kwargs={"constant_values": padding_value})
|
1499
|
+
|
1500
|
+
def centered(self, cutoff: float = 0) -> Tuple["Density", NDArray]:
    """
    Shifts the data center of mass to the center of the data array using linear
    interpolation. The box size of the returned :py:class:`Density` object is at
    least equal to the box size of the class instance.

    Parameters
    ----------
    cutoff : float, optional
        Only elements in data larger than cutoff will be considered for
        computing the new box. By default considers only positive elements.

    Notes
    -----
    Should any axis of the class instance data array be smaller than the return
    value of :py:meth:`Density.minimum_enclosing_box`, the size of the internal
    data array is adapted to avoid array elements larger than cutoff to fall
    outside the data array.

    Returns
    -------
    :py:class:`Density`
        A centered copy of the class instance.
    NDArray
        The offset between array center and center of mass.

    See Also
    --------
    :py:meth:`Density.trim_box`
    :py:meth:`Density.minimum_enclosing_box`

    Examples
    --------
    :py:meth:`Density.centered` returns a tuple containing a centered version
    of the current :py:class:`Density` instance, as well as an array with
    translations. The translation corresponds to the shift between the original and
    current center of mass.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5,5,5)))
    >>> centered_dens, translation = dens.centered(0)
    >>> translation
    array([0., 0., 0.])

    :py:meth:`Density.centered` extended the :py:attr:`Density.data` attribute
    of the current :py:class:`Density` instance and modified
    :py:attr:`Density.origin` accordingly.

    >>> centered_dens
    Origin: (-2.0, -2.0, -2.0), sampling_rate: (1, 1, 1), Shape: (9, 9, 9)

    :py:meth:`Density.centered` achieves centering via zero-padding and
    rigid-transform of the internal :py:attr:`Density.data` attribute.
    `centered_dens` is sufficiently large to represent all rotations of the
    :py:attr:`Density.data` attribute, such as ones obtained from
    :py:meth:`tme.matching_utils.get_rotation_matrices`.

    >>> from tme.matching_utils import get_rotation_matrices
    >>> rotation_matrix = get_rotation_matrices(dim = 3 ,angular_sampling = 10)[0]
    >>> rotated_centered_dens = centered_dens.rigid_transform(
    ...     rotation_matrix = rotation_matrix,
    ...     order = None
    ... )
    >>> print(centered_dens.data.sum(), rotated_centered_dens.data.sum())
    125.0 125.0

    """
    result = self.copy()

    enclosing = result.minimum_enclosing_box(
        cutoff=cutoff, use_geometric_center=False
    )
    result.adjust_box(enclosing)

    # Never shrink below the original box; even axis lengths are incremented
    # by one so that every axis ends up with an odd length.
    target_shape = np.maximum(result.shape, self.shape)
    target_shape = np.add(target_shape, 1 - np.mod(target_shape, 2))
    result.pad(target_shape)

    mass_center = self.center_of_mass(result.data, cutoff)
    array_center = np.divide(np.subtract(result.shape, 1), 2)
    translation = np.subtract(array_center, mass_center)

    result = result.rigid_transform(
        translation=translation,
        rotation_matrix=np.eye(result.data.ndim),
        use_geometric_center=False,
        order=1,
    )

    # Report the difference between the center of mass before and after the
    # transform.
    offset = np.subtract(mass_center, self.center_of_mass(result.data, cutoff))
    return result, offset
|
1589
|
+
|
1590
|
+
def rigid_transform(
    self,
    rotation_matrix: NDArray,
    translation: NDArray = None,
    order: int = 3,
    use_geometric_center: bool = True,
) -> "Density":
    """
    Performs a rigid transform of the class instance.

    Parameters
    ----------
    rotation_matrix : NDArray
        Rotation matrix to apply.
    translation : NDArray
        Translation to apply.
    order : int, optional
        Interpolation order to use. Default is 3, has to be in range 0-5.
    use_geometric_center : bool, optional
        Use geometric or mass center as rotation center.

    Returns
    -------
    Density
        The transformed instance of :py:class:`Density`. Its data is float32,
        unless the input data is float64, in which case float64 is kept.

    Examples
    --------
    Define the :py:class:`Density` instance

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.arange(9).reshape(3,3).astype(np.float32))
    >>> dens, translation = dens.centered(0)

    and apply the rotation, in this case a mirror around the z-axis

    >>> rotation_matrix = np.eye(dens.data.ndim)
    >>> rotation_matrix[0, 0] = -1
    >>> dens_transform = dens.rigid_transform(rotation_matrix = rotation_matrix)
    >>> dens_transform.data
    array([[0.        , 0.        , 0.        , 0.        , 0.        ],
           [0.5       , 3.0833333 , 3.5833333 , 3.3333333 , 0.        ],
           [0.75      , 4.6666665 , 5.6666665 , 5.4166665 , 0.        ],
           [0.25      , 1.6666666 , 2.6666667 , 2.9166667 , 0.        ],
           [0.        , 0.08333334, 0.5833333 , 0.8333333 , 0.        ]],
          dtype=float32)

    Notes
    -----
    This function assumes the internal :py:attr:`Density.data` attribute is
    sufficiently sized to hold the transformation.

    See Also
    --------
    :py:meth:`Density.centered`, :py:meth:`Density.minimum_enclosing_box`
    """
    ret = self.empty
    data = self.data

    # dtype objects are never instances of np.floating, so an isinstance check
    # on data.dtype always fails; compare the dtype itself instead. Everything
    # other than single or double precision is converted to float32.
    # NOTE(review): float64 input now stays float64 instead of being downcast;
    # confirm the backend handles double precision as expected.
    if data.dtype not in (np.float32, np.float64):
        data = data.astype(np.float32)

    # The output buffer has to match the dtype of the transformed input.
    ret.data = ret.data.astype(data.dtype)
    NumpyFFTWBackend().rigid_transform(
        arr=data,
        rotation_matrix=rotation_matrix,
        translation=translation,
        use_geometric_center=use_geometric_center,
        out=ret.data,
        order=order,
    )

    # Set values with magnitude below machine epsilon to exactly zero.
    eps = np.finfo(ret.data.dtype).eps
    ret.data[np.abs(ret.data) < eps] = 0
    return ret
|
1665
|
+
|
1666
|
+
def resample(
    self, new_sampling_rate: Tuple[float], method: str = "spline", order: int = 1
) -> "Density":
    """
    Resample :py:attr:`Density.data` to ``new_sampling_rate``.

    Parameters
    ----------
    new_sampling_rate : tuple of floats or float
        Target sampling rate, given once for all axes or once per axis.
    method : str, optional
        Resampling strategy, defaults to `spline`. Available options are:

        +---------+----------------------------------------------------------+
        | spline  | Spline interpolation using :obj:`scipy.ndimage.zoom`     |
        +---------+----------------------------------------------------------+
        | fourier | Fourier cropping                                         |
        +---------+----------------------------------------------------------+
    order : int, optional
        Spline order used for interpolation, by default 1. Ignored when
        ``method`` is `fourier`.

    Returns
    -------
    :py:class:`Density`
        A resampled copy of the class instance.

    Raises
    ------
    ValueError
        If ``method`` is not supported.

    Examples
    --------
    The following makes use of :py:meth:`tme.matching_utils.create_mask`
    to define a :py:class:`Density` instance containing a 2D circle with
    a sampling rate of 2

    >>> from tme import Density
    >>> from tme.matching_utils import create_mask
    >>> mask = create_mask(
    ...     mask_type="ellipse",
    ...     shape=(11,11),
    ...     center=(5,5),
    ...     radius=3
    ... )
    >>> dens = Density(mask, sampling_rate=2)
    >>> dens
    Origin: (0.0, 0.0), sampling_rate: (2, 2), Shape: (11, 11)

    Using :py:meth:`Density.resample` we can modulate the sampling rate
    using spline interpolation of desired order

    >>> dens.resample(new_sampling_rate= 4, method="spline", order=3)
    Origin: (0.0, 0.0), sampling_rate: (4, 4), Shape: (6, 6)

    Or Fourier cropping which results in a less smooth output, but more
    faithfully captures the contained frequency information

    >>> dens.resample(new_sampling_rate=4, method="fourier")
    Origin: (0.0, 0.0), sampling_rate: (4, 4), Shape: (6, 6)

    ``new_sampling_rate`` can also be specified per axis

    >>> dens.resample(new_sampling_rate=(4,1), method="spline", order=3)
    Origin: (0.0, 0.0), sampling_rate: (4, 1), Shape: (6, 22)

    """
    supported = ("spline", "fourier")
    if method not in supported:
        raise ValueError(f"Expected method to be one of {supported}, got '{method}'.")

    # A single rate is repeated so that there is one entry per data axis.
    target_rate = np.array(new_sampling_rate)
    target_rate = np.repeat(target_rate, self.data.ndim // target_rate.size)

    ret = self.copy()
    scale_factor = np.divide(ret.sampling_rate, target_rate)

    if method == "fourier":
        new_shape = np.round(np.multiply(scale_factor, ret.shape)).astype(int)
        axes = tuple(range(len(new_shape)))

        def _low_frequency_block(container_shape, block_shape):
            # Boolean mask over ``container_shape`` selecting a block of
            # ``block_shape`` (clipped to the container) that is rolled so it
            # wraps around index zero, i.e. the low frequencies in FFT layout.
            block = np.zeros(container_shape, dtype=bool)
            block[tuple(slice(0, extent) for extent in block_shape)] = 1
            half = np.floor(np.divide(block_shape, 2)).astype(int)
            return np.roll(block, shift=-half, axis=axes)

        source_mask = _low_frequency_block(self.shape, new_shape)
        destination_mask = _low_frequency_block(new_shape, self.shape)

        spectrum = np.fft.fftn(self.data)
        # Rescale in place to account for the changed number of elements.
        spectrum *= np.prod(new_shape) / np.prod(self.shape)
        resampled_spectrum = np.zeros(new_shape, dtype=spectrum.dtype)
        resampled_spectrum[destination_mask] = spectrum[source_mask]
        ret.data = np.real(np.fft.ifftn(resampled_spectrum))
    else:
        ret.data = zoom(ret.data, scale_factor, order=order)

    ret.sampling_rate = target_rate
    return ret
|
1773
|
+
|
1774
|
+
def density_boundary(
    self, weight: float, fraction_surface: float = 0.1, volume_factor: float = 1.21
) -> Tuple[float]:
    """
    Computes the density boundary of the class instance. The density
    boundary in this setting is defined as minimal and maximal density value
    enclosing a certain ``weight``.

    Parameters
    ----------
    weight : float
        Density weight to compute volume cutoff on. This could e.g. be the
        sum of contained atomic weights.
    fraction_surface : float, optional
        Approximate fraction of surface voxels on all voxels enclosing
        ``weight``, by default 0.1. Decreasing this value increases the
        upper volume boundary.
    volume_factor : float, optional
        Factor used to compute how many distinct density values
        can be used to represent ``weight``, by default 1.21.

    Returns
    -------
    tuple
        Tuple containing lower and upper bound on densities.

    Raises
    ------
    ValueError
        If any input parameter is <= 0.

    Notes
    -----
    The estimated voxel counts are clamped to ``[1, data.size]`` so that a
    very small ``weight`` selects the largest density value and a very large
    ``weight`` selects the smallest one.

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928
    """
    if weight <= 0 or fraction_surface <= 0 or volume_factor <= 0:
        raise ValueError(
            "weight, fraction_surface and volume_factor need to be > 0."
        )

    total_voxels = self.data.size
    num_voxels = np.min(
        volume_factor * weight / np.power(self.sampling_rate, self.data.ndim)
    ).astype(int)
    # A count of zero would turn the negative index below into index 0 (the
    # smallest value); a count above the map size is out of bounds.
    num_voxels = min(max(int(num_voxels), 1), total_voxels)
    surface_included_voxels = min(
        int(num_voxels * (1 + fraction_surface)), total_voxels
    )

    # Partial sort that places the k-th largest values at their final position.
    map_partition = np.partition(
        self.data.flatten(), (-num_voxels, -surface_included_voxels)
    )
    upper_limit = map_partition[-num_voxels]
    lower_limit = map_partition[-surface_included_voxels]

    return (lower_limit, upper_limit)
|
1829
|
+
|
1830
|
+
def surface_coordinates(
    self, density_boundaries: Tuple[float], method: str = "ConvexHull"
) -> NDArray:
    """
    Calculates the surface coordinates of the class instance using
    different boundary and surface detection methods. This method is relevant
    for determining coordinates used in non-exhaustive template matching,
    see :py:class:`tme.matching_optimization.optimize_match`.

    Parameters
    ----------
    density_boundaries : tuple
        Lower and upper bound of density values to be considered
        (can be obtained from :py:meth:`Density.density_boundary`).
    method : str, optional
        Method to use for surface coordinate computation

        +--------------+-----------------------------------------------------+
        | ConvexHull   | Use the lower bound density convex hull vertices.   |
        +--------------+-----------------------------------------------------+
        | Weight       | Use all coordinates within ``density_boundaries``.  |
        +--------------+-----------------------------------------------------+
        | Sobel        | Set densities below the lower bound density to zero |
        |              | apply a sobel filter and return density coordinates |
        |              | larger than 0.5 times the maximum filter value.     |
        +--------------+-----------------------------------------------------+
        | Laplace      | Like 'Sobel', but with a Laplace filter.            |
        +--------------+-----------------------------------------------------+
        | Minimum      | Like 'Sobel' and 'Laplace' but with a spherical     |
        |              | minimum filter on the lower density bound.          |
        +--------------+-----------------------------------------------------+

    Raises
    ------
    ValueError
        If the chosen method is not available.

    Returns
    -------
    NDArray
        An array of surface coordinates with shape (points, dimensions).

    References
    ----------
    .. [1] Cragnolini T, et al. (2021) Acta Crys Sect D Struct Biol

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    _available_methods = ["ConvexHull", "Weight", "Sobel", "Laplace", "Minimum"]

    if method not in _available_methods:
        raise ValueError(
            "Argument method has to be one of the following: %s"
            % ", ".join(_available_methods)
        )

    lower_bound, upper_bound = density_boundaries
    if method == "ConvexHull":
        binary = np.transpose(np.where(self.data > lower_bound))
        hull = ConvexHull(binary)
        surface_points = binary[hull.vertices]

    elif method == "Sobel":
        filtered_map = np.multiply(self.data, (self.data > lower_bound))
        magn = generic_gradient_magnitude(filtered_map, sobel)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Laplace":
        # NOTE(review): the filter is applied to the boolean mask rather than
        # to the masked densities used by 'Sobel' -- confirm this is intended.
        filtered_map = self.data > lower_bound
        magn = laplace(filtered_map)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Minimum":
        ndim = self.data.ndim
        # Cross-shaped footprint: the center plus its two direct neighbours
        # along every axis. The footprint must be three wide per axis for any
        # dimensionality; sizing it by ndim only works for 3D data.
        fp = np.zeros((3,) * ndim)
        center = np.ones(ndim, dtype=int)
        fp[tuple(center)] = 1
        for i in range(ndim):
            offset = np.zeros(ndim, dtype=int)
            offset[i] = 1
            fp[tuple(center + offset)] = 1
            fp[tuple(center - offset)] = 1

        filtered_map = (self.data > lower_bound).astype(int)
        filtered_map_surface = minimum_filter(
            filtered_map, footprint=fp, mode="constant", cval=0.8
        )
        # Voxels set in the mask but removed by the minimum filter are surface.
        filtered_map_surface = ((filtered_map - filtered_map_surface) == 1).astype(
            int
        )
        surface_points = np.argwhere(filtered_map_surface == 1)

    elif method == "Weight":
        surface_points = np.argwhere(
            np.logical_and(self.data < upper_bound, self.data > lower_bound)
        )

    return surface_points
|
1933
|
+
|
1934
|
+
def normal_vectors(self, coordinates: NDArray) -> NDArray:
    """
    Calculates the normal vectors for the given coordinates on the densities
    of the class instance. If the normal vector to a given coordinate
    can not be computed, the zero vector is returned instead. The output of this
    function can e.g. be used in
    :py:class:`tme.matching_optimization.NormalVectorScore`.

    Parameters
    ----------
    coordinates : NDArray
        An array of integer coordinates with shape (points, dimensions)
        of which to calculate the normal vectors.

    Returns
    -------
    NDArray
        Unit normal vectors, one row per coordinate that lies inside
        :py:attr:`Density.data`. Coordinates outside the data are dropped.

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    Raises
    ------
    ValueError
        If coordinates.ndim != 2 or coordinates.shape[1] does not match
        self.data.ndim.

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    coordinates = np.asarray(coordinates, dtype=int)

    if coordinates.ndim != 2:
        raise ValueError("Coordinates should have shape point x dimension.")
    if coordinates.shape[1] != self.data.ndim:
        raise ValueError(
            f"Expected coordinate dimension {self.data.ndim}, "
            f"got {coordinates.shape[1]}."
        )

    # Only coordinates inside the data array are evaluated.
    in_box = np.logical_and(
        coordinates < np.array(self.shape), coordinates >= 0
    ).all(axis=1)
    coordinates = coordinates[in_box, :]

    normal_vectors = []
    for point in coordinates:
        # Neighbourhood of one voxel in each direction, clipped to the array.
        start = np.maximum(point - 1, 0)
        stop = np.minimum(point + 2, self.data.shape)
        slc = tuple(slice(lo, hi) for lo, hi in zip(start, stop))

        inner_facing = np.array(np.where(self.data[slc] > self.data[tuple(point)]))
        if inner_facing.size == 0:
            normal_vectors.append(np.zeros_like(point))
            continue

        # The point sits at ``point - start`` inside the window, which is not
        # 1 along axes where the window was clipped at the lower array edge.
        direction = (inner_facing - (point - start)[:, None]).sum(axis=1)
        norm = np.linalg.norm(direction)
        if norm == 0:
            # Offsets cancel out: no direction can be derived.
            normal_vectors.append(np.zeros_like(point))
            continue
        normal_vectors.append(direction / norm)

    return np.array(normal_vectors)
|
2004
|
+
|
2005
|
+
def core_mask(self) -> NDArray:
    """
    Calculates a weighted core mask by performing iterative binary erosion on
    :py:attr:`Density.data`. In each iteration, all mask elements corresponding
    to a non-zero data element are incremented by one. Therefore, a mask element
    with value N corresponds to a data value that remained non-zero for N
    iterations. Mask elements with high values are likely part of the core
    density [1]_.

    Returns
    -------
    NDArray
        Core-weighted mask with shape of :py:attr:`Density.data`.

    References
    ----------
    .. [1]  Gydo Zundert and Alexandre Bonvin. Fast and sensitive rigid-body
            fitting into cryo-em density maps with powerfit. AIMS Biophysics,
            2:73–87, 04 2015. doi:10.3934/biophy.2015.2.73
    """
    survival_count = np.zeros(self.shape)
    remaining = self.data > 0
    # Peel one layer per pass until nothing is left of the positive region.
    while remaining.any():
        survival_count += remaining
        remaining = binary_erosion(remaining)
    return survival_count
|
2030
|
+
|
2031
|
+
@staticmethod
def center_of_mass(arr: NDArray, cutoff: float = None) -> NDArray:
    """
    Computes the center of mass of a numpy ndarray instance. For template
    matching it typically makes sense to only input positive densities.

    Parameters
    ----------
    arr : NDArray
        Array to compute the center of mass of.
    cutoff : float, optional
        Cutoff forwarded to the backend; densities at or below it are
        nullified for the computation. By default considers all values.

    Returns
    -------
    NDArray
        Center of mass with shape (arr.ndim).

    Notes
    -----
    The array is squared element-wise before it is handed to
    ``NumpyFFTWBackend().center_of_mass``.
    NOTE(review): ``cutoff`` is therefore presumably compared against squared
    values -- confirm against the backend implementation.
    """
    backend = NumpyFFTWBackend()
    return backend.center_of_mass(arr**2, cutoff)
|
2052
|
+
|
2053
|
+
@classmethod
def match_densities(
    cls,
    target: "Density",
    template: "Density",
    cutoff_target: float = 0,
    cutoff_template: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
    **kwargs,
) -> Tuple["Density", NDArray, NDArray]:
    """
    Aligns two :py:class:`Density` instances target and template and returns
    the aligned template.

    If voxel sizes of target and template don't match, coordinates are scaled
    to the numerically smaller voxel size. Instances are prealigned based on
    their center of mass. Finally
    :py:meth:`tme.matching_optimization.optimize_match` is used to determine
    translation and rotation to map template to target.

    Parameters
    ----------
    target : Density
        The target map for alignment.
    template : Density
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    cutoff_template : float, optional
        The cutoff value for the template map, by default 0.
    scoring_method : str, optional
        The scoring method to use for alignment. See
        :py:class:`tme.matching_optimization.create_score_object` for available
        methods, by default "NormalizedCrossCorrelation".
    kwargs : dict, optional
        Optional keyword arguments passed to
        :py:meth:`tme.matching_optimization.optimize_match`.

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as :py:class:`Density`
        object, translation in voxels and rotation matrix used for the
        transformation.

    Notes
    -----
    ``template.sampling_rate`` is overwritten with its per-axis form, and
    ``template.data`` is handed to ``normalize_template``.
    NOTE(review): ``normalize_template`` presumably modifies the array in
    place since its return value is not used -- confirm.
    NOTE(review): earlier documentation stated that no densities below
    ``cutoff_template`` are present in the returned object; the cutoff is only
    visibly applied to the point cloud used for scoring -- confirm.
    """
    from .matching_utils import normalize_template
    from .matching_optimization import optimize_match, create_score_object

    template_mask = template.empty
    template_mask.data.fill(1)

    normalize_template(
        template=template.data,
        mask=template_mask.data,
        n_observations=template_mask.data.sum(),
    )

    # Expand scalar sampling rates to one entry per axis.
    target_sampling_rate = np.array(target.sampling_rate)
    template_sampling_rate = np.array(template.sampling_rate)
    target_sampling_rate = np.repeat(
        target_sampling_rate, target.data.ndim // target_sampling_rate.size
    )
    template_sampling_rate = np.repeat(
        template_sampling_rate, template.data.ndim // template_sampling_rate.size
    )
    if not np.allclose(target_sampling_rate, template_sampling_rate):
        print(
            "Voxel size of target and template do not match. "
            "Using smaller voxel size for refinement."
        )

    target_coordinates = target.to_pointcloud(cutoff_target)

    template_coordinates = template.to_pointcloud(cutoff_template)
    template_weights = template.data[tuple(template_coordinates)]

    # Bring both point clouds onto the finer of the two voxel grids.
    refinement_sampling_rate = np.minimum(
        target_sampling_rate, template_sampling_rate
    )
    target_scaling = np.divide(target_sampling_rate, refinement_sampling_rate)
    template_scaling = np.divide(template_sampling_rate, refinement_sampling_rate)
    target_coordinates = target_coordinates * target_scaling[:, None]
    template_coordinates = template_coordinates * template_scaling[:, None]

    # Integer pre-alignment shift between the two centers of mass.
    mass_center_difference = np.subtract(
        cls.center_of_mass(target.data, cutoff_target),
        cls.center_of_mass(template.data, cutoff_template),
    ).astype(int)
    template_coordinates += mass_center_difference[:, None]

    coordinates_mask = template_mask.to_pointcloud()
    coordinates_mask = coordinates_mask * template_scaling[:, None]
    coordinates_mask += mass_center_difference[:, None]

    score_object = create_score_object(
        score=scoring_method,
        target=target.data,
        template_coordinates=template_coordinates,
        template_mask_coordinates=coordinates_mask,
        template_weights=template_weights,
        sampling_rate=np.ones(template.data.ndim),
    )

    # The achieved score is not part of the return value.
    translation, rotation_matrix, _ = optimize_match(
        score_object=score_object, **kwargs
    )

    # Fold the pre-alignment back in and undo the template scaling.
    translation += mass_center_difference
    translation = np.divide(translation, template_scaling)

    template.sampling_rate = template_sampling_rate.copy()
    ret = template.rigid_transform(
        rotation_matrix=rotation_matrix, use_geometric_center=False
    )
    ret.origin = target.origin.copy()
    ret.origin = ret.origin + np.multiply(translation, target_sampling_rate)

    return ret, translation, rotation_matrix
|
2173
|
+
|
2174
|
+
@classmethod
def match_structure_to_density(
    cls,
    target: "Density",
    template: "Structure",
    cutoff_target: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
    optimization_method: str = "basinhopping",
    maxiter: int = 500,
) -> Tuple["Structure", NDArray, NDArray]:
    """
    Aligns a :py:class:`tme.structure.Structure` template to :py:class:`Density`
    target and returns an aligned :py:class:`tme.structure.Structure` instance.

    The template is first converted to a :py:class:`Density` at the sampling
    rate of ``target`` and then aligned with :py:meth:`Density.match_densities`
    using a template cutoff of 0.

    Parameters
    ----------
    target : Density
        The target map for template matching.
    template : Structure
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    scoring_method : str, optional
        The scoring method to use for template matching. See
        :py:class:`tme.matching_optimization.create_score_object` for available
        methods, by default "NormalizedCrossCorrelation".
    optimization_method : str, optional
        Optimizer that is used.
        See :py:meth:`tme.matching_optimization.optimize_match`.
    maxiter : int, optional
        Maximum number of iterations for the optimizer.
        See :py:meth:`tme.matching_optimization.optimize_match`.

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as
        :py:class:`tme.structure.Structure` object, translation and rotation
        matrix used for the transformation.

    Notes
    -----
    The returned translation is the difference between the origin of the
    aligned density and the origin of the density created from ``template``.
    NOTE(review): earlier documentation stated that translation and rotation
    are in xyz format, unlike :py:meth:`match_densities` (zyx) -- confirm.
    """
    template_density = cls.from_structure(
        filename_or_structure=template, sampling_rate=target.sampling_rate
    )

    # Only the aligned density and the rotation are needed from the fit; the
    # voxel translation is replaced by the origin difference computed below.
    aligned_density, _, rotation_matrix = cls.match_densities(
        target=target,
        template=template_density,
        cutoff_target=cutoff_target,
        cutoff_template=0,
        scoring_method=scoring_method,
        optimization_method=optimization_method,
        maxiter=maxiter,
    )

    final_translation = np.subtract(aligned_density.origin, template_density.origin)

    out = template.copy()
    out = out.rigid_transform(
        translation=final_translation, rotation_matrix=rotation_matrix
    )

    return out, final_translation, rotation_matrix
|
2250
|
+
|
2251
|
+
def align_to_axis(self, data: NDArray = None, axis: int = 2, flip: bool = False):
    """
    Pass the coordinates of all positive elements to the ``align_to_axis``
    helper and return its result.

    Parameters
    ----------
    data : NDArray, optional
        Array whose positive elements define the coordinates. Defaults to
        :py:attr:`Density.data`.
    axis : int, optional
        Forwarded to the helper, by default 2.
    flip : bool, optional
        Forwarded to the helper, by default False.

    Returns
    -------
    The return value of the ``align_to_axis`` helper, called with the
    coordinates in (points, dimensions) layout and their weights.

    Notes
    -----
    NOTE(review): weights are always read from :py:attr:`Density.data`, also
    when ``data`` is passed explicitly -- confirm this is intended.
    """
    source = self.data if data is None else data

    positive = np.array(np.where(source > 0))
    weights = self.data[tuple(positive)]
    return align_to_axis(positive.T, weights=weights, axis=axis, flip=flip)
|
2258
|
+
|
2259
|
+
|
2260
|
+
def is_gzipped(filename: str) -> bool:
    """
    Check if a file is a gzip file by reading its magic number.

    Parameters
    ----------
    filename : str
        Path of the file to inspect.

    Returns
    -------
    bool
        True if the first two bytes of the file equal ``b"\\x1f\\x8b"``.
    """
    gzip_magic = b"\x1f\x8b"
    with open(filename, "rb") as handle:
        header = handle.read(len(gzip_magic))
    return header == gzip_magic
|