pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
- pytme-0.1.5.data/scripts/match_template.py +744 -0
- pytme-0.1.5.data/scripts/postprocess.py +279 -0
- pytme-0.1.5.data/scripts/preprocess.py +93 -0
- pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
- pytme-0.1.5.dist-info/LICENSE +153 -0
- pytme-0.1.5.dist-info/METADATA +69 -0
- pytme-0.1.5.dist-info/RECORD +63 -0
- pytme-0.1.5.dist-info/WHEEL +5 -0
- pytme-0.1.5.dist-info/entry_points.txt +6 -0
- pytme-0.1.5.dist-info/top_level.txt +2 -0
- scripts/__init__.py +0 -0
- scripts/estimate_ram_usage.py +81 -0
- scripts/match_template.py +744 -0
- scripts/match_template_devel.py +788 -0
- scripts/postprocess.py +279 -0
- scripts/preprocess.py +93 -0
- scripts/preprocessor_gui.py +729 -0
- tme/__init__.py +6 -0
- tme/__version__.py +1 -0
- tme/analyzer.py +1144 -0
- tme/backends/__init__.py +134 -0
- tme/backends/cupy_backend.py +309 -0
- tme/backends/matching_backend.py +1154 -0
- tme/backends/npfftw_backend.py +763 -0
- tme/backends/pytorch_backend.py +526 -0
- tme/data/__init__.py +0 -0
- tme/data/c48n309.npy +0 -0
- tme/data/c48n527.npy +0 -0
- tme/data/c48n9.npy +0 -0
- tme/data/c48u1.npy +0 -0
- tme/data/c48u1153.npy +0 -0
- tme/data/c48u1201.npy +0 -0
- tme/data/c48u1641.npy +0 -0
- tme/data/c48u181.npy +0 -0
- tme/data/c48u2219.npy +0 -0
- tme/data/c48u27.npy +0 -0
- tme/data/c48u2947.npy +0 -0
- tme/data/c48u3733.npy +0 -0
- tme/data/c48u4749.npy +0 -0
- tme/data/c48u5879.npy +0 -0
- tme/data/c48u7111.npy +0 -0
- tme/data/c48u815.npy +0 -0
- tme/data/c48u83.npy +0 -0
- tme/data/c48u8649.npy +0 -0
- tme/data/c600v.npy +0 -0
- tme/data/c600vc.npy +0 -0
- tme/data/metadata.yaml +80 -0
- tme/data/quat_to_numpy.py +42 -0
- tme/data/scattering_factors.pickle +0 -0
- tme/density.py +2314 -0
- tme/extensions.cpython-311-darwin.so +0 -0
- tme/helpers.py +881 -0
- tme/matching_data.py +377 -0
- tme/matching_exhaustive.py +1553 -0
- tme/matching_memory.py +382 -0
- tme/matching_optimization.py +1123 -0
- tme/matching_utils.py +1180 -0
- tme/parser.py +429 -0
- tme/preprocessor.py +1291 -0
- tme/scoring.py +866 -0
- tme/structure.py +1428 -0
- tme/types.py +10 -0
tme/density.py
ADDED
@@ -0,0 +1,2314 @@
|
|
1
|
+
""" Implements class to represent electron density maps.
|
2
|
+
|
3
|
+
Copyright (c) 2023 European Molecular Biology Laboratory
|
4
|
+
|
5
|
+
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
|
6
|
+
"""
|
7
|
+
|
8
|
+
import warnings
|
9
|
+
from io import BytesIO
|
10
|
+
from copy import deepcopy
|
11
|
+
from gzip import open as gzip_open
|
12
|
+
from typing import Tuple, Dict, Set
|
13
|
+
from os.path import splitext, basename
|
14
|
+
|
15
|
+
import mrcfile
|
16
|
+
import numpy as np
|
17
|
+
import skimage.io as skio
|
18
|
+
|
19
|
+
from scipy.ndimage import (
|
20
|
+
laplace,
|
21
|
+
generic_gradient_magnitude,
|
22
|
+
minimum_filter,
|
23
|
+
sobel,
|
24
|
+
binary_erosion,
|
25
|
+
zoom,
|
26
|
+
)
|
27
|
+
from scipy.spatial import ConvexHull
|
28
|
+
|
29
|
+
from .matching_optimization import FitRefinement
|
30
|
+
from .structure import Structure
|
31
|
+
from .matching_utils import (
|
32
|
+
minimum_enclosing_box,
|
33
|
+
array_to_memmap,
|
34
|
+
memmap_to_array,
|
35
|
+
)
|
36
|
+
from .types import NDArray
|
37
|
+
from .helpers import is_gzipped
|
38
|
+
from .backends import NumpyFFTWBackend
|
39
|
+
|
40
|
+
|
41
|
+
class Density:
|
42
|
+
"""
|
43
|
+
Contains electron density data and implements operations on it.
|
44
|
+
|
45
|
+
Parameters
|
46
|
+
----------
|
47
|
+
data : NDArray
|
48
|
+
Electron density data.
|
49
|
+
origin : NDArray, optional
|
50
|
+
Origin of the coordinate system. Defaults to zero.
|
51
|
+
sampling_rate : NDArray, optional
|
52
|
+
Sampling rate along data axis. Defaults to one.
|
53
|
+
metadata : dict, optional
|
54
|
+
Dictionary with metadata information, empty by default.
|
55
|
+
|
56
|
+
Raises
|
57
|
+
------
|
58
|
+
ValueError
|
59
|
+
The metadata parameter is not a dictionary.
|
60
|
+
|
61
|
+
Examples
|
62
|
+
--------
|
63
|
+
The following achieves the minimal definition of a :py:class:`Density` instance.
|
64
|
+
|
65
|
+
>>> import numpy as np
|
66
|
+
>>> from tme import Density
|
67
|
+
>>> data = np.random.rand(50,70,40)
|
68
|
+
>>> Density(data = data)
|
69
|
+
|
70
|
+
Optional parameters are ``origin`` and ``sampling_rate`` that correspond
|
71
|
+
to the coordinate system reference and the edge length per axis element,
|
72
|
+
as well as the ``metadata`` dictionary. By default,
|
73
|
+
:py:attr:`Density.origin` is set to zero and :py:attr:`Density.sampling_rate`
|
74
|
+
to 1. If provided, origin or sampling_rate either need to be a single value:
|
75
|
+
|
76
|
+
>>> Density(data = data, origin = 0, sampling_rate = 1)
|
77
|
+
|
78
|
+
Be specified along each data axis:
|
79
|
+
|
80
|
+
>>> Density(data = data, origin = (0, 0, 0), sampling_rate = (1.5, 1.1, 1.2))
|
81
|
+
|
82
|
+
Or a combination of both:
|
83
|
+
|
84
|
+
>>> Density(data = data, origin = 0, sampling_rate = (1.5, 1.1, 1.2))
|
85
|
+
"""
|
86
|
+
|
87
|
+
def __init__(
    self,
    data: NDArray,
    origin: NDArray = None,
    sampling_rate: NDArray = None,
    metadata: Dict = None,
):
    """
    Store the density array together with its coordinate system description.

    Parameters
    ----------
    data : NDArray
        Electron density data.
    origin : NDArray, optional
        Origin of the coordinate system, either a single value that is used
        for every axis of ``data`` or one value per axis. Defaults to zero.
    sampling_rate : NDArray, optional
        Sampling rate, either a single value that is used for every axis of
        ``data`` or one value per axis. Defaults to one.
    metadata : dict, optional
        Dictionary with metadata information. A new empty dictionary is
        created for every instance when omitted.

    Raises
    ------
    ValueError
        If ``sampling_rate`` or ``origin`` contain neither one element nor one
        element per axis of ``data``, or if ``metadata`` is not a dictionary.
    """
    n_dims = data.ndim
    origin = np.asarray(np.zeros(n_dims) if origin is None else origin)
    sampling_rate = np.asarray(1 if sampling_rate is None else sampling_rate)

    # Only two layouts are meaningful: a single value that is broadcast to all
    # axes, or exactly one value per axis. Anything else is rejected up front
    # so the reported size is the one the caller actually passed.
    if sampling_rate.size not in (1, n_dims):
        raise ValueError(
            "sampling_rate size should be 1 or "
            f"{n_dims}, not {sampling_rate.size}."
        )
    if origin.size not in (1, n_dims):
        raise ValueError(f"Expected origin size : {n_dims}, got {origin.size}.")

    if metadata is None:
        # Never share one dictionary between instances.
        metadata = {}
    elif not isinstance(metadata, dict):
        raise ValueError("Argument metadata has to be of class dict.")

    # np.repeat flattens its input and returns a new array, so the stored
    # attributes are always one-dimensional copies with one entry per axis.
    origin = np.repeat(origin, n_dims if origin.size == 1 else 1)
    sampling_rate = np.repeat(
        sampling_rate, n_dims if sampling_rate.size == 1 else 1
    )

    self.data, self.origin, self.sampling_rate = data, origin, sampling_rate
    self.metadata = metadata
|
112
|
+
|
113
|
+
def __repr__(self):
    """
    Summarize the instance by memory address, origin, sampling rate and shape.

    Origin and sampling rate are rounded to three decimals for display.
    """
    template = "Density object at {}\nOrigin: {}, sampling_rate: {}, Shape: {}"
    address = hex(id(self))
    shown_origin = tuple(np.round(self.origin, 3))
    shown_rate = tuple(np.round(self.sampling_rate, 3))
    return template.format(address, shown_origin, shown_rate, self.shape)
|
121
|
+
|
122
|
+
@classmethod
def from_file(
    cls, filename: str, subset: Tuple[slice] = None, use_memmap: bool = False
) -> "Density":
    """
    Create a :py:class:`Density` instance from a file on disk.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC, EM or a format supported by skimage.io.imread.
        The file can be gzip compressed.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the data attribute of the created instance should be memory
        mapped.

    Returns
    -------
    Density
        An instance of the :py:class:`Density` class.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005.
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    Reading a file in the CCP4/MRC format [1]_:

    >>> from tme import Density
    >>> Density.from_file("/path/to/file.mrc")

    Only part of the data can be read by passing one slice per dimension via
    ``subset``, e.g. the first 50 voxels along each dimension:

    >>> subset_slices = (slice(0, 50), slice(0, 50), slice(0, 50))
    >>> Density.from_file("/path/to/file.mrc", subset=subset_slices)

    Large density maps can be memory mapped instead of being loaded entirely
    into memory:

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap=True)

    Note that use_memmap will be ignored if the file is gzip compressed.

    Files with an `.em` or `.em.gz` extension are parsed as EM files [2]_:

    >>> Density.from_file("/path/to/file.em")
    >>> Density.from_file("/path/to/file.em.gz")

    Any other format is handed to skimage.io.imread [3]_ once parsing as
    CCP4/MRC failed. This fallback does not extract origin or sampling_rate
    information from the file:

    >>> Density.from_file("/path/to/other_format.tif")

    Notes
    -----
    If ``filename`` ends with ".em" or ".em.gz" the method will parse it as EM
    file. Otherwise it defaults to the CCP4/MRC format. Whenever the chosen
    parser raises a ValueError, skimage.io.imread is used regardless of the
    extension and ``subset`` is applied to the fully loaded array.

    See Also
    --------
    :py:meth:`Density.to_file`
    """
    # EM files are recognised by extension only, everything else is first
    # attempted as CCP4/MRC.
    if filename.endswith((".em", ".em.gz")):
        loader = cls._load_em
    else:
        loader = cls._load_mrc

    try:
        data, origin, sampling_rate = loader(
            filename=filename, subset=subset, use_memmap=use_memmap
        )
    except ValueError:
        # Generic image fallback: the whole image is read, then cropped.
        data, origin, sampling_rate = cls._load_skio(filename=filename)
        if subset is not None:
            cls._validate_slices(slices=subset, shape=data.shape)
            data = data[subset].copy()

    return cls(data=data, origin=origin, sampling_rate=sampling_rate)
|
212
|
+
|
213
|
+
@classmethod
def _load_mrc(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray]:
    """
    Extracts data, origin and sampling rate from a CCP4/MRC file.

    The header is read first to obtain shape, data type, origin, start indices,
    axis mapping and voxel size. Depending on ``subset`` and ``use_memmap`` the
    data is then read partially from the raw file, fully into memory, or
    memory mapped.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC format.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the Density objects data attribute should be memory mapped.
        Disabled with a warning if the file is gzip compressed.

    Returns
    -------
    NDArray
        The data attribute of the CCP4/MRC file.
    NDArray
        The coordinate origin of the data.
    NDArray
        The sampling rate of the data.

    References
    ----------
    .. [1] Burnley T, Palmer C & Winn M (2017) Recent developments in the
           CCP-EM software suite. Acta Cryst. D73:469–477.
           doi: 10.1107/S2059798317007859

    Raises
    ------
    ValueError
        If the mrcfile is malformatted.
        If the subset starts below zero, exceeds the data dimension or does not
        have the same length as the data dimensions.

    See Also
    --------
    :py:meth:`Density.from_file`

    """
    # First pass only parses the header, no density values are loaded yet.
    with mrcfile.open(filename, header_only=True) as mrc:
        data_shape = mrc.header.nz, mrc.header.ny, mrc.header.nx
        data_type = mrcfile.utils.data_dtype_from_header(mrc.header)

        # All map related parameters should be in zyx order
        origin = (
            mrc.header["origin"]
            .astype([("x", "<f4"), ("y", "<f4"), ("z", "<f4")])
            .view(("<f4", 3))
        )
        origin = origin[::-1]

        # nx := column; ny := row; nz := section
        # NOTE(review): start is assembled in x, y, z order, whereas origin and
        # sampling_rate are reversed to z, y, x - confirm that the element-wise
        # comparison and multiplication further down are intended.
        start = np.array(
            [
                int(mrc.header["nxstart"]),
                int(mrc.header["nystart"]),
                int(mrc.header["nzstart"]),
            ]
        )

        # The header stores the axis mapping one-based, convert to zero-based.
        crs_index = (
            np.array(
                [
                    int(mrc.header["mapc"]),
                    int(mrc.header["mapr"]),
                    int(mrc.header["maps"]),
                ]
            )
            - 1
        )

        # mapc := column; mapr := row; maps := section;
        # Each of the three axes has to occur in the mapping.
        if not (0 in crs_index and 1 in crs_index and 2 in crs_index):
            raise ValueError(f"Malformatted CRS array in {filename}")

        sampling_rate = mrc.voxel_size.astype(
            [("x", "<f4"), ("y", "<f4"), ("z", "<f4")]
        ).view(("<f4", 3))
        sampling_rate = sampling_rate[::-1]
        sampling_rate = np.array(sampling_rate)

        # Reconcile the origin field with the start indices: the origin is
        # kept if it equals start, otherwise it is replaced by
        # start * sampling_rate when it is all zero or when its integer part
        # disagrees with start * sampling_rate on every axis (and start is
        # not all zero).
        if np.all(origin == start):
            pass
        elif np.all(origin == 0) and not np.all(start == 0):
            origin = np.multiply(start, sampling_rate)
        elif np.all(
            np.abs(origin.astype(int))
            != np.abs((start * sampling_rate).astype(int))
        ) and not np.all(start == 0):
            origin = np.multiply(start, sampling_rate)

    # Memory mapping is not attempted for gzip compressed files.
    if is_gzipped(filename):
        if use_memmap:
            warnings.warn(
                f"Cannot open gzipped file {filename} as memmap."
                f" Please gunzip {filename} to use memmap functionality."
            )
            use_memmap = False

    if subset is not None:
        subset_shape = [x.stop - x.start for x in subset]
        # A subset spanning the complete map is served by the regular loader.
        if np.allclose(subset_shape, data_shape):
            return cls._load_mrc(
                filename=filename, subset=None, use_memmap=use_memmap
            )

        # NOTE(review): the offset is fixed to 1024 bytes; files with an
        # extended header would presumably need a larger offset - confirm.
        # NOTE(review): this path returns without the axis transposition that
        # is applied to fully loaded data below - confirm this is intended.
        data = cls._read_binary_subset(
            filename=filename,
            slices=subset,
            data_shape=data_shape,
            dtype=data_type,
            header_size=1024,
        )
        return data, origin, sampling_rate

    if not use_memmap:
        with mrcfile.open(filename, header_only=False) as mrc:
            data = mrc.data.astype(np.float32, copy=False)
    else:
        with mrcfile.mrcmemmap.MrcMemmap(filename, header_only=False) as mrc:
            data = mrc.data

    # Reorder the data axes if the file does not use the default axis mapping.
    if not np.all(crs_index == (0, 1, 2)):
        data.setflags(write=True)
        data = np.transpose(data, crs_index)
        # NOTE(review): start is not used after this point, so the reordering
        # has no effect on the returned values.
        start = np.take(start, crs_index)

    return data, origin, sampling_rate
|
345
|
+
|
346
|
+
@classmethod
def _load_em(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray]:
    """
    Extracts data, origin and sampling rate from an EM file.

    Parameters
    ----------
    filename : str
        Path to a file in EM format. The file can be gzip compressed.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the Density objects data attribute should be memory mapped.
        Ignored with a warning if the file is gzip compressed and not used
        when ``subset`` is given.

    Returns
    -------
    NDArray
        The data attribute of the EM file.
    NDArray
        The coordinate origin of the data, always zero.
    NDArray
        The sampling rate of the data.

    Raises
    ------
    ValueError
        If the header is truncated or specifies an unsupported data type code.

    References
    ----------
    .. [1] Nickell S. et al, Journal of Structural Biology, 2005.

    Warns
    -----
    Warns if the pixel size is zero.
    Warns if memory mapping is requested for a gzip compressed file.

    Notes
    -----
    A pixel size of zero will be treated as missing value and changed to one. This
    function does not yet extract an origin like :py:meth:`Density._load_mrc`.
    The header is interpreted as little-endian.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    DATA_TYPE_CODING = {
        1: np.byte,
        2: np.int16,
        3: np.int32,
        5: np.float32,
        6: np.float64,
        8: np.complex64,
        9: np.complex128,
    }

    gzipped = is_gzipped(filename)
    if gzipped and use_memmap:
        # A decompressed in-memory buffer cannot be memory mapped. Mirror the
        # behaviour of the CCP4/MRC loader and fall back to a regular read.
        warnings.warn(
            f"Cannot open gzipped file {filename} as memmap."
            f" Please gunzip {filename} to use memmap functionality."
        )
        use_memmap = False

    func = gzip_open if gzipped else open
    with func(filename, mode="rb") as f:
        if gzipped:
            # Decompress once so that seeking within the header is cheap.
            f = BytesIO(f.read())

        # Header layout: 3 leading bytes, 1 byte data type code, 3 x int32
        # dimensions, 80 skipped bytes, 40 x int32 user parameters and 256
        # further skipped bytes before the data starts.
        f.seek(3, 1)
        type_byte = f.read(1)
        shape_bytes = f.read(3 * 4)
        if len(type_byte) != 1 or len(shape_bytes) != 3 * 4:
            raise ValueError(f"Truncated EM header in {filename}.")

        data_type_code = int(np.frombuffer(type_byte, dtype="<i1")[0])
        data_type = DATA_TYPE_CODING.get(data_type_code)
        if data_type is None:
            # Without this check NumPy would silently interpret None as
            # float64 and misread the file.
            raise ValueError(
                f"Unsupported EM data type code {data_type_code} in {filename}."
            )

        # Dimensions are stored as x, y, z; reverse them to z, y, x.
        data_shape = np.frombuffer(shape_bytes, dtype="<i4")[::-1]

        f.seek(80, 1)
        param_bytes = f.read(40 * 4)
        if len(param_bytes) != 40 * 4:
            raise ValueError(f"Truncated EM header in {filename}.")
        user_params = np.frombuffer(param_bytes, dtype="<i4")

        pixel_size = user_params[6] / 1000.0
        f.seek(256, 1)
        header_size = f.tell()

        if use_memmap and subset is None:
            data = np.memmap(
                f, dtype=data_type, mode="r", offset=header_size
            ).reshape(data_shape)
        elif subset is None:
            # Compute the byte count with Python integers, the int32 header
            # values could otherwise overflow for large volumes.
            n_items = 1
            for extent in data_shape:
                n_items *= int(extent)
            data_size = n_items * np.dtype(data_type).itemsize
            data = np.frombuffer(f.read(data_size), dtype=data_type).reshape(
                data_shape
            )
            data = data.astype(np.float32)
        else:
            subset_shape = [x.stop - x.start for x in subset]
            # A subset spanning the complete map is served by the regular path.
            if np.allclose(subset_shape, data_shape):
                return cls._load_em(
                    filename=filename, subset=None, use_memmap=use_memmap
                )

            data = cls._read_binary_subset(
                filename=filename,
                slices=subset,
                data_shape=data_shape,
                dtype=np.dtype(data_type),
                header_size=header_size,
            )

    origin = np.zeros(3, dtype=data.dtype)

    if pixel_size == 0:
        warnings.warn(
            f"Got invalid sampling rate {pixel_size}, overwriting it to 1."
        )
        pixel_size = 1
    sampling_rate = np.repeat(pixel_size, data.ndim).astype(data.dtype)

    return data, origin, sampling_rate
|
450
|
+
|
451
|
+
@staticmethod
def _validate_slices(slices: Tuple[slice], shape: Tuple[int]):
    """
    Validate whether the given slices fit within the provided data shape.

    Open-ended bounds follow the slicing convention: a start of None is
    treated as zero and a stop of None as the size of the respective axis.
    The step of the slices is not inspected.

    Parameters
    ----------
    slices : Tuple[slice]
        A tuple of slice objects, one per dimension of the data.
    shape : Tuple[int]
        The shape of the data being sliced, as a tuple of integers.

    Raises
    ------
    ValueError
        - If the length of `slices` doesn't match the dimension of shape.
        - If any slice has a stop value exceeding any dimension in shape.
        - If any slice has a start or stop value that is negative.
    """
    n_dims = len(shape)
    if len(slices) != n_dims:
        raise ValueError(
            f"Expected length of slices : {n_dims}, got : {len(slices)}"
        )

    starts = [0 if s.start is None else s.start for s in slices]
    stops = [
        extent if s.stop is None else s.stop for s, extent in zip(slices, shape)
    ]

    if any(stop > extent for stop, extent in zip(stops, shape)):
        raise ValueError(f"Subset exceeds data dimensions ({shape}).")

    # Negative bounds would be turned into negative file offsets by the
    # binary readers, hence both ends of each slice are checked.
    if any(bound < 0 for bound in (*starts, *stops)):
        raise ValueError("Subsets have to be non-negative.")
|
482
|
+
|
483
|
+
@classmethod
def _read_binary_subset(
    cls,
    filename: str,
    slices: Tuple[slice],
    data_shape: Tuple[int],
    dtype: type,
    header_size: int,
) -> NDArray:
    """
    Read a subset of data from a binary file with a header.

    The file is expected to store the values row by row after the header,
    with the last axis of ``data_shape`` varying fastest. Only the requested
    part of each row is read. The step of the slices is ignored.

    Parameters
    ----------
    filename : str
        Path to the binary file. The file can be gzip compressed.
    slices : tuple of slice objects
        Slices representing the desired subset in each dimension.
    data_shape : tuple of ints
        Shape of the complete dataset in the file.
    dtype : numpy dtype
        Data type of the dataset in the file. Anything accepted by
        ``numpy.dtype`` can be passed.
    header_size : int
        Size of the file's header in bytes.

    Returns
    -------
    NDArray
        Subset of the dataset as specified by the slices.

    Raises
    ------
    NotImplementedError
        If the data is not three dimensional.
    ValueError
        If the slices do not fit the data, see
        :py:meth:`Density._validate_slices`.

    See Also
    --------
    :py:meth:`Density._load_mrc`
    :py:meth:`Density._load_em`
    """
    n_dims = len(data_shape)
    if n_dims != 3:
        raise NotImplementedError("Only 3-dimensional data can be subsetted.")

    # Header values frequently arrive as fixed-width NumPy integers. Byte
    # offsets of large files exceed their range, so use Python integers.
    data_shape = tuple(int(extent) for extent in data_shape)
    header_size = int(header_size)

    cls._validate_slices(slices=slices, shape=data_shape)

    # Normalizing makes .itemsize available for scalar types as well.
    dtype = np.dtype(dtype)
    bytes_per_item = dtype.itemsize

    starts = [0 if s.start is None else int(s.start) for s in slices]
    stops = [
        extent if s.stop is None else int(s.stop)
        for s, extent in zip(slices, data_shape)
    ]
    z_start, y_start, x_start = starts
    z_stop, y_stop, x_stop = stops

    subset_shape = [stop - start for start, stop in zip(starts, stops)]
    subset_data = np.empty(subset_shape, dtype=dtype)

    row_bytes = (x_stop - x_start) * bytes_per_item
    full_row_bytes = data_shape[2] * bytes_per_item
    plane_bytes = data_shape[1] * full_row_bytes
    x_offset = x_start * bytes_per_item

    gzipped = is_gzipped(filename)
    func = gzip_open if gzipped else open
    with func(filename, mode="rb") as f:
        if gzipped:
            # Decompress once, the row-wise seeks below would otherwise
            # repeatedly operate on the compressed stream.
            f = BytesIO(f.read())

        for z in range(z_start, z_stop):
            base_offset_z = header_size + z * plane_bytes

            for y in range(y_start, y_stop):
                f.seek(base_offset_z + y * full_row_bytes + x_offset)
                row = np.frombuffer(f.read(row_bytes), dtype=dtype)
                subset_data[z - z_start, y - y_start] = row

    return subset_data
|
552
|
+
|
553
|
+
@staticmethod
def _load_skio(filename: str) -> Tuple[NDArray]:
    """
    Read ``filename`` through skimage.io.imread.

    Gzip compressed files are decompressed into memory before being passed on.

    Parameters
    ----------
    filename : str
        Path to a file whose format is supported by skimage.io.imread.

    Returns
    -------
    NDArray
        The data attribute of the file.
    NDArray
        The coordinate origin of the data, zero along every axis.
    NDArray
        The sampling rate of the data, one along every axis.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html

    Warns
    -----
    Warns that origin and sampling_rate are not yet extracted from ``filename``.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    if is_gzipped(filename):
        with gzip_open(filename, "rb") as infile:
            source = BytesIO(infile.read())
    else:
        source = filename

    data = skio.imread(source)
    warnings.warn(
        "origin and sampling_rate are not yet extracted from non CCP4/MRC files."
    )

    n_axes = data.ndim
    return data, np.zeros(n_axes), np.ones(n_axes)
|
594
|
+
|
595
|
+
@classmethod
def from_structure(
    cls,
    filename_or_structure: str,
    shape: Tuple[int] = None,
    sampling_rate: NDArray = np.ones(1),
    origin: Tuple[float] = None,
    weight_type: str = "atomic_weight",
    scattering_args: Dict = dict(),
    chain: str = None,
    filter_by_elements: Set = None,
    filter_by_residues: Set = None,
) -> "Density":
    """
    Reads in an atomic structure and converts it into a :py:class:`Density`
    instance.

    Parameters
    ----------
    filename_or_structure : str or :py:class:`tme.structure.Structure`
        Either :py:class:`tme.structure.Structure` instance or path to
        structure file that can be read by
        :py:meth:`tme.structure.Structure.from_file`.
    shape : tuple of int, optional
        Shape of the new :py:class:`Density` instance. By default,
        computes the minimum 3D box holding all atoms.
    sampling_rate : float, optional
        Sampling rate of the output array along each axis, in the same unit
        as the atoms in the structure. Defaults to one Ångstrom
        per axis unit.
    origin : tuple of float, optional
        Origin of the coordinate system. If provided, it is expected to be in
        z, y, x form in the same unit as the atoms in the structure.
        By default, computes origin as distance between minimal coordinate
        and coordinate system origin.
    weight_type : str, optional
        Which weight should be given to individual atoms. For valid values
        see :py:meth:`tme.structure.Structure.to_volume`.
    scattering_args : dict, optional
        Additional arguments for scattering factor computation.
    chain : str, optional
        The chain that should be extracted from the structure. If multiple chains
        should be selected, they need to be a comma separated string,
        e.g. 'A,B,CE'. If chain None, all chains are returned. Default is None.
    filter_by_elements : set, optional
        Set of atomic elements to keep. Default is all atoms. Only applied
        when ``filename_or_structure`` is a path.
    filter_by_residues : set, optional
        Set of residues to keep. Default is all residues. Only applied
        when ``filename_or_structure`` is a path.

    Returns
    -------
    :py:class:`Density`
        Newly created :py:class:`Density` instance.

    References
    ----------
    .. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
        of atomic models into electron density maps. AIMS Biophysics
        2, 8–20.

    Examples
    --------
    The following outlines the minimal parameters needed to read in an
    atomic structure and convert it into a :py:class:`Density` instance. For
    specification on supported formats refer to
    :py:meth:`tme.structure.Structure.from_file`.

    >>> path_to_structure = "/path/to/structure.cif"
    >>> density = Density.from_structure(path_to_structure)

    :py:meth:`Density.from_structure` will automatically determine the appropriate
    density dimensions based on the structure. The origin will be computed as
    minimal distance required to move the closest atom of the structure to the
    coordinate system origin. Furthermore, all chains will be used and the atom
    densities will be represented by their atomic weight and accumulated
    on a per-voxel basis.

    The following will read in chain A of an atomic structure and discretize
    it on a grid of dimension 100 x 100 x 100 using a sampling rate of
    2.5 Angstrom per voxel.

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    shape = (100, 100, 100),
    ...    sampling_rate = 2.5,
    ...    chain = "A"
    ... )

    We can restrict the generated :py:class:`Density` instance to only contain
    specific elements like carbon and nitrogen:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    filter_by_elements = {"C", "N"}
    ... )

    or specified residues such as polar amino acids:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    filter_by_residues = {"SER", "THR", "CYS", "ASN", "GLN", "TYR"}
    ... )

    :py:meth:`Density.from_structure` supports a variety of methods to convert
    atoms into densities. In addition to 'atomic_weight', 'atomic_number',
    'van_der_waals_radius' it is possible to use experimentally determined
    scattering factors from various sources:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    weight_type = "scattering_factors",
    ...    scattering_args={"source": "dt1969"}
    ... )

    or a lowpass filtered representation introduced in [1]_:

    >>> density = Density.from_structure(
    ...    filename_or_structure = path_to_structure,
    ...    weight_type = "lowpass_scattering_factors",
    ...    scattering_args={"source": "dt1969"}
    ... )

    See Also
    --------
    :py:meth:`tme.structure.Structure.from_file`
    :py:meth:`tme.structure.Structure.to_volume`
    """
    structure = filename_or_structure
    if isinstance(filename_or_structure, str):
        structure = Structure.from_file(
            filename=filename_or_structure,
            filter_by_elements=filter_by_elements,
            filter_by_residues=filter_by_residues,
        )

    # The default array and dictionary in the signature are created once and
    # shared by all calls. Pass copies on, so neither the defaults nor objects
    # owned by the caller can be modified downstream.
    if isinstance(sampling_rate, np.ndarray):
        sampling_rate = sampling_rate.copy()
    if isinstance(scattering_args, dict):
        scattering_args = scattering_args.copy()

    volume, origin, sampling_rate = structure.to_volume(
        shape=shape,
        sampling_rate=sampling_rate,
        origin=origin,
        chain=chain,
        weight_type=weight_type,
        scattering_args=scattering_args,
    )

    return cls(
        data=volume,
        origin=origin,
        sampling_rate=sampling_rate,
        metadata=structure.details.copy(),
    )
|
745
|
+
|
746
|
+
def to_file(self, filename: str, gzip: bool = False) -> None:
    """
    Write the instance to disk.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        If True, the output will be gzip compressed and ".gz" will be added
        to the filename if not already present. By default False.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    The following creates a :py:class:`Density` instance `dens` holding
    random data values and writes it to disk in CCP4/MRC format [1]_:

    >>> import numpy as np
    >>> from tme import Density
    >>> data = np.random.rand(50,50,50)
    >>> dens = Density(data = data, origin = (0, 0, 0), sampling_rate = (1, 1, 1))
    >>> dens.to_file("example.mrc")

    The output file can also be directly ``gzip`` compressed. The corresponding
    ".gz" extension will be automatically added if absent.

    >>> dens.to_file("example.mrc", gzip=True)

    The :py:meth:`Density.to_file` method also supports writing EM files [2]_:

    >>> dens.to_file("example.em")

    In addition, a variety of image file formats are supported [3]_:

    >>> data = np.random.rand(50,50)
    >>> dens = Density(data = data, origin = (0, 0), sampling_rate = (1, 1))
    >>> dens.to_file("example.tiff")

    Notes
    -----
    If ``filename`` ends with ".em" or ".em.gz", the method will create an EM file.
    Otherwise, it defaults to the CCP4/MRC format. If the chosen writer raises
    a ValueError, the image writer of this class is used instead.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    if gzip and not filename.endswith(".gz"):
        filename = f"{filename}.gz"

    # The extension is checked after ".gz" was appended, so compressed EM
    # files are recognised as well.
    if filename.endswith((".em", ".em.gz")):
        writer = self._save_em
    else:
        writer = self._save_mrc

    try:
        writer(filename=filename, gzip=gzip)
    except ValueError:
        self._save_skio(filename=filename, gzip=gzip)
|
810
|
+
|
811
|
+
def _save_mrc(self, filename: str, gzip: bool) -> None:
    """
    Writes current class instance to disk as mrc file.

    The data array is converted to float32 before it is written and an
    existing file at ``filename`` is overwritten.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool
        If True, the output will be gzip compressed.

    Notes
    -----
    The start indices are unpacked into exactly three header fields, so the
    quotient of origin and sampling rate has to contain three values.
    Otherwise the unpacking raises a ValueError after the data has been set.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    """
    compression = "gzip" if gzip else None
    with mrcfile.new(filename, overwrite=True, compression=compression) as mrc:
        mrc.set_data(self.data.astype("float32"))
        # Start indices are the origin expressed in voxels, rounded up
        mrc.header.nzstart, mrc.header.nystart, mrc.header.nxstart = np.ceil(
            np.divide(self.origin, self.sampling_rate)
        )
        # mrcfile library expects origin to be in xyz format
        mrc.header.mapc, mrc.header.mapr, mrc.header.maps = (1, 2, 3)
        # Reverse the axis order of origin and sampling rate for the header
        mrc.header["origin"] = tuple(self.origin[::-1])
        mrc.voxel_size = tuple(self.sampling_rate[::-1])
|
836
|
+
|
837
|
+
def _save_em(self, filename: str, gzip: bool) -> None:
    """
    Writes data to disk as an .em file.

    The header consists of four single-byte fields (the last one holding the
    data type code), the array shape as little-endian int32, an 80 byte
    comment field, 40 int32 user parameters and a 256 byte user data field.
    The raw array values follow the header.

    Parameters
    ----------
    filename : str
        Path to write the .em file to.
    gzip : bool
        If True, the output will be gzip compressed.

    Notes
    -----
    Arrays whose dtype has no EM type code are converted to float32 before
    writing, so the type code in the header always describes the stored
    values. Only the sampling rate of the first axis is stored, multiplied
    by 1000, in user parameter 6.

    References
    ----------
    .. [1] Nickell S. et al, Journal of Structural Biology, 2005.
    """
    DATA_TYPE_MAPPING = {
        np.dtype(np.int8): 1,
        np.dtype(np.int16): 2,
        np.dtype(np.int32): 3,
        np.dtype(np.float32): 5,
        np.dtype(np.float64): 6,
        np.dtype(np.complex64): 8,
        np.dtype(np.complex128): 9,
    }

    data = self.data
    data_type_code = DATA_TYPE_MAPPING.get(data.dtype)
    if data_type_code is None:
        # No EM code for this dtype: store float32 so that the header type
        # code and the payload agree instead of writing mislabeled bytes.
        data = data.astype(np.float32)
        data_type_code = DATA_TYPE_MAPPING[data.dtype]

    func = gzip_open if gzip else open
    with func(filename, "wb") as f:
        f.write(np.array([0], dtype=np.int8).tobytes())
        f.write(np.array([0, 0, data_type_code], dtype=np.int8).tobytes())
        f.write(np.array(data.shape, dtype="<i4").tobytes())
        f.write(b" " * 80)
        user_params = np.zeros(40, dtype="<i4")
        user_params[6] = int(self.sampling_rate[0] * 1000)
        f.write(user_params.tobytes())
        f.write(b" " * 256)
        f.write(data.tobytes())
|
879
|
+
|
880
|
+
def _save_skio(self, filename: str, gzip: bool) -> None:
    """
    Writes the data array to ``filename`` via skimage.io.imsave.

    The data is converted to float32 before writing. For compressed output
    the image is first encoded into an in-memory buffer, whose content is
    then written to ``filename`` through gzip.

    Parameters
    ----------
    filename : str
        Path to write to with a format supported by skimage.io.imsave.
    gzip : bool
        If True, the output will be gzip compressed.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html
    """
    image = self.data.astype("float32")
    if not gzip:
        skio.imsave(fname=filename, arr=image)
        return None

    # The buffer has no name, hence the image format is passed explicitly
    # using the extension that remains once ".gz" is removed.
    uncompressed_name = basename(filename.replace(".gz", ""))
    extension = splitext(uncompressed_name)[1]

    buffer = BytesIO()
    skio.imsave(fname=buffer, arr=image, format=extension)
    with gzip_open(filename, "wb") as outfile:
        outfile.write(buffer.getvalue())
|
903
|
+
|
904
|
+
@property
def empty(self) -> "Density":
    """
    Creates a zero-filled counterpart of the current class instance.

    The returned instance holds an array of zeros with the shape and dtype of
    :py:attr:`Density.data`, together with copies of :py:attr:`Density.origin`
    and :py:attr:`Density.sampling_rate`. :py:attr:`Density.metadata` is not
    passed on to the new instance.

    Returns
    -------
    Density
        New instance with all data elements set to zero.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> empty_density = original_density.empty
    >>> np.all(empty_density.data == 0)
    True
    """
    zeros = np.zeros_like(self.data)
    origin = deepcopy(self.origin)
    sampling_rate = deepcopy(self.sampling_rate)
    return Density(data=zeros, origin=origin, sampling_rate=sampling_rate)
|
926
|
+
|
927
|
+
def copy(self) -> "Density":
    """
    Returns a copy of the current :py:class:`Density` instance.

    :py:attr:`Density.data`, :py:attr:`Density.origin`,
    :py:attr:`Density.sampling_rate` and :py:attr:`Density.metadata` are all
    copied, so modifying the returned instance in place does not affect the
    current one.

    Returns
    -------
    Density
        Independent copy of the current class instance.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> copied_density = original_density.copy()
    >>> np.all(copied_density.data == original_density.data)
    True
    """
    return Density(
        data=self.data.copy(),
        origin=deepcopy(self.origin),
        # Copied like origin and metadata to avoid sharing the array
        sampling_rate=deepcopy(self.sampling_rate),
        metadata=deepcopy(self.metadata),
    )
|
945
|
+
|
946
|
+
def to_memmap(self) -> None:
    """
    Converts the current class instance's :py:attr:`Density.data` attribute to
    a :obj:`numpy.memmap` instance.

    Nothing happens if :py:attr:`Density.data` already is a
    :obj:`numpy.memmap`. Otherwise the array is written to disk and reopened
    in read-only mode with the same dtype and shape.

    Examples
    --------
    The following outlines how to use the :py:meth:`Density.to_memmap` method.

    >>> from tme import Density
    >>> large_density = Density.from_file("/path/to/large_file.mrc")
    >>> large_density.to_memmap()

    A more efficient solution to achieve the result outlined above is to
    provide the ``use_memmap`` flag in :py:meth:`Density.from_file`.

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap = True)

    In practice, the :py:meth:`Density.to_memmap` method finds application, if a
    large number of :py:class:`Density` instances need to be in memory at once,
    without occupying the full physical memory required to store
    :py:attr:`Density.data`.

    See Also
    --------
    :py:meth:`Density.to_numpy`
    """
    # isinstance also recognizes memmap subclasses as already converted
    if isinstance(self.data, np.memmap):
        return None

    filename = array_to_memmap(arr=self.data)

    self.data = np.memmap(
        filename, mode="r", dtype=self.data.dtype, shape=self.data.shape
    )
|
982
|
+
|
983
|
+
def to_numpy(self) -> None:
    """
    Replaces :py:attr:`Density.data` with the array obtained by passing it
    through ``memmap_to_array``, the counterpart of
    :py:meth:`Density.to_memmap`.

    Examples
    --------
    >>> from tme import Density
    >>> density = Density.from_file("/path/to/large_file.mrc")
    >>> density.to_memmap()  # Convert to memory-mapped array first
    >>> density.to_numpy()  # Now, convert back to an in-memory array

    See Also
    --------
    :py:meth:`Density.to_memmap`
    """
    in_memory = memmap_to_array(self.data)
    self.data = in_memory
|
1000
|
+
|
1001
|
+
@property
def shape(self) -> Tuple[int, ...]:
    """
    Returns the dimensions of current instance's :py:attr:`Density.data`
    attribute.

    Returns
    -------
    tuple
        The dimensions of :py:attr:`Density.data`, one entry per axis.

    Examples
    --------
    The following outlines the usage of :py:attr:`Density.shape`:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.shape
    (5,)
    """
    # Read from the data array on every access, nothing is cached
    return self.data.shape
|
1023
|
+
|
1024
|
+
@property
def data(self) -> NDArray:
    """
    Returns the value of the current instance's :py:attr:`Density.data`
    attribute.

    Returns
    -------
    NDArray
        Value of the current instance's :py:attr:`Density.data` attribute.
        The stored object itself is returned, not a copy.

    Examples
    --------
    The following outlines the usage of :py:attr:`Density.data`:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.data
    array([0, 1, 1, 1, 0])

    """
    return self._data

@data.setter
def data(self, data: NDArray) -> None:
    """
    Sets the value of the current instance's :py:attr:`Density.data` attribute.

    The passed object is stored as is, without conversion or validation.
    """
    self._data = data
|
1054
|
+
|
1055
|
+
@property
def origin(self) -> NDArray:
    """
    Coordinate origin of the current class instance.

    Returns
    -------
    NDArray
        Value of the current instance's :py:attr:`Density.origin` attribute.

    Examples
    --------
    The following outlines the usage of :py:attr:`Density.origin`:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.origin
    array([0.])
    """
    return self._origin

@origin.setter
def origin(self, origin: NDArray) -> None:
    """
    Sets the origin of the class instance.

    The input is converted to an array and each element is repeated
    ``data.ndim // origin.size`` times, so a single value is expanded to one
    entry per data axis.
    """
    values = np.asarray(origin)
    repeats = self.data.ndim // values.size
    self._origin = np.repeat(values, repeats)
|
1086
|
+
|
1087
|
+
@property
def sampling_rate(self) -> NDArray:
    """
    Sampling rate along each axis of the data array.
    """
    return self._sampling_rate

@sampling_rate.setter
def sampling_rate(self, sampling_rate: NDArray) -> None:
    """
    Sets the sampling rate of the class instance.

    The input is converted to an array and each element is repeated
    ``data.ndim // sampling_rate.size`` times, so a single value is expanded
    to one entry per data axis.
    """
    rates = np.asarray(sampling_rate)
    repeats = self.data.ndim // rates.size
    self._sampling_rate = np.repeat(rates, repeats)
|
1102
|
+
|
1103
|
+
@property
def metadata(self) -> Dict:
    """
    Returns dictionary with metadata information, empty by default.

    The stored object itself is returned, not a copy.
    """
    return self._metadata

@metadata.setter
def metadata(self, metadata: Dict) -> None:
    """
    Sets the metadata of the class instance.

    The passed object is stored as is, without copying or validation.
    """
    self._metadata = metadata
|
1116
|
+
|
1117
|
+
def to_pointcloud(self, threshold: float = 0) -> NDArray:
    """
    Determines the indices of all data elements exceeding a threshold.

    Parameters
    ----------
    threshold : float, optional
        Elements strictly larger than this value are reported. Default is 0.

    Returns
    -------
    NDArray
        Indices of the selected elements with shape (dimensions, indices),
        i.e. one row per axis of :py:attr:`Density.data`.

    Examples
    --------
    >>> density.to_pointcloud(0)
    """
    above_threshold = self.data > threshold
    indices = np.where(above_threshold)
    return np.array(indices)
|
1137
|
+
|
1138
|
+
def _pad_slice(self, box: Tuple[slice], pad_kwargs: Dict = None) -> NDArray:
    """
    Pads the internal data array according to box.

    Negative slices indices will result in a left-hand padding, while
    slice indices larger than the box_size property of the current class
    instance will result in a right-hand padding.

    Parameters
    ----------
    box : tuple of slice
        Tuple of slice objects that define the box dimensions. Start and
        stop of every slice have to be integers.
    pad_kwargs: dict, optional
        Parameter dictionary passed to numpy pad. Defaults to no additional
        parameters.

    Returns
    -------
    NDArray
        The padded internal data array.
    """
    # A fresh dictionary per call avoids sharing a mutable default argument
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    box_start = np.array([b.start for b in box])
    box_stop = np.array([b.stop for b in box])

    # Only negative starts require padding on the left-hand side
    left_pad = -np.minimum(box_start, 0)

    # Requested extent, counted from the start if it is positive and from
    # zero otherwise, minus what the data array already provides
    right_pad = box_stop - box_start * (box_start > 0)
    right_pad -= np.array(self.shape, dtype=int)
    right_pad = np.maximum(right_pad, 0)

    padding = tuple(zip(left_pad, right_pad))
    return np.pad(self.data, padding, **pad_kwargs)
|
1169
|
+
|
1170
|
+
def adjust_box(self, box: Tuple[slice], pad_kwargs: Dict = None) -> None:
    """
    Adjusts the internal data array and origin of the current class instance
    according to the provided box.

    Parameters
    ----------
    box : tuple of slices
        A tuple of slices describing how each axis of the volume array
        should be sliced. See :py:meth:`Density.trim_box` on how to produce
        such an object.
    pad_kwargs: dict, optional
        Parameter dictionary passed to numpy pad. Defaults to no additional
        parameters.

    See Also
    --------
    :py:meth:`Density.trim_box`

    Examples
    --------
    The following demonstrates the ability of :py:meth:`Density.adjust_box`
    to extract a subdensity from the current :py:class:`Density` instance.
    :py:meth:`Density.adjust_box` not only operates on :py:attr:`Density.data`,
    but also modifies :py:attr:`Density.origin` according to ``box``.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5, 5)))
    >>> box = (slice(1, 4), slice(2, 5))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (1.0, 2.0), sampling_rate: (1, 1), Shape: (3, 3)

    :py:meth:`Density.adjust_box` can also extend the box of the current
    :py:class:`Density` instance. This is achieved by negative start or
    stops that exceed the dimension of the current :py:attr:`Density.data` array.

    >>> box = (slice(-1, 10), slice(2, 10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (0.0, 4.0), sampling_rate: (1, 1), Shape: (11, 8)

    However, do note that only the start coordinate of each slice in ``box``
    can be negative.

    >>> box = (slice(-1, 10), slice(2, -10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (-1.0, 6.0), sampling_rate: (1, 1), Shape: (11, 0)
    """
    # A fresh dictionary per call avoids sharing a mutable default argument
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    # Restrict the box to the part that overlaps with the current array
    crop_box = tuple(
        slice(max(b.start, 0), min(b.stop, shape))
        for b, shape in zip(box, self.data.shape)
    )
    self.data = self.data[crop_box].copy()

    # In case the box is larger than the current map
    self.data = self._pad_slice(box, pad_kwargs=pad_kwargs)

    # Adjust the origin by the box start expressed in sampling units
    box_start = np.array([b.start for b in box])
    self.origin = self.origin + np.multiply(box_start, self.sampling_rate)
|
1232
|
+
|
1233
|
+
def trim_box(self, cutoff: float, margin: int = 0) -> Tuple[slice]:
    """
    Computes a rectangle with sufficient dimension that encloses all
    values of the internal data array larger than the specified cutoff,
    expanded by the specified margin.

    The output can be passed to :py:meth:`Density.adjust_box` to crop
    the internal data array.

    Parameters
    ----------
    cutoff : float
        The threshold value for determining the minimum enclosing box.
    margin : int, optional
        The margin to add to the box dimensions. Default is 0.

    Returns
    -------
    tuple
        A tuple containing slice objects representing the box. The slice
        boundaries are Python integers and never exceed the boundaries of
        the data array.

    Raises
    ------
    ValueError
        If the cutoff is larger than or equal to the maximum density value.

    Examples
    --------
    The following will compute the bounding box that encloses all values
    in the example array that are larger than zero:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0,1,1,1,0]))
    >>> dens.trim_box(0)
    (slice(1, 4, None),)

    The resulting tuple can be passed to :py:meth:`Density.adjust_box` to trim the
    current :py:class:`Density` instance:

    >>> dens.adjust_box(dens.trim_box(0))
    >>> dens.data.shape
    (3,)

    See Also
    --------
    :py:meth:`Density.adjust_box`
    """
    data_max = self.data.max()
    if cutoff >= data_max:
        raise ValueError(f"Cutoff exceeds data range ({cutoff} >= {data_max}).")

    ndim = self.data.ndim
    starts, stops = [], []
    for axis in range(ndim):
        # Maximum over all remaining axes yields a profile along axis
        other_axes = tuple(i for i in range(ndim) if i != axis)
        projected_max = np.max(self.data, axis=other_axes)
        valid = np.where(projected_max > cutoff)[0]
        # Cast to Python int so the slices do not carry NumPy scalars
        starts.append(int(max(0, valid[0] - margin)))
        stops.append(int(min(self.data.shape[axis], valid[-1] + margin + 1)))

    return tuple(slice(start, stop) for start, stop in zip(starts, stops))
|
1295
|
+
|
1296
|
+
def minimum_enclosing_box(
    self,
    cutoff: float,
    use_geometric_center: bool = False,
) -> Tuple[slice]:
    """
    Compute the enclosing box that holds all possible rotations of the internal
    data array.

    The box shape is obtained from
    :py:meth:`tme.matching_utils.minimum_enclosing_box` using the coordinates
    of all elements larger than ``cutoff``. The span between the smallest and
    largest of these coordinates is then extended on both sides until it
    matches that shape, with the upper side receiving the remainder if the
    required extension is odd.

    Parameters
    ----------
    cutoff : float
        Above this value arr elements are considered.
    use_geometric_center : bool, optional
        Whether the box should accommodate the geometric or the coordinate
        center. Defaults to False.

    Returns
    -------
    tuple
        Tuple of slices corresponding to the minimum enclosing box.

    See Also
    --------
    :py:meth:`Density.adjust_box`
    :py:meth:`tme.matching_utils.minimum_enclosing_box`
    """
    points = self.to_pointcloud(threshold=cutoff)
    lower, upper = points.min(axis=1), points.max(axis=1)

    box_shape = minimum_enclosing_box(
        coordinates=points,
        use_geometric_center=use_geometric_center,
    )
    # Extent still missing per axis, never negative
    missing = np.maximum(np.subtract(box_shape, np.subtract(upper, lower)), 0)

    grow_lower = np.divide(missing, 2).astype(int)
    grow_upper = grow_lower + np.mod(missing, 2)

    lower = (lower - grow_lower).astype(int)
    upper = (upper + grow_upper).astype(int)

    return tuple(slice(start, stop) for start, stop in zip(lower, upper))
|
1341
|
+
|
1342
|
+
def pad(
    self, new_shape: Tuple[int], center: bool = True, padding_value: float = 0
) -> None:
    """
    Extends the internal :py:attr:`Density.data` array of the current
    :py:class:`Density` instance to ``new_shape``. The work is delegated to
    :py:meth:`Density.adjust_box`, which adapts :py:attr:`Density.origin`
    accordingly.

    Parameters
    ----------
    new_shape : tuple of int
        The desired shape for the new volume.
    center : bool, optional
        Whether the data should be centered in the new box. Default is True.
    padding_value : float, optional
        Value to pad the data array with. Default is zero.

    Raises
    ------
    ValueError
        If the length of `new_shape` does not match the dimensionality of the
        internal data array.

    Examples
    --------
    The following demonstrates the functionality of :py:meth:`Density.pad` on
    a one-dimensional array:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True)
    >>> dens.data
    array([0, 1, 1, 1, 0])

    It's also possible to pass a user-defined ``padding_value``:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True, padding_value = -1)
    >>> dens.data
    array([-1, 1, 1, 1, -1])

    If ``center`` is set to False, the padding values will be appended:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = False)
    >>> dens.data
    array([1, 1, 1, 0, 0])

    """
    requested_ndim = len(new_shape)
    if requested_ndim != self.data.ndim:
        raise ValueError(
            f"new_shape has dimension {len(new_shape)}"
            f" but expected was {self.data.ndim}."
        )

    if center:
        # Distribute the additional elements on both sides, an odd
        # remainder is assigned to the right-hand side
        extra = np.subtract(new_shape, self.shape).astype(int)
        half = extra // 2
        lower = -half
        upper = np.add(self.shape, half + extra % 2)
        target_box = tuple(slice(lo, hi) for lo, hi in zip(lower, upper))
    else:
        target_box = tuple(slice(0, extent) for extent in new_shape)

    fill = {"constant_values": padding_value}
    self.adjust_box(target_box, pad_kwargs=fill)
|
1407
|
+
|
1408
|
+
def centered(self, cutoff: float = 0) -> Tuple["Density", NDArray]:
    """
    Shifts the data center of mass to the center of the data array. The box size
    of the return Density object is at least equal to the box size of the class
    instance.

    Parameters
    ----------
    cutoff : float, optional
        Only elements in data larger than cutoff will be considered for
        computing the new box. By default considers only positive elements.

    Notes
    -----
    Should any axis of the class instance data array be smaller than the return
    value of :py:meth:`Density.minimum_enclosing_box`, the size of the internal
    data array is adapted to avoid array elements larger than cutoff to fall
    outside the data array.

    The current class instance is not modified, all operations are performed
    on a copy.

    Returns
    -------
    Density
        A copy of the class instance whose data center of mass is in the
        center of the data array.
    NDArray
        The coordinate translation.

    See Also
    --------
    :py:meth:`Density.trim_box`
    :py:meth:`Density.minimum_enclosing_box`


    Examples
    --------
    :py:meth:`Density.centered` returns a tuple containing a centered version
    of the current :py:class:`Density` instance, as well as an array with
    translations. The translation corresponds to the shift that was used to
    center the current :py:class:`Density` instance.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5,5)))
    >>> centered_dens, translation = dens.centered(0)
    >>> translation
    array([-4.4408921e-16, 4.4408921e-16])

    :py:meth:`Density.centered` extended the :py:attr:`Density.data` attribute
    of the current :py:class:`Density` instance and modified
    :py:attr:`Density.origin` accordingly.

    >>> centered_dens
    Origin: (-1.0, -1.0), sampling_rate: (1, 1), Shape: (7, 7)

    :py:meth:`Density.centered` achieves centering via zero-padding the
    internal :py:attr:`Density.data` attribute:

    >>> centered_dens.data
    array([[0., 0., 0., 0., 0., 0.],
    [0., 1., 1., 1., 1., 1.],
    [0., 1., 1., 1., 1., 1.],
    [0., 1., 1., 1., 1., 1.],
    [0., 1., 1., 1., 1., 1.],
    [0., 1., 1., 1., 1., 1.]])

    `centered_dens` is sufficiently large to represent all rotations that
    could be applied to the :py:attr:`Density.data` attribute. Lets look
    at a random rotation obtained from
    :py:meth:`tme.matching_utils.get_rotation_matrices`.

    >>> from tme.matching_utils import get_rotation_matrices
    >>> rotation_matrix = get_rotation_matrices(dim = 2 ,angular_sampling = 10)[0]
    >>> rotated_centered_dens = centered_dens.rigid_transform(
    ...     rotation_matrix = rotation_matrix,
    ...     order = None
    ... )
    >>> print(centered_dens.data.sum(), rotated_centered_dens.data.sum())
    25.000000000000007 25.000000000000007

    """
    # Work on a copy, the current instance remains untouched
    ret = self.copy()

    # Resize the copy to the box computed for elements above cutoff
    box = ret.minimum_enclosing_box(cutoff=cutoff, use_geometric_center=False)
    ret.adjust_box(box)

    # Never return a box smaller than the one of the current instance
    new_shape = np.maximum(ret.shape, self.shape)
    ret.pad(new_shape)

    # Integer shift that moves the center of mass to the middle of the array
    center = self.center_of_mass(ret.data, cutoff)
    shift = np.subtract(np.divide(ret.shape, 2), center).astype(int)

    # Identity rotation, i.e. only the translation is applied
    ret = ret.rigid_transform(
        translation=shift,
        rotation_matrix=np.eye(ret.data.ndim),
        use_geometric_center=False,
    )
    # Difference between the center of mass before and after the shift
    offset = np.subtract(center, self.center_of_mass(ret.data))

    return ret, offset
|
1507
|
+
|
1508
|
+
@classmethod
def rotate_array(
    cls,
    arr: NDArray,
    rotation_matrix: NDArray,
    arr_mask: NDArray = None,
    translation: NDArray = None,
    use_geometric_center: bool = False,
    out: NDArray = None,
    out_mask: NDArray = None,
    order: int = 3,
) -> None:
    """
    Rotates coordinates of arr according to rotation_matrix.

    If no output array is provided, this method will compute an array with
    sufficient space to hold all elements. If both `arr` and `arr_mask`
    are provided, `arr_mask` will be centered according to arr.

    This method forwards all arguments unchanged to the ``rotate_array``
    method of a newly created ``NumpyFFTWBackend`` instance and returns
    its result.

    Parameters
    ----------
    arr : NDArray
        The input array to be rotated.
    rotation_matrix : NDArray
        The rotation matrix to apply [d x d].
    arr_mask : NDArray, optional
        The mask of `arr` that will be equivalently rotated.
    translation : NDArray, optional
        The translation to apply [d].
    use_geometric_center : bool, optional
        Whether the rotation should be centered around the geometric
        or mass center. Default is mass center.
    out : NDArray, optional
        The output array to write the rotation of `arr` to.
    out_mask : NDArray, optional
        The output array to write the rotation of `arr_mask` to.
    order : int, optional
        Spline interpolation order. Has to be in the range 0-5.
    """

    # NOTE(review): the annotation states None, but the backend result is
    # returned. Confirm what the backend yields when out is not provided.
    return NumpyFFTWBackend().rotate_array(
        arr=arr,
        rotation_matrix=rotation_matrix,
        arr_mask=arr_mask,
        translation=translation,
        use_geometric_center=use_geometric_center,
        out=out,
        out_mask=out_mask,
        order=order,
    )
|
1558
|
+
|
1559
|
+
@staticmethod
def rotate_array_coordinates(
    arr: NDArray,
    coordinates: NDArray,
    rotation_matrix: NDArray,
    translation: NDArray = None,
    out: NDArray = None,
    use_geometric_center: bool = True,
    arr_mask: NDArray = None,
    mask_coordinates: NDArray = None,
    out_mask: NDArray = None,
) -> None:
    """
    Rotates coordinates of arr according to rotation_matrix.

    If no output array is provided, this method will compute an array with
    sufficient space to hold all elements. If both `arr` and `arr_mask`
    are provided, `arr_mask` will be centered according to arr.

    No centering will be performed if the rotation matrix is the identity matrix.

    This method forwards all arguments unchanged to the
    ``rotate_array_coordinates`` method of a newly created
    ``NumpyFFTWBackend`` instance and returns its result.

    Parameters
    ----------
    arr : NDArray
        The input array to be rotated.
    coordinates : NDArray
        The pointcloud [d x N] containing elements of `arr` that should be rotated.
        See :py:meth:`Density.to_pointcloud` on how to obtain the coordinates.
    rotation_matrix : NDArray
        The rotation matrix to apply [d x d].
    translation : NDArray, optional
        The translation to apply [d].
    out : NDArray, optional
        The output array to write the rotation of `arr` to.
    use_geometric_center : bool, optional
        Whether the rotation should be centered around the geometric
        or mass center. Default is True.
    arr_mask : NDArray, optional
        The mask of `arr` that will be equivalently rotated.
    mask_coordinates : NDArray, optional
        Equivalent to `coordinates`, but containing elements of `arr_mask`
        that should be rotated.
    out_mask : NDArray, optional
        The output array to write the rotation of `arr_mask` to.
    """
    # NOTE(review): the annotation states None, but the backend result is
    # returned. Confirm what the backend yields when out is not provided.
    return NumpyFFTWBackend().rotate_array_coordinates(
        arr=arr,
        coordinates=coordinates,
        rotation_matrix=rotation_matrix,
        translation=translation,
        out=out,
        use_geometric_center=use_geometric_center,
        arr_mask=arr_mask,
        mask_coordinates=mask_coordinates,
        out_mask=out_mask,
    )
|
1615
|
+
|
1616
|
+
def rigid_transform(
    self,
    rotation_matrix: NDArray,
    translation: NDArray = None,
    order: int = 3,
    use_geometric_center: bool = False,
) -> "Density":
    """
    Performs a rigid transform of the current class instance.

    The transformed data is written into a zero-filled instance obtained from
    :py:attr:`Density.empty`, the current class instance is not modified.

    Parameters
    ----------
    rotation_matrix : NDArray
        Rotation matrix to apply to the `Density` instance.
    translation : NDArray, optional
        Translation to apply to the `Density` instance.
    order : int, optional
        Order of spline interpolation.
    use_geometric_center : bool, optional
        Whether to use geometric or coordinate center. If False,
        class instance should be centered using :py:meth:`Density.centered`.

    Returns
    -------
    Density
        The transformed instance of :py:class:`tme.density.Density`.

    Examples
    --------
    >>> import numpy as np
    >>> rotation_matrix = np.eye(3)
    >>> rotation_matrix[0] = -1
    >>> density.rigid_transform(rotation_matrix = rotation_matrix)

    Notes
    -----
    :py:meth:`Density.rigid_transform` assumes that the internal data array is
    sufficiently sized to accommodate the transform.

    All elements of the result that are smaller than the machine epsilon of
    the data type are set to zero, which includes negative elements. For
    data types without machine epsilon, e.g. integers, zero is used as
    threshold instead.

    See Also
    --------
    :py:meth:`Density.centered`, :py:meth:`Density.minimum_enclosing_box`
    """
    transformed_map = self.empty

    self.rotate_array(
        arr=self.data,
        rotation_matrix=rotation_matrix,
        translation=translation,
        order=order,
        use_geometric_center=use_geometric_center,
        out=transformed_map.data,
    )

    # np.finfo is only defined for inexact data types and raises a
    # ValueError otherwise, hence integer data uses zero as threshold.
    dtype = transformed_map.data.dtype
    eps = np.finfo(dtype).eps if np.issubdtype(dtype, np.inexact) else 0
    transformed_map.data[transformed_map.data < eps] = 0
    return transformed_map
|
1672
|
+
|
1673
|
+
def align_origins(self, other_map: "Density") -> "Density":
    """
    Align the origin of another map to the origin of the current class instance.

    Parameters
    ----------
    other_map : Density
        An instance of :py:class:`Density` class to align with the current map.

    Raises
    ------
    ValueError
        If the sampling_rate of both class instances does not match.

    Returns
    -------
    Density
        A modified copy of `other_map` with aligned origin.
    """
    rates_match = np.allclose(self.sampling_rate, other_map.sampling_rate)
    if not rates_match:
        raise ValueError("sampling_rate of both maps have to match.")

    # Offset between both origins in voxels, truncated to integers.
    offset = np.subtract(self.origin, other_map.origin)
    voxel_offset = np.divide(offset, self.sampling_rate).astype(int)

    # Per axis, the box spans from the smaller to the larger of
    # (voxel offset, shape of other_map).
    lower = np.minimum(voxel_offset, other_map.shape)
    upper = np.maximum(voxel_offset, other_map.shape)
    new_box = tuple(slice(low, high) for low, high in zip(lower, upper))

    aligned = other_map.copy()
    aligned.adjust_box(new_box)
    return aligned
|
1708
|
+
|
1709
|
+
def resample(self, new_sampling_rate: Tuple[float], order: int = 1) -> "Density":
    """
    Resample a copy of the current class instance to ``new_sampling_rate``
    using spline interpolation of order ``order``.

    Parameters
    ----------
    new_sampling_rate : Tuple[float]
        Sampling rate to resample to. The value is repeated
        ``data.ndim // size`` times, so a single value applies to all axes.
    order : int, optional
        Order of spline used for interpolation, by default 1.

    Returns
    -------
    Density
        A resampled instance of `Density` class.
    """
    resampled = self.copy()
    target_rate = np.array(new_sampling_rate)

    # Broadcast the requested rate over the axes of the data array.
    repeats = resampled.data.ndim // target_rate.size
    target_rate = np.repeat(target_rate, repeats)

    # A factor > 1 increases the number of voxels along that axis.
    zoom_factor = np.divide(resampled.sampling_rate, target_rate)
    resampled.data = zoom(resampled.data, zoom_factor, order=order)
    resampled.sampling_rate = target_rate

    return resampled
|
1736
|
+
|
1737
|
+
def density_boundary(
    self, weight: float, fraction_surface: float = 0.1, volume_factor: float = 1.21
) -> Tuple[float]:
    """
    Computes the density boundary of the current class instance. The density
    boundary in this setting is defined as minimal and maximal density value
    enclosing a certain ``weight``.

    Parameters
    ----------
    weight : float
        Density weight to compute volume cutoff on. This could e.g. be the
        sum of contained atomic weights.
    fraction_surface : float, optional
        Approximate fraction of surface voxels on all voxels enclosing
        ``weight``, by default 0.1. Decreasing this value increases the
        upper volume boundary.
    volume_factor : float, optional
        Factor used to compute how many distinct density values
        can be used to represent ``weight``, by default 1.21.

    Returns
    -------
    tuple
        Tuple containing lower and upper bound on densities.

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    Raises
    ------
    ValueError
        If any input parameter is <= 0.

    Notes
    -----
    The number of voxels used to represent ``weight`` is clamped to the
    interval [1, number of voxels in the map].
    """
    if weight <= 0 or fraction_surface <= 0 or volume_factor <= 0:
        raise ValueError(
            "weight, fraction_surface and volume_factor need to be > 0."
        )

    densities = self.data.flatten()
    num_voxels = int(
        np.min(volume_factor * weight / np.power(self.sampling_rate, self.data.ndim))
    )
    # A count of zero would turn the negative index below into index 0 (the
    # smallest density), a count above the map size is out of bounds for
    # np.partition.
    num_voxels = min(max(num_voxels, 1), densities.size)
    surface_included_voxels = int(num_voxels * (1 + fraction_surface))
    surface_included_voxels = min(
        max(surface_included_voxels, num_voxels), densities.size
    )

    # Partial sort: only the two requested order statistics are placed.
    map_partition = np.partition(
        densities, (-num_voxels, -surface_included_voxels)
    )
    upper_limit = map_partition[-num_voxels]
    lower_limit = map_partition[-surface_included_voxels]

    return (lower_limit, upper_limit)
|
1792
|
+
|
1793
|
+
def surface_coordinates(
    self, density_boundaries: Tuple[float], method: str = "ConvexHull"
) -> NDArray:
    """
    Calculates the surface coordinates of the current class instance using
    different boundary and surface detection methods. This method is relevant
    for determining coordinates used in template matching,
    see :py:class:`tme.matching_exhaustive.FitRefinement`.

    Parameters
    ----------
    density_boundaries : tuple
        Tuple of two floats with lower and upper bounds of density values
        to be considered on the surface (see :py:meth:`Density.density_boundary`).
    method : str, optional
        Surface coordinates are determined using this method:

        +--------------+-----------------------------------------------------+
        | 'ConvexHull' | Use the lower bound density convex hull vertices.   |
        +--------------+-----------------------------------------------------+
        | 'Weight'     | Use all coordinates within ``density_boundaries``.  |
        +--------------+-----------------------------------------------------+
        | 'Sobel'      | Set densities below the lower bound density to zero |
        |              | apply a sobel filter and return density coordinates |
        |              | larger than 0.5 times the maximum filter value.     |
        +--------------+-----------------------------------------------------+
        | 'Laplace'    | Like 'Sobel' but with a laplace filter.             |
        +--------------+-----------------------------------------------------+
        | 'Minimum'    | Like 'Sobel' and 'Laplace' but with a spherical     |
        |              | minimum filter on the lower density bound.          |
        +--------------+-----------------------------------------------------+

    Raises
    ------
    ValueError
        If the chosen method is not available.

    Returns
    -------
    NDArray
        An array of surface coordinates with shape (number_of_points, dimensions).

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    available_methods = ["ConvexHull", "Weight", "Sobel", "Laplace", "Minimum"]

    if method not in available_methods:
        raise ValueError(
            "Argument method has to be one of the following: %s"
            % ", ".join(available_methods)
        )

    lower_bound, upper_bound = density_boundaries
    if method == "ConvexHull":
        binary = np.transpose(np.where(self.data > lower_bound))
        hull = ConvexHull(binary)
        surface_points = binary[hull.vertices[:]]

    elif method == "Sobel":
        filtered_map = np.multiply(self.data, (self.data > lower_bound))
        magn = generic_gradient_magnitude(filtered_map, sobel)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Laplace":
        # NOTE(review): the filter runs on the boolean mask rather than on the
        # masked densities used by 'Sobel' -- confirm this is intended.
        filtered_map = self.data > lower_bound
        magn = laplace(filtered_map)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Minimum":
        ndim = self.data.ndim
        # Cross-shaped footprint: the center voxel plus its two direct
        # neighbors along every axis. Needs three elements per axis
        # regardless of the dimensionality of the data.
        fp = np.zeros((3,) * ndim)
        center = np.ones(ndim, dtype=int)
        fp[tuple(center)] = 1
        for axis in range(ndim):
            offset = np.zeros(ndim, dtype=int)
            offset[axis] = 1
            fp[tuple(center + offset)] = 1
            fp[tuple(center - offset)] = 1

        filtered_map = (self.data > lower_bound).astype(int)
        filtered_map_surface = minimum_filter(
            filtered_map, footprint=fp, mode="constant", cval=0.8
        )
        # Surface voxels are inside the mask but removed by the minimum filter.
        filtered_map_surface = ((filtered_map - filtered_map_surface) == 1).astype(
            int
        )
        surface_points = np.argwhere(filtered_map_surface == 1)

    elif method == "Weight":
        surface_points = np.argwhere(
            np.logical_and(self.data < upper_bound, self.data > lower_bound)
        )

    return surface_points
|
1900
|
+
|
1901
|
+
def normal_vectors(self, coordinates: NDArray) -> NDArray:
    """
    Calculates the normal vectors for the given coordinates on the densities
    of the current class instance. If the normal vector to a given coordinate
    can not be computed, the zero vector is returned instead. The output of this
    function can e.g. be used in
    :py:class:`tme.matching_optimization.NormalVectorScore`.

    Parameters
    ----------
    coordinates : NDArray
        An array of integer coordinates with shape (coordinates, dimensions)
        of which to calculate the normal vectors.

    Returns
    -------
    NDArray
        An array with unit normal vectors with same shape as coordinates.

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    Raises
    ------
    ValueError
        If coordinates.shape[1] does not match self.data.ndim,
        coordinates.ndim != 2 or lies outside self.data.

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    coordinates = np.asarray(coordinates, dtype=int)

    if coordinates.ndim != 2:
        raise ValueError("Coordinates should have shape point x dimension.")
    if coordinates.shape[1] != self.data.ndim:
        raise ValueError(
            f"Expected coordinate dimension {self.data.ndim}, "
            f"got {coordinates.shape[1]}."
        )

    in_box = np.logical_and(
        coordinates < np.array(self.shape), coordinates >= 0
    ).all(axis=1)
    if not in_box.all():
        offending = coordinates[np.invert(in_box)]
        raise ValueError(
            "Coordinates outside of self.data detected: "
            f"{offending[:5].tolist()} ({offending.shape[0]} in total)."
        )

    # Rows stay zero whenever no normal vector can be determined.
    normals = np.zeros(coordinates.shape, dtype=float)
    data_shape = np.array(self.data.shape)
    for index, point in enumerate(coordinates):
        # Window of direct neighbors, clipped at the array border.
        start = np.maximum(point - 1, 0)
        stop = np.minimum(point + 2, data_shape)
        window = tuple(slice(low, high) for low, high in zip(start, stop))

        inner_facing = np.array(
            np.where(self.data[window] > self.data[tuple(point)])
        )
        if inner_facing.size == 0:
            continue

        # Offsets are relative to the position of the point inside the
        # window, which is not 1 along axes where the window was clipped.
        inner_facing = inner_facing - (point - start)[:, None]
        direction = inner_facing.sum(axis=1)

        norm = np.linalg.norm(direction)
        if norm == 0:
            # Higher neighbors cancel out, no direction can be determined.
            continue
        normals[index] = direction / norm

    return normals
|
1976
|
+
|
1977
|
+
def core_mask(self) -> NDArray:
    """
    Calculates the weighted core mask of the current class instance.

    Starting from the mask of all positive data elements, binary erosion is
    applied repeatedly until the mask is empty. Before each erosion, every
    element still contained in the mask is incremented by one in the output.
    An output value of n therefore means the element remained in the mask for
    n rounds; the higher the value, the more likely an element is part of the
    core of the density map.

    Returns
    -------
    NDArray
        An array with same shape as internal data array. Values contained
        indicate how many rounds of binary erosion were necessary to nullify
        a given data element.

    References
    ----------
    .. [1] Gydo Zundert and Alexandre Bonvin. Fast and sensitive rigid-body
        fitting into cryo-em density maps with powerfit. AIMS Biophysics,
        2:73–87, 04 2015. doi:10.3934/biophy.2015.2.73
    """
    erosion_rounds = np.zeros(self.shape)
    remaining = self.data > 0
    while remaining.any():
        erosion_rounds += remaining
        remaining = binary_erosion(remaining)
    return erosion_rounds
|
2008
|
+
|
2009
|
+
@staticmethod
|
2010
|
+
def center_of_mass(arr: NDArray, cutoff: float = None) -> NDArray:
    """
    Computes the center of mass of a numpy ndarray instance using all available
    elements. For template matching it typically makes sense to only input
    positive densities.

    Parameters
    ----------
    arr : NDArray
        Array to compute the center of mass of.
    cutoff : float, optional
        Densities less than or equal to cutoff are nullified for center
        of mass computation. By default considers all values.

    Returns
    -------
    NDArray
        Center of mass with shape (arr.ndim).

    Notes
    -----
    The result is the weighted mean of the element indices. If the
    (thresholded) elements sum to zero the division is undefined and the
    result contains non-finite values.
    """
    arr = np.asarray(arr)
    # Only threshold when a cutoff was requested. Deriving a default cutoff
    # from ``arr.min() - 1`` can wrap around for unsigned integer arrays and
    # would then discard every element.
    if cutoff is not None:
        arr = np.where(arr > cutoff, arr, 0)

    denominator = np.sum(arr)
    # One broadcastable index grid per axis.
    grids = np.ogrid[tuple(slice(0, size) for size in arr.shape)]

    center_of_mass = np.array(
        [
            np.sum(np.multiply(arr, grids[dim].astype(float))) / denominator
            for dim in range(arr.ndim)
        ]
    )

    return center_of_mass
|
2042
|
+
|
2043
|
+
@classmethod
|
2044
|
+
def match_densities(
    cls,
    target: "Density",
    template: "Density",
    cutoff_target: float = 0,
    cutoff_template: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
) -> Tuple["Density", NDArray, NDArray]:
    """
    Aligns two :py:class:`Density` instances target and template and returns
    the aligned template.

    If voxel sizes of target and template do not match, coordinates are scaled
    to the numerically smaller voxel size. Instances are prealigned based on their
    center of mass. Finally :py:class:`tme.matching_optimization.FitRefinement` is
    used to determine translation and rotation to map template to target.

    Parameters
    ----------
    target : Density
        The target map for alignment.
    template : Density
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    cutoff_template : float, optional
        The cutoff value for the template map, by default 0.
    scoring_method : str, optional
        The scoring method to use for alignment. See
        :py:class:`tme.matching_optimization.FitRefinement` for available methods,
        by default "NormalizedCrossCorrelation".

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as :py:class:`Density` object,
        translation in voxels and rotation matrix used for the transformation.

    Notes
    -----
    No densities below cutoff_template are present in the returned Density object.

    The ``sampling_rate`` attribute of the passed ``template`` is overwritten
    with its per-axis expanded form.
    """
    target_sampling_rate = np.array(target.sampling_rate)
    template_sampling_rate = np.array(template.sampling_rate)

    # Expand the sampling rates so that there is one value per data axis.
    target_sampling_rate = np.repeat(
        target_sampling_rate, target.data.ndim // target_sampling_rate.size
    )
    template_sampling_rate = np.repeat(
        template_sampling_rate, template.data.ndim // template_sampling_rate.size
    )

    if not np.allclose(target_sampling_rate, template_sampling_rate):
        print(
            "Voxel size of target and template do not match. "
            "Using smaller voxel size for refinement."
        )

    # Point clouds above the respective cutoff and their density values.
    # Indexing with tuple(coordinates) presumably requires to_pointcloud to
    # return an array of shape (dimensions, points) -- TODO confirm.
    target_coordinates = target.to_pointcloud(cutoff_target)
    target_weights = target.data[tuple(target_coordinates)]

    template_coordinates = template.to_pointcloud(cutoff_template)
    template_weights = template.data[tuple(template_coordinates)]

    # Express both point clouds in units of the smaller voxel size per axis.
    refinement_sampling_rate = np.minimum(
        target_sampling_rate, template_sampling_rate
    )
    target_scaling = np.divide(target_sampling_rate, refinement_sampling_rate)
    template_scaling = np.divide(template_sampling_rate, refinement_sampling_rate)
    target_coordinates = target_coordinates * target_scaling[:, None]
    template_coordinates = template_coordinates * template_scaling[:, None]

    # Prealign the template by the integer difference of the mass centers.
    # NOTE(review): the mass centers are computed on the unscaled data arrays
    # while the coordinates above were rescaled -- confirm this is intended
    # when the sampling rates differ.
    target_mass_center = cls.center_of_mass(target.data, cutoff_target)
    template_mass_center = cls.center_of_mass(template.data, cutoff_template)
    mass_center_difference = np.subtract(
        target_mass_center, template_mass_center
    ).astype(int)
    template_coordinates += mass_center_difference[:, None]

    # Coordinates are already in common units, hence a sampling rate of one.
    matcher = FitRefinement()
    translation, rotation_matrix, score = matcher.refine(
        target_coordinates=target_coordinates,
        template_coordinates=template_coordinates,
        target_weights=target_weights,
        template_weights=template_weights,
        scoring_class=scoring_method,
        sampling_rate=np.ones(template.data.ndim),
    )

    # Add the prealignment shift and convert back to template voxels.
    translation += mass_center_difference
    translation = np.divide(translation, template_scaling)

    # This assignment modifies the template object passed by the caller.
    template.sampling_rate = template_sampling_rate.copy()
    # Only the rotation is applied to the data; the translation is expressed
    # through the origin of the returned instance below.
    ret = template.rigid_transform(
        rotation_matrix=rotation_matrix, use_geometric_center=False
    )
    ret.origin = target.origin.copy()
    ret.origin = ret.origin + np.multiply(translation, target_sampling_rate)

    return ret, translation, rotation_matrix
|
2144
|
+
|
2145
|
+
@classmethod
|
2146
|
+
def match_structure_to_density(
    cls,
    target: "Density",
    template: "Structure",
    cutoff_target: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
) -> Tuple["Structure", NDArray, NDArray]:
    """
    Aligns a :py:class:`tme.structure.Structure` template to :py:class:`Density`
    target and returns an aligned :py:class:`tme.structure.Structure` instance.

    The template is converted to a density at the sampling rate of the target
    and aligned with :py:meth:`Density.match_densities`, using a template
    cutoff of 0. The resulting transformation is then applied to a copy of
    the template.

    Parameters
    ----------
    target : Density
        The target map for template matching.
    template : Structure
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    scoring_method : str, optional
        The scoring method to use for template matching. See
        :py:class:`tme.matching_optimization.FitRefinement` for available methods,
        by default "NormalizedCrossCorrelation".

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as
        :py:class:`tme.structure.Structure` object, translation and rotation
        matrix used for the transformation.

    Notes
    -----
    Translation and rotation are in xyz format, different from
    :py:meth:`match_densities`, which is zyx.
    """
    template_density = cls.from_structure(
        filename_or_structure=template, sampling_rate=target.sampling_rate
    )

    ret, translation, rotation_matrix = cls.match_densities(
        target=target,
        template=template_density,
        cutoff_target=cutoff_target,
        cutoff_template=0,
        scoring_method=scoring_method,
    )
    out = template.copy()
    # Convert the voxel translation into the units of the template density
    # and account for its origin.
    final_translation = np.add(
        -template_density.origin,
        np.multiply(translation, template_density.sampling_rate),
    )

    # Atom coordinates are in xyz
    final_translation = final_translation[::-1]
    rotation_matrix = rotation_matrix[::-1, ::-1]

    # Use the returned object if the transform yields a new instance instead
    # of modifying ``out`` in place. An in-place implementation, whatever it
    # returns, leaves ``out`` as the transformed copy.
    transformed = out.rigid_transform(
        translation=final_translation, rotation_matrix=rotation_matrix
    )
    if isinstance(transformed, type(out)):
        out = transformed

    return out, final_translation, rotation_matrix
|
2215
|
+
|
2216
|
+
@staticmethod
|
2217
|
+
def align_coordinate_systems(target: "Density", template: "Density") -> "Density":
    """
    Aligns the coordinate system of `target` and `template`.

    A copy of `template` is padded to the shape of `target`, translated by
    the difference between both origins in voxels and assigned the origin
    of `target`.

    Parameters
    ----------
    target : Density
        The target density whose coordinate system should remain unchanged.
    template : Density
        The template density that will be aligned to match the target's
        coordinate system.

    Raises
    ------
    ValueError
        If the `sampling_rate` of `target` and `template` do not match.

    Returns
    -------
    Density
        A copy of `template` aligned to the coordinate system of `target`.
        The `box_size` and `origin` will match that of `target`.

    See Also
    --------
    :py:meth:`Density.match_densities` : To match aligned template to target.
    """
    rates_match = np.allclose(target.sampling_rate, template.sampling_rate)
    if not rates_match:
        raise ValueError("sampling_rate of both maps have to match.")

    aligned = template.copy()
    aligned.pad(target.shape, center=True)

    # Origin offset converted from physical units to voxels.
    offset = np.subtract(aligned.origin, target.origin)
    voxel_offset = np.divide(offset, target.sampling_rate)

    # Pure translation: identity rotation of matching dimensionality.
    identity = np.eye(aligned.data.ndim)
    aligned = aligned.rigid_transform(
        rotation_matrix=identity, translation=voxel_offset
    )
    aligned.origin = target.origin.copy()
    return aligned
|
2258
|
+
|
2259
|
+
@staticmethod
|
2260
|
+
def fourier_shell_correlation(density1: "Density", density2: "Density") -> NDArray:
    """
    Computes the Fourier Shell Correlation (FSC) between two instances of `Density`.

    The Fourier transforms of the input maps are divided into shells
    based on their spatial frequency. The correlation between corresponding shells
    in the two maps is computed to give the FSC.

    Parameters
    ----------
    density1 : Density
        An instance of `Density` class for the first map for comparison.
    density2 : Density
        An instance of `Density` class for the second map for comparison.

    Returns
    -------
    NDArray
        An array of shape (N, 2), where N is the number of shells,
        the first column represents the spatial frequency for each shell
        and the second column represents the corresponding FSC.

    Notes
    -----
    The frequency grid is built from three axes whose length is taken from
    the first axis of ``density1``, i.e. both maps are expected to be cubic
    three-dimensional arrays of identical shape.

    References
    ----------
    .. [1] https://github.com/tdgrant1/denss/blob/master/saxstats/saxstats.py
    """
    n_samples = density1.data.shape[0]
    frequency_step = 1.0 / n_samples

    # Radial frequency of every Fourier coefficient.
    axis_frequencies = np.fft.fftfreq(n_samples) * n_samples * frequency_step
    qx, qy, qz = np.meshgrid(
        axis_frequencies, axis_frequencies, axis_frequencies, indexing="ij"
    )
    radius = np.sqrt(qx**2 + qy**2 + qz**2)

    # Shell width is the smallest non-zero radial frequency.
    largest_radius = np.max(radius)
    shell_width = np.min(radius[radius > 0])
    n_shells = int(largest_radius / shell_width)
    shell_edges = np.linspace(0, n_shells * shell_width, n_shells + 1)
    shell_labels = np.searchsorted(shell_edges, radius, "right") - 1
    shell_labels = shell_labels.reshape(-1)

    transform1 = np.fft.fftn(density1.data)
    transform2 = np.fft.fftn(density2.data)

    # Accumulate cross- and auto-correlation terms per shell.
    cross_term = np.real(transform1 * np.conj(transform2)).reshape(-1)
    numerator = np.bincount(shell_labels, weights=cross_term)
    power1 = np.bincount(shell_labels, weights=np.abs(transform1).reshape(-1) ** 2)
    power2 = np.bincount(shell_labels, weights=np.abs(transform2).reshape(-1) ** 2)
    denominator = np.sqrt(np.multiply(power1, power2))
    correlation = np.divide(numerator, denominator)

    # Only report shells below the largest frequency along a single axis.
    selected = np.where(shell_edges < qx.max())

    return np.vstack((shell_edges[selected], correlation[selected])).T
|