pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
  2. pytme-0.1.5.data/scripts/match_template.py +744 -0
  3. pytme-0.1.5.data/scripts/postprocess.py +279 -0
  4. pytme-0.1.5.data/scripts/preprocess.py +93 -0
  5. pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
  6. pytme-0.1.5.dist-info/LICENSE +153 -0
  7. pytme-0.1.5.dist-info/METADATA +69 -0
  8. pytme-0.1.5.dist-info/RECORD +63 -0
  9. pytme-0.1.5.dist-info/WHEEL +5 -0
  10. pytme-0.1.5.dist-info/entry_points.txt +6 -0
  11. pytme-0.1.5.dist-info/top_level.txt +2 -0
  12. scripts/__init__.py +0 -0
  13. scripts/estimate_ram_usage.py +81 -0
  14. scripts/match_template.py +744 -0
  15. scripts/match_template_devel.py +788 -0
  16. scripts/postprocess.py +279 -0
  17. scripts/preprocess.py +93 -0
  18. scripts/preprocessor_gui.py +729 -0
  19. tme/__init__.py +6 -0
  20. tme/__version__.py +1 -0
  21. tme/analyzer.py +1144 -0
  22. tme/backends/__init__.py +134 -0
  23. tme/backends/cupy_backend.py +309 -0
  24. tme/backends/matching_backend.py +1154 -0
  25. tme/backends/npfftw_backend.py +763 -0
  26. tme/backends/pytorch_backend.py +526 -0
  27. tme/data/__init__.py +0 -0
  28. tme/data/c48n309.npy +0 -0
  29. tme/data/c48n527.npy +0 -0
  30. tme/data/c48n9.npy +0 -0
  31. tme/data/c48u1.npy +0 -0
  32. tme/data/c48u1153.npy +0 -0
  33. tme/data/c48u1201.npy +0 -0
  34. tme/data/c48u1641.npy +0 -0
  35. tme/data/c48u181.npy +0 -0
  36. tme/data/c48u2219.npy +0 -0
  37. tme/data/c48u27.npy +0 -0
  38. tme/data/c48u2947.npy +0 -0
  39. tme/data/c48u3733.npy +0 -0
  40. tme/data/c48u4749.npy +0 -0
  41. tme/data/c48u5879.npy +0 -0
  42. tme/data/c48u7111.npy +0 -0
  43. tme/data/c48u815.npy +0 -0
  44. tme/data/c48u83.npy +0 -0
  45. tme/data/c48u8649.npy +0 -0
  46. tme/data/c600v.npy +0 -0
  47. tme/data/c600vc.npy +0 -0
  48. tme/data/metadata.yaml +80 -0
  49. tme/data/quat_to_numpy.py +42 -0
  50. tme/data/scattering_factors.pickle +0 -0
  51. tme/density.py +2314 -0
  52. tme/extensions.cpython-311-darwin.so +0 -0
  53. tme/helpers.py +881 -0
  54. tme/matching_data.py +377 -0
  55. tme/matching_exhaustive.py +1553 -0
  56. tme/matching_memory.py +382 -0
  57. tme/matching_optimization.py +1123 -0
  58. tme/matching_utils.py +1180 -0
  59. tme/parser.py +429 -0
  60. tme/preprocessor.py +1291 -0
  61. tme/scoring.py +866 -0
  62. tme/structure.py +1428 -0
  63. tme/types.py +10 -0
tme/density.py ADDED
@@ -0,0 +1,2314 @@
1
+ """ Implements class to represent electron density maps.
2
+
3
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
+
5
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ """
7
+
8
+ import warnings
9
+ from io import BytesIO
10
+ from copy import deepcopy
11
+ from gzip import open as gzip_open
12
+ from typing import Tuple, Dict, Set
13
+ from os.path import splitext, basename
14
+
15
+ import mrcfile
16
+ import numpy as np
17
+ import skimage.io as skio
18
+
19
+ from scipy.ndimage import (
20
+ laplace,
21
+ generic_gradient_magnitude,
22
+ minimum_filter,
23
+ sobel,
24
+ binary_erosion,
25
+ zoom,
26
+ )
27
+ from scipy.spatial import ConvexHull
28
+
29
+ from .matching_optimization import FitRefinement
30
+ from .structure import Structure
31
+ from .matching_utils import (
32
+ minimum_enclosing_box,
33
+ array_to_memmap,
34
+ memmap_to_array,
35
+ )
36
+ from .types import NDArray
37
+ from .helpers import is_gzipped
38
+ from .backends import NumpyFFTWBackend
39
+
40
+
41
+ class Density:
42
+ """
43
+ Contains electron density data and implements operations on it.
44
+
45
+ Parameters
46
+ ----------
47
+ data : NDArray
48
+ Electron density data.
49
+ origin : NDArray, optional
50
+ Origin of the coordinate system. Defaults to zero.
51
+ sampling_rate : NDArray, optional
52
+ Sampling rate along data axis. Defaults to one.
53
+ metadata : dict, optional
54
+ Dictionary with metadata information, empty by default.
55
+
56
+ Raises
57
+ ------
58
+ ValueError
59
+ The metadata parameter is not a dictionary.
60
+
61
+ Examples
62
+ --------
63
+ The following achieves the minimal definition of a :py:class:`Density` instance.
64
+
65
+ >>> import numpy as np
66
+ >>> from tme import Density
67
+ >>> data = np.random.rand(50,70,40)
68
+ >>> Density(data = data)
69
+
70
+ Optional parameters are ``origin`` and ``sampling_rate`` that correspond
71
+ to the coordinate system reference and the edge length per axis element,
72
+ as well as the ``metadata`` dictionary. By default,
73
+ :py:attr:`Density.origin` is set to zero and :py:attr:`Density.sampling_rate`
74
+ to 1. If provided, origin or sampling_rate either need to be a single value:
75
+
76
+ >>> Density(data = data, origin = 0, sampling_rate = 1)
77
+
78
+ Be specified along each data axis:
79
+
80
+ >>> Density(data = data, origin = (0, 0, 0), sampling_rate = (1.5, 1.1, 1.2))
81
+
82
+ Or a combination of both:
83
+
84
+ >>> Density(data = data, origin = 0, sampling_rate = (1.5, 1.1, 1.2))
85
+ """
86
+
87
+ def __init__(
88
+ self,
89
+ data: NDArray,
90
+ origin: NDArray = None,
91
+ sampling_rate: NDArray = None,
92
+ metadata: Dict = {},
93
+ ):
94
+ origin = np.zeros(data.ndim) if origin is None else origin
95
+ sampling_rate = 1 if sampling_rate is None else sampling_rate
96
+ origin, sampling_rate = np.asarray(origin), np.asarray(sampling_rate)
97
+ origin = np.repeat(origin, data.ndim // origin.size)
98
+ sampling_rate = np.repeat(sampling_rate, data.ndim // sampling_rate.size)
99
+
100
+ if sampling_rate.size != data.ndim:
101
+ raise ValueError(
102
+ "sampling_rate size should be 1 or "
103
+ f"{data.ndim}, not {sampling_rate.size}."
104
+ )
105
+ if origin.size != data.ndim:
106
+ raise ValueError(f"Expected origin size : {data.ndim}, got {origin.size}.")
107
+ if type(metadata) != dict:
108
+ raise ValueError("Argument metadata has to be of class dict.")
109
+
110
+ self.data, self.origin, self.sampling_rate = data, origin, sampling_rate
111
+ self.metadata = metadata
112
+
113
+ def __repr__(self):
114
+ response = "Density object at {}\nOrigin: {}, sampling_rate: {}, Shape: {}"
115
+ return response.format(
116
+ hex(id(self)),
117
+ tuple(np.round(self.origin, 3)),
118
+ tuple(np.round(self.sampling_rate, 3)),
119
+ self.shape,
120
+ )
121
+
122
@classmethod
def from_file(
    cls, filename: str, subset: Tuple[slice] = None, use_memmap: bool = False
) -> "Density":
    """
    Create a :py:class:`Density` instance from a file on disk.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC, EM or a format supported by
        skimage.io.imread. The file can be gzip compressed.
    subset : tuple of slices, optional
        Slices representing the desired subset along each dimension.
    use_memmap : bool, optional
        Whether the data attribute of the returned instance should be
        memory mapped.

    Returns
    -------
    Density
        An instance of the :py:class:`Density` class.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005.
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    Reading a file in CCP4/MRC format [1]_:

    >>> from tme import Density
    >>> Density.from_file("/path/to/file.mrc")

    Reading only the first 50 voxels along each dimension:

    >>> subset_slices = (slice(0, 50), slice(0, 50), slice(0, 50))
    >>> Density.from_file("/path/to/file.mrc", subset=subset_slices)

    Memory mapping a large file instead of loading it entirely:

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap=True)

    Files ending with ``.em`` or ``.em.gz`` are parsed as EM files [2]_:

    >>> Density.from_file("/path/to/file.em")
    >>> Density.from_file("/path/to/file.em.gz")

    Other formats are passed to skimage.io.imread [3]_:

    >>> Density.from_file("/path/to/other_format.tif")

    Notes
    -----
    The reader is chosen from the file extension: ".em" and ".em.gz" select
    the EM reader, everything else the CCP4/MRC reader. If the selected
    reader raises a ValueError, the file is read with skimage.io.imread
    instead. That fallback returns a zero origin and a unit sampling rate
    and applies ``subset`` after the complete file has been read.

    See Also
    --------
    :py:meth:`Density.to_file`

    """
    is_em_file = filename.endswith((".em", ".em.gz"))
    try:
        loader = cls._load_em if is_em_file else cls._load_mrc
        data, origin, sampling_rate = loader(
            filename=filename, subset=subset, use_memmap=use_memmap
        )
    except ValueError:
        # Neither dedicated reader could handle the file, try the generic one
        data, origin, sampling_rate = cls._load_skio(filename=filename)
        if subset is not None:
            cls._validate_slices(slices=subset, shape=data.shape)
            data = data[subset].copy()

    return cls(data=data, origin=origin, sampling_rate=sampling_rate)
212
+
213
@classmethod
def _load_mrc(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray]:
    """
    Extracts data, origin and sampling rate from a CCP4/MRC file.

    Parameters
    ----------
    filename : str
        Path to a file in CCP4/MRC format.
    subset : tuple of slices, optional
        Slices with explicit start and stop representing the desired subset
        along each dimension.
    use_memmap : bool, optional
        Whether the returned data should be memory mapped. Ignored with a
        warning if the file is gzip compressed.

    Returns
    -------
    NDArray
        The data attribute of the CCP4/MRC file.
    NDArray
        The coordinate origin of the data in z, y, x order.
    NDArray
        The sampling rate of the data in z, y, x order.

    References
    ----------
    .. [1] Burnley T, Palmer C & Winn M (2017) Recent developments in the
        CCP-EM software suite. Acta Cryst. D73:469–477.
        doi: 10.1107/S2059798317007859

    Raises
    ------
    ValueError
        If the column/row/section mapping of the file is malformatted.
        If ``subset`` is rejected by :py:meth:`Density._validate_slices`.

    Notes
    -----
    If the origin stored in the header is zero but the grid start indices
    are not, the origin is computed as start index times sampling rate.

    See Also
    --------
    :py:meth:`Density.from_file`

    """
    with mrcfile.open(filename, header_only=True) as mrc:
        data_shape = mrc.header.nz, mrc.header.ny, mrc.header.nx
        data_type = mrcfile.utils.data_dtype_from_header(mrc.header)

        # Voxel data starts after the fixed 1024 byte header AND the
        # extended header, whose size in bytes is stored in nsymbt.
        header_size = 1024 + int(mrc.header.nsymbt)

        # All map related parameters should be in zyx order
        origin = (
            mrc.header["origin"]
            .astype([("x", "<f4"), ("y", "<f4"), ("z", "<f4")])
            .view(("<f4", 3))
        )
        origin = origin[::-1]

        # Grid start indices, in the same zyx order as origin and
        # sampling_rate so they can be compared and multiplied elementwise.
        start = np.array(
            [
                int(mrc.header["nzstart"]),
                int(mrc.header["nystart"]),
                int(mrc.header["nxstart"]),
            ]
        )

        # mapc := column; mapr := row; maps := section;
        crs_index = (
            np.array(
                [
                    int(mrc.header["mapc"]),
                    int(mrc.header["mapr"]),
                    int(mrc.header["maps"]),
                ]
            )
            - 1
        )
        if not (0 in crs_index and 1 in crs_index and 2 in crs_index):
            raise ValueError(f"Malformatted CRS array in {filename}")

        sampling_rate = mrc.voxel_size.astype(
            [("x", "<f4"), ("y", "<f4"), ("z", "<f4")]
        ).view(("<f4", 3))
        sampling_rate = sampling_rate[::-1]
        sampling_rate = np.array(sampling_rate)

    if np.all(origin == start):
        pass
    elif np.all(origin == 0) and not np.all(start == 0):
        origin = np.multiply(start, sampling_rate)
    elif np.all(
        np.abs(origin.astype(int)) != np.abs((start * sampling_rate).astype(int))
    ) and not np.all(start == 0):
        origin = np.multiply(start, sampling_rate)

    if is_gzipped(filename):
        if use_memmap:
            warnings.warn(
                f"Cannot open gzipped file {filename} as memmap."
                f" Please gunzip {filename} to use memmap functionality."
            )
        use_memmap = False

    if subset is not None:
        subset_shape = [x.stop - x.start for x in subset]
        if np.allclose(subset_shape, data_shape):
            # The subset spans the whole map, use the regular reader
            return cls._load_mrc(
                filename=filename, subset=None, use_memmap=use_memmap
            )

        data = cls._read_binary_subset(
            filename=filename,
            slices=subset,
            data_shape=data_shape,
            dtype=data_type,
            header_size=header_size,
        )
        return data, origin, sampling_rate

    if not use_memmap:
        with mrcfile.open(filename, header_only=False) as mrc:
            data = mrc.data.astype(np.float32, copy=False)
    else:
        with mrcfile.mrcmemmap.MrcMemmap(filename, header_only=False) as mrc:
            data = mrc.data

    if not np.all(crs_index == (0, 1, 2)):
        data.setflags(write=True)
        data = np.transpose(data, crs_index)

    return data, origin, sampling_rate
345
+
346
@classmethod
def _load_em(
    cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
) -> Tuple[NDArray]:
    """
    Extracts data, origin and sampling rate from an EM file.

    Parameters
    ----------
    filename : str
        Path to a file in EM format. The file can be gzip compressed.
    subset : tuple of slices, optional
        Slices with explicit start and stop representing the desired subset
        along each dimension.
    use_memmap : bool, optional
        Whether the returned data should be memory mapped. Ignored with a
        warning if the file is gzip compressed, and not used when a subset
        is read.

    Returns
    -------
    NDArray
        The data attribute of the EM file.
    NDArray
        The coordinate origin of the data, always zero.
    NDArray
        The sampling rate of the data.

    References
    ----------
    .. [1] Nickell S. et al, Journal of Structural Biology, 2005.

    Raises
    ------
    ValueError
        If the data type code in the header is not supported.

    Warns
    -----
    Warns if the pixel size is zero or if memory mapping was requested for
    a gzip compressed file.

    Notes
    -----
    A pixel size of zero will be treated as missing value and changed to one. This
    function does not yet extract an origin like :py:meth:`Density._load_mrc`.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    DATA_TYPE_CODING = {
        1: np.byte,
        2: np.int16,
        3: np.int32,
        5: np.float32,
        6: np.float64,
        8: np.complex64,
        9: np.complex128,
    }

    gzipped = is_gzipped(filename)
    if gzipped and use_memmap:
        # The decompressed content only exists in memory and can not be
        # memory mapped.
        warnings.warn(
            f"Cannot open gzipped file {filename} as memmap."
            f" Please gunzip {filename} to use memmap functionality."
        )
        use_memmap = False

    func = gzip_open if gzipped else open
    with func(filename, mode="rb") as f:
        if gzipped:
            f = BytesIO(f.read())

        # Byte 3 of the header holds the data type code
        f.seek(3, 1)
        data_type_code = np.frombuffer(f.read(1), dtype="<i1")[0]
        data_type = DATA_TYPE_CODING.get(data_type_code)
        if data_type is None:
            # np.dtype(None) would silently be interpreted as float64
            raise ValueError(
                f"Unsupported EM data type code {data_type_code} in {filename}."
            )
        data_type = np.dtype(data_type)

        # Header dimensions are reversed to obtain the numpy array shape
        data_shape = np.frombuffer(f.read(3 * 4), dtype="<i4")[::-1]

        f.seek(80, 1)
        user_params = np.frombuffer(f.read(40 * 4), dtype="<i4")

        pixel_size = user_params[6] / 1000.0
        f.seek(256, 1)

        if use_memmap and subset is None:
            data = np.memmap(f, dtype=data_type, mode="r", offset=f.tell()).reshape(
                data_shape
            )
        elif subset is None:
            data_size = int(np.prod(data_shape)) * data_type.itemsize
            data = np.frombuffer(f.read(data_size), dtype=data_type).reshape(
                data_shape
            )
            data = data.astype(np.float32)
        else:
            subset_shape = [x.stop - x.start for x in subset]
            if np.allclose(subset_shape, data_shape):
                # The subset spans the whole map, use the regular reader
                return cls._load_em(
                    filename=filename, subset=None, use_memmap=use_memmap
                )

            data = cls._read_binary_subset(
                filename=filename,
                slices=subset,
                data_shape=data_shape,
                dtype=data_type,
                header_size=f.tell(),
            )

    # Casting origin and sampling rate to a non floating point data dtype
    # would truncate the pixel size, so float32 is used in that case.
    if np.issubdtype(data.dtype, np.floating):
        meta_dtype = data.dtype
    else:
        meta_dtype = np.float32
    origin = np.zeros(3, dtype=meta_dtype)

    if pixel_size == 0:
        warnings.warn(
            f"Got invalid sampling rate {pixel_size}, overwriting it to 1."
        )
        pixel_size = 1
    sampling_rate = np.repeat(pixel_size, data.ndim).astype(meta_dtype)

    return data, origin, sampling_rate
450
+
451
+ @staticmethod
452
+ def _validate_slices(slices: Tuple[slice], shape: Tuple[int]):
453
+ """
454
+ Validate whether the given slices fit within the provided data shape.
455
+
456
+ Parameters
457
+ ----------
458
+ slices : Tuple[slice]
459
+ A tuple of slice objects, one per dimension of the data.
460
+ shape : Tuple[int]
461
+ The shape of the data being sliced, as a tuple of integers.
462
+
463
+ Raises
464
+ ------
465
+ ValueError
466
+ - If the length of `slices` doesn't match the dimension of shape.
467
+ - If any slice has a stop value exceeding any dimension in shape.
468
+ - If any slice has a stop value that is negative.
469
+ """
470
+
471
+ n_dims = len(shape)
472
+ if len(slices) != n_dims:
473
+ raise ValueError(
474
+ f"Expected length of slices : {n_dims}, got : {len(slices)}"
475
+ )
476
+
477
+ if any([slices[i].stop > shape[i] for i in range(n_dims)]):
478
+ raise ValueError(f"Subset exceeds data dimensions ({shape}).")
479
+
480
+ if any([slices[i].stop < 0 for i in range(n_dims)]):
481
+ raise ValueError("Subsets have to be non-negative.")
482
+
483
@classmethod
def _read_binary_subset(
    cls,
    filename: str,
    slices: Tuple[slice],
    data_shape: Tuple[int],
    dtype: type,
    header_size: int,
) -> NDArray:
    """
    Read a subset of data from a binary file with a header.

    Parameters
    ----------
    filename : str
        Path to the binary file. The file can be gzip compressed.
    slices : tuple of slice objects
        Slices with explicit start and stop representing the desired subset
        in each dimension.
    data_shape : tuple of ints
        Shape of the complete dataset in the file.
    dtype : numpy dtype
        Data type of the dataset in the file. Anything accepted by
        ``numpy.dtype`` can be passed.
    header_size : int
        Size of the file's header in bytes.

    Returns
    -------
    NDArray
        Subset of the dataset as specified by the slices.

    Raises
    ------
    NotImplementedError
        If the data is not three dimensional.
    ValueError
        If ``slices`` is rejected by :py:meth:`Density._validate_slices`.

    See Also
    --------
    :py:meth:`Density._load_mrc`
    :py:meth:`Density._load_em`
    """
    n_dims = len(data_shape)
    if n_dims != 3:
        raise NotImplementedError("Only 3-dimensional data can be subsetted.")

    cls._validate_slices(slices=slices, shape=data_shape)

    # Shapes taken from file headers are fixed width numpy integers. Python
    # integers keep the byte offsets below from overflowing on large files.
    data_shape = tuple(int(x) for x in data_shape)
    header_size = int(header_size)

    # Accept dtype objects as well as scalar types such as np.float32
    dtype = np.dtype(dtype)
    bytes_per_item = dtype.itemsize

    subset_shape = [s.stop - s.start for s in slices]
    subset_data = np.empty(subset_shape, dtype=dtype)

    # Along the last axis the requested values are contiguous in the file
    row_bytes = (slices[2].stop - slices[2].start) * bytes_per_item
    full_row_bytes = data_shape[2] * bytes_per_item
    x_offset = slices[2].start * bytes_per_item

    gzipped = is_gzipped(filename)
    func = gzip_open if gzipped else open
    with func(filename, mode="rb") as f:
        if gzipped:
            # Decompress once to make the repeated seeks below cheap
            f = BytesIO(f.read())

        for z_index, z in enumerate(range(slices[0].start, slices[0].stop)):
            base_offset_z = header_size + z * data_shape[1] * full_row_bytes

            for y_index, y in enumerate(range(slices[1].start, slices[1].stop)):
                offset = base_offset_z + y * full_row_bytes + x_offset
                f.seek(offset)
                row = np.frombuffer(f.read(row_bytes), dtype=dtype)
                subset_data[z_index, y_index] = row

    return subset_data
552
+
553
@staticmethod
def _load_skio(filename: str) -> Tuple[NDArray]:
    """
    Reads data from filename using skimage.io.imread.

    Parameters
    ----------
    filename : str
        Path to a file whose format is supported by skimage.io.imread. Gzip
        compressed files are decompressed in memory before reading.

    Returns
    -------
    NDArray
        The data attribute of the file.
    NDArray
        The coordinate origin of the data, zero along each axis.
    NDArray
        The sampling rate of the data, one along each axis.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html

    Warns
    -----
    Warns that origin and sampling_rate are not yet extracted from ``filename``.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    source = filename
    if is_gzipped(filename):
        with gzip_open(filename, "rb") as compressed:
            source = BytesIO(compressed.read())

    data = skio.imread(source)
    warnings.warn(
        "origin and sampling_rate are not yet extracted from non CCP4/MRC files."
    )

    n_axes = data.ndim
    return data, np.zeros(n_axes), np.ones(n_axes)
594
+
595
@classmethod
def from_structure(
    cls,
    filename_or_structure: str,
    shape: Tuple[int] = None,
    sampling_rate: NDArray = None,
    origin: Tuple[float] = None,
    weight_type: str = "atomic_weight",
    scattering_args: Dict = None,
    chain: str = None,
    filter_by_elements: Set = None,
    filter_by_residues: Set = None,
) -> "Density":
    """
    Reads in an atomic structure and converts it into a :py:class:`Density`
    instance.

    Parameters
    ----------
    filename_or_structure : str or :py:class:`tme.structure.Structure`
        Either :py:class:`tme.structure.Structure` instance or path to
        structure file that can be read by
        :py:meth:`tme.structure.Structure.from_file`.
    shape : tuple of int, optional
        Shape of the new :py:class:`Density` instance. By default,
        computes the minimum 3D box holding all atoms.
    sampling_rate : float, optional
        Sampling rate of the output array along each axis, in the same unit
        as the atoms in the structure. Defaults to one Ångstrom
        per axis unit.
    origin : tuple of float, optional
        Origin of the coordinate system. If provided, it is expected to be in
        z, y, x form in the same unit as the atoms in the structure.
        By default, computes origin as distance between minimal coordinate
        and coordinate system origin.
    weight_type : str, optional
        Which weight should be given to individual atoms. For valid values
        see :py:meth:`tme.structure.Structure.to_volume`.
    scattering_args : dict, optional
        Additional arguments for scattering factor computation. Defaults to
        an empty dictionary.
    chain : str, optional
        The chain that should be extracted from the structure. If multiple chains
        should be selected, they need to be a comma separated string,
        e.g. 'A,B,CE'. If chain None, all chains are returned. Default is None.
    filter_by_elements : set, optional
        Set of atomic elements to keep. Default is all atoms. Only used
        when ``filename_or_structure`` is a path.
    filter_by_residues : set, optional
        Set of residues to keep. Default is all residues. Only used
        when ``filename_or_structure`` is a path.

    Returns
    -------
    :py:class:`Density`
        Newly created :py:class:`Density` instance.

    References
    ----------
    .. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
        of atomic models into electron density maps. AIMS Biophysics
        2, 8–20.

    Examples
    --------
    The following outlines the minimal parameters needed to read in an
    atomic structure and convert it into a :py:class:`Density` instance. For
    specification on supported formats refer to
    :py:meth:`tme.structure.Structure.from_file`.

    >>> path_to_structure = "/path/to/structure.cif"
    >>> density = Density.from_structure(path_to_structure)

    The following will read in chain A of an atomic structure and discretize
    it on a grid of dimension 100 x 100 x 100 using a sampling rate of
    2.5 Angstrom per voxel.

    >>> density = Density.from_structure(
    >>>    filename_or_structure = path_to_structure,
    >>>    shape = (100, 100, 100),
    >>>    sampling_rate = 2.5,
    >>>    chain = "A"
    >>> )

    We can restrict the generated :py:class:`Density` instance to only contain
    specific elements like carbon and nitrogen:

    >>> density = Density.from_structure(
    >>>    filename_or_structure = path_to_structure,
    >>>    filter_by_elements = {"C", "N"}
    >>> )

    or specified residues such as polar amino acids:

    >>> density = Density.from_structure(
    >>>    filename_or_structure = path_to_structure,
    >>>    filter_by_residues = {"SER", "THR", "CYS", "ASN", "GLN", "TYR"}
    >>> )

    In addition to 'atomic_weight', 'atomic_number' and
    'van_der_waals_radius' it is possible to use experimentally determined
    scattering factors from various sources:

    >>> density = Density.from_structure(
    >>>    filename_or_structure = path_to_structure,
    >>>    weight_type = "scattering_factors",
    >>>    scattering_args={"source": "dt1969"}
    >>> )

    or a lowpass filtered representation introduced in [1]_:

    >>> density = Density.from_structure(
    >>>    filename_or_structure = path_to_structure,
    >>>    weight_type = "lowpass_scattering_factors",
    >>>    scattering_args={"source": "dt1969"}
    >>> )

    See Also
    --------
    :py:meth:`tme.structure.Structure.from_file`
    :py:meth:`tme.structure.Structure.to_volume`
    """
    # Defaults are created per call so no array or dictionary object is
    # shared between invocations.
    sampling_rate = np.ones(1) if sampling_rate is None else sampling_rate
    scattering_args = {} if scattering_args is None else scattering_args

    structure = filename_or_structure
    if isinstance(filename_or_structure, str):
        structure = Structure.from_file(
            filename=filename_or_structure,
            filter_by_elements=filter_by_elements,
            filter_by_residues=filter_by_residues,
        )

    volume, origin, sampling_rate = structure.to_volume(
        shape=shape,
        sampling_rate=sampling_rate,
        origin=origin,
        chain=chain,
        weight_type=weight_type,
        scattering_args=scattering_args,
    )

    return cls(
        data=volume,
        origin=origin,
        sampling_rate=sampling_rate,
        metadata=structure.details.copy(),
    )
745
+
746
def to_file(self, filename: str, gzip: bool = False) -> None:
    """
    Writes the current class instance to disk.

    Parameters
    ----------
    filename : str
        Path to write to.
    gzip : bool, optional
        If True, the output will be gzip compressed and ".gz" will be added
        to the filename if not already present. By default False.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    .. [2] Nickell S. et al, Journal of Structural Biology, 2005
    .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

    Examples
    --------
    Create a :py:class:`Density` instance holding random values and write
    it to disk in CCP4/MRC format [1]_:

    >>> import numpy as np
    >>> from tme import Density
    >>> data = np.random.rand(50,50,50)
    >>> dens = Density(data = data, origin = (0, 0, 0), sampling_rate = (1, 1, 1))
    >>> dens.to_file("example.mrc")

    The output can be gzip compressed directly:

    >>> dens.to_file("example.mrc", gzip=True)

    EM files [2]_ are written based on the file extension:

    >>> dens.to_file("example.em")

    In addition, a variety of image file formats are supported [3]_:

    >>> data = np.random.rand(50,50)
    >>> dens = Density(data = data, origin = (0, 0), sampling_rate = (1, 1))
    >>> dens.to_file("example.tiff")

    Notes
    -----
    If ``filename`` ends with ".em" or ".em.gz", the method will create an EM file.
    Otherwise, it defaults to the CCP4/MRC format, and if that writer raises
    a ValueError, it falls back to `skimage.io.imsave`.

    See Also
    --------
    :py:meth:`Density.from_file`
    """
    if gzip and not filename.endswith(".gz"):
        filename = f"{filename}.gz"

    try:
        if filename.endswith((".em", ".em.gz")):
            writer = self._save_em
        else:
            writer = self._save_mrc
        writer(filename=filename, gzip=gzip)
    except ValueError:
        self._save_skio(filename=filename, gzip=gzip)
810
+
811
def _save_mrc(self, filename: str, gzip: bool) -> None:
    """
    Writes the current class instance to disk as CCP4/MRC file.

    The data is written as float32. Origin and sampling rate are stored in
    reversed axis order, and the grid start indices are set to the origin
    divided by the sampling rate, rounded up.

    Parameters
    ----------
    filename : str
        Path to write to. An existing file is overwritten.
    gzip : bool
        If True, the output will be gzip compressed.

    References
    ----------
    .. [1] Burnley T et al., Acta Cryst. D, 2017
    """
    compression = None
    if gzip:
        compression = "gzip"

    with mrcfile.new(filename, overwrite=True, compression=compression) as mrc:
        mrc.set_data(self.data.astype("float32"))

        grid_start = np.ceil(np.divide(self.origin, self.sampling_rate))
        mrc.header.nzstart, mrc.header.nystart, mrc.header.nxstart = grid_start
        mrc.header.mapc, mrc.header.mapr, mrc.header.maps = (1, 2, 3)

        # mrcfile library expects origin and voxel size to be in xyz format
        mrc.header["origin"] = tuple(self.origin[::-1])
        mrc.voxel_size = tuple(self.sampling_rate[::-1])
836
+
837
+ def _save_em(self, filename: str, gzip: bool) -> None:
838
+ """
839
+ Writes data to disk as an .em file.
840
+
841
+ Parameters
842
+ ----------
843
+ filename : str
844
+ Path to write the .em file to.
845
+ data : NDArray
846
+ Data to be saved.
847
+ origin : NDArray
848
+ Coordinate origin of the data.
849
+ sampling_rate : NDArray
850
+ Sampling rate of the data.
851
+
852
+ References
853
+ ----------
854
+ .. [1] Nickell S. et al, Journal of Structural Biology, 2005.
855
+ """
856
+ DATA_TYPE_MAPPING = {
857
+ np.dtype(np.int8): 1,
858
+ np.dtype(np.int16): 2,
859
+ np.dtype(np.int32): 3,
860
+ np.dtype(np.float32): 5,
861
+ np.dtype(np.float64): 6,
862
+ np.dtype(np.complex64): 8,
863
+ np.dtype(np.complex128): 9,
864
+ }
865
+
866
+ data_type_code = DATA_TYPE_MAPPING.get(self.data.dtype, 5)
867
+
868
+ func = gzip_open if gzip else open
869
+ with func(filename, "wb") as f:
870
+ f.write(np.array([0], dtype=np.int8).tobytes())
871
+ f.write(np.array([0, 0, data_type_code], dtype=np.int8).tobytes())
872
+ f.write(np.array(self.data.shape, dtype="<i4").tobytes())
873
+ f.write(b" " * 80)
874
+ user_params = np.zeros(40, dtype="<i4")
875
+ user_params[6] = int(self.sampling_rate[0] * 1000)
876
+ f.write(user_params.tobytes())
877
+ f.write(b" " * 256)
878
+ f.write(self.data.tobytes())
879
+
880
def _save_skio(self, filename: str, gzip: bool) -> None:
    """
    Writes data to filename using skimage.io.imsave.

    The data is written as float32. For gzip compressed output the image is
    first written to memory, using the extension of ``filename`` without
    ".gz" as format, and then compressed to ``filename``.

    Parameters
    ----------
    filename : str
        Path to write to with a format supported by skimage.io.imsave.
    gzip : bool
        If True, the output will be gzip compressed.

    References
    ----------
    .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html
    """
    target, extra = filename, {}
    if gzip:
        target = BytesIO()
        uncompressed_name = filename.replace(".gz", "")
        extra["format"] = splitext(basename(uncompressed_name))[1]

    skio.imsave(fname=target, arr=self.data.astype("float32"), **extra)
    if not gzip:
        return

    with gzip_open(filename, "wb") as outfile:
        outfile.write(target.getvalue())
903
+
904
@property
def empty(self) -> "Density":
    """
    Creates a zero-filled counterpart of the current class instance.

    The returned :py:class:`Density` has a :py:attr:`Density.data` array of
    identical shape and dtype filled with zeros, and copies of
    :py:attr:`Density.origin` and :py:attr:`Density.sampling_rate`. No
    metadata is passed on to the new instance.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> empty_density = original_density.empty
    >>> np.all(empty_density.data == 0)
    True
    """
    zeroed_data = np.zeros_like(self.data)
    origin_copy = deepcopy(self.origin)
    sampling_rate_copy = deepcopy(self.sampling_rate)
    return Density(
        data=zeroed_data, origin=origin_copy, sampling_rate=sampling_rate_copy
    )
926
+
927
def copy(self) -> "Density":
    """
    Returns a copy of the current :py:class:`Density` instance.

    All attributes (:py:attr:`Density.data`, :py:attr:`Density.origin`,
    :py:attr:`Density.sampling_rate` and :py:attr:`Density.metadata`) are
    copied, so modifying the returned instance does not affect the original.

    Returns
    -------
    Density
        Independent copy of the current class instance.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> original_density = Density.from_file("/path/to/file.mrc")
    >>> copied_density = original_density.copy()
    >>> np.all(copied_density.data == original_density.data)
    True
    """
    return Density(
        data=self.data.copy(),
        origin=deepcopy(self.origin),
        # Copied like the other attributes so the copy never aliases the
        # sampling rate array of the original instance.
        sampling_rate=deepcopy(self.sampling_rate),
        metadata=deepcopy(self.metadata),
    )
945
+
946
def to_memmap(self) -> None:
    """
    Converts the current class instance's :py:attr:`Density.data` attribute to
    a :obj:`numpy.memmap` instance.

    If :py:attr:`Density.data` already is a :obj:`numpy.memmap` (or a subclass
    thereof), the method returns without doing anything. The resulting
    memory map is opened read-only.

    Examples
    --------
    The following outlines how to use the :py:meth:`Density.to_memmap` method.

    >>> from tme import Density
    >>> large_density = Density.from_file("/path/to/large_file.mrc")
    >>> large_density.to_memmap()

    A more efficient solution to achieve the result outlined above is to
    provide the ``use_memmap`` flag in :py:meth:`Density.from_file`.

    >>> Density.from_file("/path/to/large_file.mrc", use_memmap = True)

    In practice, the :py:meth:`Density.to_memmap` method finds application, if a
    large number of :py:class:`Density` instances need to be in memory at once,
    without occupying the full physical memory required to store
    :py:attr:`Density.data`.

    See Also
    --------
    :py:meth:`Density.to_numpy`
    """
    # isinstance also recognizes memmap subclasses, which an exact type
    # comparison would needlessly write to disk again.
    if isinstance(self.data, np.memmap):
        return None

    filename = array_to_memmap(arr=self.data)

    self.data = np.memmap(
        filename, mode="r", dtype=self.data.dtype, shape=self.data.shape
    )
982
+
983
def to_numpy(self) -> None:
    """
    Replaces :py:attr:`Density.data` with the result of passing it through
    ``memmap_to_array``, i.e. an in-memory :obj:`numpy.ndarray`.

    Examples
    --------
    >>> from tme import Density
    >>> density = Density.from_file("/path/to/large_file.mrc")
    >>> density.to_memmap()  # Convert to memory-mapped array first
    >>> density.to_numpy()  # Now, convert back to an in-memory array

    See Also
    --------
    :py:meth:`Density.to_memmap`
    """
    in_memory = memmap_to_array(self.data)
    self.data = in_memory
1000
+
1001
@property
def shape(self) -> Tuple[int]:
    """
    Shape of the current instance's :py:attr:`Density.data` attribute.

    Returns
    -------
    tuple
        The dimensions of :py:attr:`Density.data`.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.shape
    (5,)
    """
    dimensions = self.data.shape
    return dimensions
1023
+
1024
@property
def data(self) -> NDArray:
    """
    The array held by the current class instance.

    Returns
    -------
    NDArray
        Value of the current instance's :py:attr:`Density.data` attribute.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.data
    array([0, 1, 1, 1, 0])
    """
    return self._data


@data.setter
def data(self, data: NDArray) -> None:
    """
    Stores ``data`` as the new :py:attr:`Density.data` attribute without
    copying or validating it.
    """
    self._data = data
1054
+
1055
@property
def origin(self) -> NDArray:
    """
    The coordinate origin of the current class instance.

    Returns
    -------
    NDArray
        Value of the current instance's :py:attr:`Density.origin` attribute.

    Examples
    --------
    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0, 1, 1, 1, 0]))
    >>> dens.origin
    array([0.])
    """
    return self._origin


@origin.setter
def origin(self, origin: NDArray) -> None:
    """
    Sets the origin of the class instance.

    ``origin`` is converted to an array and each of its elements is repeated
    ``data.ndim // origin.size`` times, so a single value is expanded to one
    value per axis of :py:attr:`Density.data`.
    """
    values = np.asarray(origin)
    repeats = self.data.ndim // values.size
    self._origin = np.repeat(values, repeats)
1086
+
1087
@property
def sampling_rate(self) -> NDArray:
    """
    The sampling rate along each axis of :py:attr:`Density.data`.
    """
    return self._sampling_rate


@sampling_rate.setter
def sampling_rate(self, sampling_rate: NDArray) -> None:
    """
    Sets the sampling rate of the class instance.

    ``sampling_rate`` is converted to an array and each of its elements is
    repeated ``data.ndim // sampling_rate.size`` times, so a single value is
    expanded to one value per axis of :py:attr:`Density.data`.
    """
    values = np.asarray(sampling_rate)
    repeats = self.data.ndim // values.size
    self._sampling_rate = np.repeat(values, repeats)
1102
+
1103
@property
def metadata(self) -> Dict:
    """
    Dictionary with metadata information of the class instance.
    """
    return self._metadata


@metadata.setter
def metadata(self, metadata: Dict) -> None:
    """
    Stores ``metadata`` as the metadata of the class instance. The object is
    kept by reference and is not validated.
    """
    self._metadata = metadata
1116
+
1117
def to_pointcloud(self, threshold: float = 0) -> NDArray:
    """
    Determines the indices of all elements in :py:attr:`Density.data` that are
    strictly larger than ``threshold``.

    Parameters
    ----------
    threshold : float, optional
        The cutoff value to determine the indices. Default is 0.

    Returns
    -------
    NDArray
        Data indices that are larger than the given threshold with shape
        (dimensions, indices).

    Examples
    --------
    >>> density.to_pointcloud(0)
    """
    above_threshold = self.data > threshold
    return np.array(np.nonzero(above_threshold))
1137
+
1138
def _pad_slice(self, box: Tuple[slice], pad_kwargs: Dict = None) -> NDArray:
    """
    Pads the internal data array according to box.

    Negative slice start indices result in a left-hand padding, while slices
    that extend beyond the shape of the current class instance result in a
    right-hand padding. The data array is never cropped by this method.

    Parameters
    ----------
    box : tuple of slice
        Tuple of slice objects that define the box dimensions. Each slice
        requires an integer start and stop.
    pad_kwargs : dict, optional
        Parameter dictionary passed to numpy pad. Defaults to no additional
        parameters.

    Returns
    -------
    NDArray
        The padded internal data array.
    """
    # None instead of a shared mutable ``{}`` default.
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    box_start = np.array([b.start for b in box])
    box_stop = np.array([b.stop for b in box])

    # Only negative starts reach beyond the left edge of the array.
    left_pad = -np.minimum(box_start, 0)

    # The requested extent is measured from max(start, 0); whatever exceeds
    # the current shape has to be appended on the right.
    right_pad = box_stop - np.maximum(box_start, 0)
    right_pad -= np.array(self.shape, dtype=int)
    right_pad = np.maximum(right_pad, 0)

    padding = tuple(zip(left_pad, right_pad))
    return np.pad(self.data, padding, **pad_kwargs)
1169
+
1170
def adjust_box(self, box: Tuple[slice], pad_kwargs: Dict = None) -> None:
    """
    Adjusts the internal data array and origin of the current class instance
    according to the provided box.

    The data array is first cropped to the part of ``box`` that lies inside
    the array and subsequently padded where ``box`` extends beyond it.

    Parameters
    ----------
    box : tuple of slices
        A tuple of slices describing how each axis of the volume array
        should be sliced. See :py:meth:`Density.trim_box` on how to produce
        such an object.
    pad_kwargs : dict, optional
        Parameter dictionary passed to numpy pad. Defaults to no additional
        parameters.

    See Also
    --------
    :py:meth:`Density.trim_box`

    Examples
    --------
    The following demonstrates the ability of :py:meth:`Density.adjust_box`
    to extract a subdensity from the current :py:class:`Density` instance.
    :py:meth:`Density.adjust_box` not only operates on :py:attr:`Density.data`,
    but also modifies :py:attr:`Density.origin` according to ``box``.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5, 5)))
    >>> box = (slice(1, 4), slice(2, 5))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (1.0, 2.0), sampling_rate: (1, 1), Shape: (3, 3)

    :py:meth:`Density.adjust_box` can also extend the box of the current
    :py:class:`Density` instance. This is achieved by negative start or
    stops that exceed the dimension of the current :py:attr:`Density.data` array.

    >>> box = (slice(-1, 10), slice(2, 10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (0.0, 4.0), sampling_rate: (1, 1), Shape: (11, 8)

    However, do note that only the start coordinate of each slice in ``box``
    can be negative.

    >>> box = (slice(-1, 10), slice(2, -10))
    >>> dens.adjust_box(box)
    >>> dens
    Origin: (-1.0, 6.0), sampling_rate: (1, 1), Shape: (11, 0)
    """
    # None instead of a shared mutable ``{}`` default.
    pad_kwargs = {} if pad_kwargs is None else pad_kwargs

    crop_box = tuple(
        slice(max(b.start, 0), min(b.stop, shape))
        for b, shape in zip(box, self.data.shape)
    )
    self.data = self.data[crop_box].copy()

    # In case the box is larger than the current map
    self.data = self._pad_slice(box, pad_kwargs=pad_kwargs)

    # The first voxel moved by box.start voxels along each axis.
    box_start = np.array([b.start for b in box])
    self.origin = self.origin + np.multiply(box_start, self.sampling_rate)
1232
+
1233
def trim_box(self, cutoff: float, margin: int = 0) -> Tuple[slice]:
    """
    Computes the smallest axis-aligned box that encloses all values of the
    internal data array larger than ``cutoff``, expanded by ``margin`` and
    clipped to the bounds of the data array.

    The output can be passed to :py:meth:`Density.adjust_box` to crop
    the internal data array.

    Parameters
    ----------
    cutoff : float
        The threshold value for determining the minimum enclosing box.
    margin : int, optional
        The margin to add to the box dimensions. Default is 0.

    Returns
    -------
    tuple
        A tuple containing slice objects representing the box.

    Raises
    ------
    ValueError
        If the cutoff is larger than or equal to the maximum density value.

    Examples
    --------
    The following will compute the bounding box that encloses all values
    in the example array that are larger than zero:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([0,1,1,1,0]))
    >>> dens.trim_box(0)
    (slice(1, 4, None),)

    The resulting tuple can be passed to :py:meth:`Density.adjust_box` to trim the
    current :py:class:`Density` instance:

    >>> dens.adjust_box(dens.trim_box(0))
    >>> dens.data.shape
    (3,)

    See Also
    --------
    :py:meth:`Density.adjust_box`
    """
    if cutoff >= self.data.max():
        raise ValueError(
            f"Cutoff exceeds data range ({cutoff} >= {self.data.max()})."
        )

    n_axes = self.data.ndim
    bounds = []
    for axis, axis_length in enumerate(self.data.shape):
        # Collapse all other axes to obtain the maximum profile along axis.
        other_axes = tuple(i for i in range(n_axes) if i != axis)
        profile = np.max(self.data, axis=other_axes)
        occupied = np.where(profile > cutoff)[0]

        lower = max(0, occupied[0] - margin)
        upper = min(axis_length, occupied[-1] + margin + 1)
        bounds.append(slice(lower, upper))

    return tuple(bounds)
1295
+
1296
def minimum_enclosing_box(
    self,
    cutoff: float,
    use_geometric_center: bool = False,
) -> Tuple[slice]:
    """
    Compute the enclosing box that holds all possible rotations of the internal
    data array.

    The bounding box of all elements larger than ``cutoff`` is grown
    symmetrically until it matches the shape returned by
    :py:meth:`tme.matching_utils.minimum_enclosing_box`. If the required growth
    along an axis is odd, the extra voxel is added to the stop of the slice.

    Parameters
    ----------
    cutoff : float
        Above this value arr elements are considered.
    use_geometric_center : bool, optional
        Whether the box should accommodate the geometric or the coordinate
        center. Defaults to False.

    Returns
    -------
    tuple
        Tuple of slices corresponding to the minimum enclosing box.

    See Also
    --------
    :py:meth:`Density.adjust_box`
    :py:meth:`tme.matching_utils.minimum_enclosing_box`
    """
    points = self.to_pointcloud(threshold=cutoff)
    lower, upper = points.min(axis=1), points.max(axis=1)

    required_shape = minimum_enclosing_box(
        coordinates=points,
        use_geometric_center=use_geometric_center,
    )
    current_extent = np.subtract(upper, lower)
    deficit = np.maximum(np.subtract(required_shape, current_extent), 0)

    grow_lower = np.divide(deficit, 2).astype(int)
    grow_upper = grow_lower + np.mod(deficit, 2)

    lower = (lower - grow_lower).astype(int)
    upper = (upper + grow_upper).astype(int)

    return tuple(slice(start, stop) for start, stop in zip(lower, upper))
1341
+
1342
def pad(
    self, new_shape: Tuple[int], center: bool = True, padding_value: float = 0
) -> None:
    """
    Extends the internal :py:attr:`Density.data` array of the current
    :py:class:`Density` instance to ``new_shape`` via
    :py:meth:`Density.adjust_box`, which also adapts :py:attr:`Density.origin`.

    Parameters
    ----------
    new_shape : tuple of int
        The desired shape for the new volume.
    center : bool, optional
        Whether the data should be centered in the new box. Default is True.
        If the difference in size is odd, the extra element is appended.
    padding_value : float, optional
        Value to pad the data array with. Default is zero.

    Raises
    ------
    ValueError
        If the length of `new_shape` does not match the dimensionality of the
        internal data array.

    Examples
    --------
    The following demonstrates the functionality of :py:meth:`Density.pad` on
    a one-dimensional array:

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True)
    >>> dens.data
    array([0, 1, 1, 1, 0])

    It's also possible to pass a user-defined ``padding_value``:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = True, padding_value = -1)
    >>> dens.data
    array([-1, 1, 1, 1, -1])

    If ``center`` is set to False, the padding values will be appended:

    >>> dens = Density(np.array([1,1,1]))
    >>> dens.pad(new_shape = (5,), center = False)
    >>> dens.data
    array([1, 1, 1, 0, 0])
    """
    if len(new_shape) != self.data.ndim:
        raise ValueError(
            f"new_shape has dimension {len(new_shape)}"
            f" but expected was {self.data.ndim}."
        )

    if center:
        overhang = np.subtract(new_shape, self.shape).astype(int)
        half = overhang // 2
        starts = -half
        stops = np.add(self.shape, half + overhang % 2)
        target_box = tuple(slice(start, stop) for start, stop in zip(starts, stops))
    else:
        target_box = tuple(slice(0, stop) for stop in new_shape)

    self.adjust_box(target_box, pad_kwargs={"constant_values": padding_value})
1407
+
1408
def centered(self, cutoff: float = 0) -> Tuple["Density", NDArray]:
    """
    Shifts the data center of mass to the center of the data array. The box size
    of the return Density object is at least equal to the box size of the class
    instance.

    Parameters
    ----------
    cutoff : float, optional
        Only elements in data larger than cutoff will be considered for
        computing the new box. By default considers only positive elements.

    Notes
    -----
    Should any axis of the class instance data array be smaller than the return
    value of :py:meth:`Density.minimum_enclosing_box`, the size of the internal
    data array is adapted to avoid array elements larger than cutoff to fall
    outside the data array.

    Returns
    -------
    Density
        A copy of the class instance whose data center of mass is in the
        center of the data array.
    NDArray
        The coordinate translation, computed as the difference between the
        center of mass before and after the shift.

    See Also
    --------
    :py:meth:`Density.trim_box`
    :py:meth:`Density.minimum_enclosing_box`

    Examples
    --------
    :py:meth:`Density.centered` returns a tuple containing a centered version
    of the current :py:class:`Density` instance, as well as an array with
    translations. The translation corresponds to the shift that was used to
    center the current :py:class:`Density` instance.

    >>> import numpy as np
    >>> from tme import Density
    >>> dens = Density(np.ones((5,5)))
    >>> centered_dens, translation = dens.centered(0)
    >>> translation
    array([-4.4408921e-16, 4.4408921e-16])

    :py:meth:`Density.centered` extended the :py:attr:`Density.data` attribute
    of the current :py:class:`Density` instance via padding and modified
    :py:attr:`Density.origin` accordingly.

    >>> centered_dens
    Origin: (-1.0, -1.0), sampling_rate: (1, 1), Shape: (7, 7)

    `centered_dens` is sufficiently large to represent all rotations that
    could be applied to the :py:attr:`Density.data` attribute. Lets look
    at a random rotation obtained from
    :py:meth:`tme.matching_utils.get_rotation_matrices`.

    >>> from tme.matching_utils import get_rotation_matrices
    >>> rotation_matrix = get_rotation_matrices(dim = 2 ,angular_sampling = 10)[0]
    >>> rotated_centered_dens = centered_dens.rigid_transform(
    >>>     rotation_matrix = rotation_matrix,
    >>>     order = None
    >>> )
    >>> print(centered_dens.data.sum(), rotated_centered_dens.data.sum())
    25.000000000000007 25.000000000000007
    """
    centered_map = self.copy()

    enclosing_box = centered_map.minimum_enclosing_box(
        cutoff=cutoff, use_geometric_center=False
    )
    centered_map.adjust_box(enclosing_box)

    # The result must not be smaller than the box of the current instance.
    centered_map.pad(np.maximum(centered_map.shape, self.shape))

    mass_center = self.center_of_mass(centered_map.data, cutoff)
    box_center = np.divide(centered_map.shape, 2)
    # Only whole-voxel shifts are applied.
    shift = np.subtract(box_center, mass_center).astype(int)

    centered_map = centered_map.rigid_transform(
        translation=shift,
        rotation_matrix=np.eye(centered_map.data.ndim),
        use_geometric_center=False,
    )
    offset = np.subtract(mass_center, self.center_of_mass(centered_map.data))

    return centered_map, offset
1507
+
1508
@classmethod
def rotate_array(
    cls,
    arr: NDArray,
    rotation_matrix: NDArray,
    arr_mask: NDArray = None,
    translation: NDArray = None,
    use_geometric_center: bool = False,
    out: NDArray = None,
    out_mask: NDArray = None,
    order: int = 3,
) -> None:
    """
    Rotates coordinates of arr according to rotation_matrix.

    This method forwards all arguments to the ``rotate_array`` method of a
    newly created ``NumpyFFTWBackend`` and returns its result.

    If no output array is provided, this method will compute an array with
    sufficient space to hold all elements. If both `arr` and `arr_mask`
    are provided, `arr_mask` will be centered according to arr.

    Parameters
    ----------
    arr : NDArray
        The input array to be rotated.
    rotation_matrix : NDArray
        The rotation matrix to apply [d x d].
    arr_mask : NDArray, optional
        The mask of `arr` that will be equivalently rotated.
    translation : NDArray, optional
        The translation to apply [d].
    use_geometric_center : bool, optional
        Whether the rotation should be centered around the geometric
        or mass center. Default is mass center.
    out : NDArray, optional
        The output array to write the rotation of `arr` to.
    out_mask : NDArray, optional
        The output array to write the rotation of `arr_mask` to.
    order : int, optional
        Spline interpolation order. Has to be in the range 0-5.
    """
    backend = NumpyFFTWBackend()
    rotation_kwargs = {
        "arr": arr,
        "rotation_matrix": rotation_matrix,
        "arr_mask": arr_mask,
        "translation": translation,
        "use_geometric_center": use_geometric_center,
        "out": out,
        "out_mask": out_mask,
        "order": order,
    }
    return backend.rotate_array(**rotation_kwargs)
1558
+
1559
@staticmethod
def rotate_array_coordinates(
    arr: NDArray,
    coordinates: NDArray,
    rotation_matrix: NDArray,
    translation: NDArray = None,
    out: NDArray = None,
    use_geometric_center: bool = True,
    arr_mask: NDArray = None,
    mask_coordinates: NDArray = None,
    out_mask: NDArray = None,
) -> None:
    """
    Rotates coordinates of arr according to rotation_matrix.

    This method forwards all arguments to the ``rotate_array_coordinates``
    method of a newly created ``NumpyFFTWBackend`` and returns its result.

    If no output array is provided, this method will compute an array with
    sufficient space to hold all elements. If both `arr` and `arr_mask`
    are provided, `arr_mask` will be centered according to arr.

    No centering will be performed if the rotation matrix is the identity matrix.

    Parameters
    ----------
    arr : NDArray
        The input array to be rotated.
    coordinates : NDArray
        The pointcloud [d x N] containing elements of `arr` that should be rotated.
        See :py:meth:`Density.to_pointcloud` on how to obtain the coordinates.
    rotation_matrix : NDArray
        The rotation matrix to apply [d x d].
    translation : NDArray, optional
        The translation to apply [d].
    out : NDArray, optional
        The output array to write the rotation of `arr` to.
    use_geometric_center : bool, optional
        Whether the rotation should be centered around the geometric
        or mass center.
    arr_mask : NDArray, optional
        The mask of `arr` that will be equivalently rotated.
    mask_coordinates : NDArray, optional
        Equivalent to `coordinates`, but containing elements of `arr_mask`
        that should be rotated.
    out_mask : NDArray, optional
        The output array to write the rotation of `arr_mask` to.
    """
    backend = NumpyFFTWBackend()
    rotation_kwargs = {
        "arr": arr,
        "coordinates": coordinates,
        "rotation_matrix": rotation_matrix,
        "translation": translation,
        "out": out,
        "use_geometric_center": use_geometric_center,
        "arr_mask": arr_mask,
        "mask_coordinates": mask_coordinates,
        "out_mask": out_mask,
    }
    return backend.rotate_array_coordinates(**rotation_kwargs)
1615
+
1616
def rigid_transform(
    self,
    rotation_matrix: NDArray,
    translation: NDArray = None,
    order: int = 3,
    use_geometric_center: bool = False,
) -> "Density":
    """
    Performs a rigid transform of the current class instance.

    The transformed data is written into a zero-filled copy of the current
    instance (see :py:attr:`Density.empty`). Afterwards, all values below the
    resolution of the data type are set to zero.

    Parameters
    ----------
    rotation_matrix : NDArray
        Rotation matrix to apply to the `Density` instance.
    translation : NDArray, optional
        Translation to apply to the `Density` instance.
    order : int, optional
        Order of spline interpolation.
    use_geometric_center : bool, optional
        Whether to use geometric or coordinate center. If False,
        class instance should be centered using :py:meth:`Density.centered`.

    Returns
    -------
    Density
        The transformed instance of :py:class:`tme.density.Density`.

    Examples
    --------
    >>> import numpy as np
    >>> rotation_matrix = np.eye(3)
    >>> rotation_matrix[0] = -1
    >>> density.rigid_transform(rotation_matrix = rotation_matrix)

    Notes
    -----
    :py:meth:`Density.rigid_transform` assumes that the internal data array is
    sufficiently sized to accommodate the transform.

    See Also
    --------
    :py:meth:`Density.centered`, :py:meth:`Density.minimum_enclosing_box`
    """
    transformed_map = self.empty

    self.rotate_array(
        arr=self.data,
        rotation_matrix=rotation_matrix,
        translation=translation,
        order=order,
        use_geometric_center=use_geometric_center,
        out=transformed_map.data,
    )

    dtype = transformed_map.data.dtype
    # np.finfo raises a ValueError for non-inexact dtypes. The smallest
    # positive step of integer data is one.
    eps = np.finfo(dtype).eps if np.issubdtype(dtype, np.inexact) else 1
    transformed_map.data[transformed_map.data < eps] = 0
    return transformed_map
1672
+
1673
def align_origins(self, other_map: "Density") -> "Density":
    """
    Aligns the origin of another to the origin of the current class instance.

    The origin difference is converted to whole voxels and used, together
    with the shape of ``other_map``, to build the box that is passed to
    :py:meth:`Density.adjust_box` on a copy of ``other_map``.

    Parameters
    ----------
    other_map : Density
        An instance of :py:class:`Density` class to align with the current map.

    Raises
    ------
    ValueError
        If the sampling_rate of both class instances does not match.

    Returns
    -------
    Density
        A modified copy of `other_map` with aligned origin.
    """
    rates_match = np.allclose(self.sampling_rate, other_map.sampling_rate)
    if not rates_match:
        raise ValueError("sampling_rate of both maps have to match.")

    voxel_offset = np.subtract(self.origin, other_map.origin)
    voxel_offset = np.divide(voxel_offset, self.sampling_rate).astype(int)

    starts = np.minimum(voxel_offset, other_map.shape)
    stops = np.maximum(voxel_offset, other_map.shape)
    aligned_box = tuple(slice(start, stop) for start, stop in zip(starts, stops))

    aligned = other_map.copy()
    aligned.adjust_box(aligned_box)
    return aligned
1708
+
1709
def resample(self, new_sampling_rate: Tuple[float], order: int = 1) -> "Density":
    """
    Resamples the current class instance to ``new_sampling_rate`` using
    spline interpolation of order ``order``.

    The current instance is left untouched; resampling is performed on a
    copy. A single sampling rate is expanded to all axes of the data array.

    Parameters
    ----------
    new_sampling_rate : Tuple[float]
        Sampling rate to resample to.
    order : int, optional
        Order of spline used for interpolation, by default 1.

    Returns
    -------
    Density
        A resampled instance of `Density` class.
    """
    resampled = self.copy()

    target_rate = np.array(new_sampling_rate)
    repeats = resampled.data.ndim // target_rate.size
    target_rate = np.repeat(target_rate, repeats)

    # Ratio of old to new sampling rate is the zoom factor per axis.
    zoom_factor = np.divide(resampled.sampling_rate, target_rate)

    resampled.data = zoom(resampled.data, zoom_factor, order=order)
    resampled.sampling_rate = target_rate
    return resampled
1736
+
1737
def density_boundary(
    self, weight: float, fraction_surface: float = 0.1, volume_factor: float = 1.21
) -> Tuple[float]:
    """
    Computes the density boundary of the current class instance. The density
    boundary in this setting is defined as minimal and maximal density value
    enclosing a certain ``weight``.

    Parameters
    ----------
    weight : float
        Density weight to compute volume cutoff on. This could e.g. be the
        sum of contained atomic weights.
    fraction_surface : float, optional
        Approximate fraction of surface voxels on all voxels enclosing
        ``weight``, by default 0.1. Decreasing this value increases the
        upper volume boundary.
    volume_factor : float, optional
        Factor used to compute how many distinct density values
        can be used to represent ``weight``, by default 1.21.

    Returns
    -------
    tuple
        Tuple containing lower and upper bound on densities.

    References
    ----------
    .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    Raises
    ------
    ValueError
        If any input parameter is <= 0.
    """
    if weight <= 0 or fraction_surface <= 0 or volume_factor <= 0:
        raise ValueError(
            "weight, fraction_surface and volume_factor need to be > 0."
        )

    # Number of voxels required to hold ``weight``.
    num_voxels = np.min(
        volume_factor * weight / np.power(self.sampling_rate, self.data.ndim)
    ).astype(int)
    surface_included_voxels = int(num_voxels * (1 + fraction_surface))

    # np.partition returns a partitioned copy, hence a flat view suffices.
    # Negative indices select the n-th largest density values.
    map_partition = np.partition(
        self.data.ravel(), (-num_voxels, -surface_included_voxels)
    )
    upper_limit = map_partition[-num_voxels]
    lower_limit = map_partition[-surface_included_voxels]

    return (lower_limit, upper_limit)
1792
+
1793
def surface_coordinates(
    self, density_boundaries: Tuple[float], method: str = "ConvexHull"
) -> NDArray:
    """
    Calculates the surface coordinates of the current class instance using
    different boundary and surface detection methods. This method is relevant
    for determining coordinates used in template matching,
    see :py:class:`tme.matching_exhaustive.FitRefinement`.

    Parameters
    ----------
    density_boundaries : tuple
        Tuple of two floats with lower and upper bounds of density values
        to be considered on the surface (see :py:meth:`Density.density_boundary`).
        Only the 'Weight' method uses the upper bound.
    method : str, optional
        Surface coordinates are determined using this method:

        +--------------+-----------------------------------------------------+
        | 'ConvexHull' | Use the lower bound density convex hull vertices.   |
        +--------------+-----------------------------------------------------+
        | 'Weight'     | Use all coordinates strictly within                 |
        |              | ``density_boundaries``.                             |
        +--------------+-----------------------------------------------------+
        | 'Sobel'      | Set densities not above the lower bound to zero,    |
        |              | apply a sobel filter and return density coordinates |
        |              | larger than 0.5 times the maximum filter value.     |
        +--------------+-----------------------------------------------------+
        | 'Laplace'    | Apply a laplace filter to the binary mask of        |
        |              | densities above the lower bound and threshold the   |
        |              | result like 'Sobel'.                                |
        +--------------+-----------------------------------------------------+
        | 'Minimum'    | Return coordinates above the lower bound that have  |
        |              | at least one axis-aligned neighbor which is not     |
        |              | above the lower bound (cross-shaped minimum filter).|
        +--------------+-----------------------------------------------------+

    Raises
    ------
    ValueError
        If the chosen method is not available.

    Returns
    -------
    NDArray
        An array of surface coordinates with shape (number_of_points, dimensions).

    References
    ----------
    .. [1]  Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    available_methods = ["ConvexHull", "Weight", "Sobel", "Laplace", "Minimum"]

    if method not in available_methods:
        raise ValueError(
            "Argument method has to be one of the following: %s"
            % ", ".join(available_methods)
        )

    lower_bound, upper_bound = density_boundaries
    if method == "ConvexHull":
        binary = np.transpose(np.where(self.data > lower_bound))
        hull = ConvexHull(binary)
        surface_points = binary[hull.vertices[:]]

    elif method == "Sobel":
        filtered_map = np.multiply(self.data, (self.data > lower_bound))
        magn = generic_gradient_magnitude(filtered_map, sobel)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Laplace":
        # NOTE(review): the filter runs on a boolean mask; scipy allocates the
        # output with the input dtype by default - confirm this is intended.
        filtered_map = self.data > lower_bound
        magn = laplace(filtered_map)
        surface_points = np.argwhere(magn > 0.5 * magn.max())

    elif method == "Minimum":
        ndim = self.data.ndim
        # Cross-shaped footprint: the center voxel plus its two neighbors along
        # each axis. The footprint has to be three voxels wide in every
        # dimension; sizing it by ndim only worked for 3D data.
        fp = np.zeros((3,) * ndim)
        center = np.ones(ndim, dtype=int)
        fp[tuple(center)] = 1
        for i in range(ndim):
            offset = np.zeros(ndim, dtype=int)
            offset[i] = 1
            fp[tuple(center + offset)] = 1
            fp[tuple(center - offset)] = 1

        filtered_map = (self.data > lower_bound).astype(int)
        filtered_map_surface = minimum_filter(
            filtered_map, footprint=fp, mode="constant", cval=0.8
        )
        # A voxel is on the surface if it is set but its neighborhood minimum
        # is not, i.e. the difference is exactly one.
        filtered_map_surface = ((filtered_map - filtered_map_surface) == 1).astype(
            int
        )
        surface_points = np.argwhere(filtered_map_surface == 1)

    elif method == "Weight":
        surface_points = np.argwhere(
            np.logical_and(self.data < upper_bound, self.data > lower_bound)
        )

    return surface_points
1900
+
1901
def normal_vectors(self, coordinates: NDArray) -> NDArray:
    """
    Calculates the normal vectors for the given coordinates on the densities
    of the current class instance. If the normal vector to a given coordinate
    can not be computed, the zero vector is returned instead. The output of this
    function can e.g. be used in
    :py:class:`tme.matching_optimization.NormalVectorScore`.

    For every coordinate, the offsets to all voxels in the surrounding
    3-voxel wide window (clipped at the array border) whose density is larger
    than the density at the coordinate are summed and normalized to unit length.

    Parameters
    ----------
    coordinates : NDArray
        An array of integer coordinates with shape (coordinates, dimensions)
        of which to calculate the normal vectors.

    Returns
    -------
    NDArray
        A float array with unit normal vectors with same shape as coordinates.
        Rows are zero if no neighbor has a larger density or if the
        neighbor offsets cancel each other.

    References
    ----------
    .. [1]  Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
        Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
        improved 3D electron microscopy density-fitting and validation
        workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
        https://doi.org/10.1107/S2059798320014928

    Raises
    ------
    ValueError
        If coordinates.shape[1] does not match self.data.ndim,
        coordinates.ndim != 2 or lies outside self.data.

    See Also
    --------
    :py:class:`tme.matching_optimization.NormalVectorScore`
    :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
    :py:class:`tme.matching_optimization.MutualInformation`
    :py:class:`tme.matching_optimization.Envelope`
    :py:class:`tme.matching_optimization.Chamfer`
    """
    coordinates = np.asarray(coordinates, dtype=int)

    if coordinates.ndim != 2:
        raise ValueError("Coordinates should have shape point x dimension.")
    if coordinates.shape[1] != self.data.ndim:
        raise ValueError(
            f"Expected coordinate dimension {self.data.ndim}, "
            f"got {coordinates.shape[1]}."
        )
    in_box = np.logical_and(
        coordinates < np.array(self.shape), coordinates >= 0
    ).min(axis=1)

    out_of_box = np.invert(in_box)
    if out_of_box.sum() > 0:
        print(coordinates[out_of_box, :])
        raise ValueError("Coordinates outside of self.data detected.")

    # Preallocating keeps the zero vector as the default for every coordinate
    # whose normal can not be determined.
    normal_vectors = np.zeros(coordinates.shape, dtype=float)
    for index, point in enumerate(coordinates):
        start = np.maximum(point - 1, 0)
        stop = np.minimum(point + 2, self.data.shape)
        slc = tuple(slice(*coords) for coords in zip(start, stop))

        inner_facing = np.array(np.where(self.data[slc] > self.data[tuple(point)]))
        if inner_facing.size == 0:
            continue

        # The window is clipped at the lower border, so the position of point
        # inside the window is point - start rather than always one.
        inner_facing = inner_facing - (point - start)[:, None]
        inner_facing = inner_facing.sum(axis=1)

        # Offsets of opposing neighbors cancel; avoid dividing by zero.
        norm = np.linalg.norm(inner_facing)
        if norm > 0:
            normal_vectors[index] = inner_facing / norm

    return normal_vectors
1976
+
1977
def core_mask(self) -> NDArray:
    """
    Calculates the weighted core mask of the current class instance.

    Starting from the mask of all strictly positive data elements, binary
    erosion is applied repeatedly until the mask is empty. Before each erosion
    step the current mask is added to an accumulator, so an element with
    value n in the output was still part of the mask in n consecutive rounds.
    Larger values therefore indicate elements closer to the core of the
    density map.

    Returns
    -------
    NDArray
        An array with same shape as internal data array. Values contained
        indicate how many rounds of binary erosion were necessary to nullify
        a given data element.

    References
    ----------
    .. [1]  Gydo Zundert and Alexandre Bonvin. Fast and sensitive rigid-body
        fitting into cryo-em density maps with powerfit. AIMS Biophysics,
        2:73–87, 04 2015. doi:10.3934/biophy.2015.2.73
    """
    remaining = self.data > 0
    erosion_rounds = np.zeros(self.shape)

    # Each pass credits every surviving element once, then peels one layer.
    while remaining.any():
        erosion_rounds += remaining
        remaining = binary_erosion(remaining)

    return erosion_rounds
2008
+
2009
@staticmethod
def center_of_mass(arr: NDArray, cutoff: float = None) -> NDArray:
    """
    Computes the center of mass of a numpy ndarray instance using all available
    elements. For template matching it typically makes sense to only input
    positive densities.

    Parameters
    ----------
    arr : NDArray
        Array to compute the center of mass of.
    cutoff : float, optional
        Densities less than or equal to cutoff are nullified for center
        of mass computation. By default considers all values.

    Returns
    -------
    NDArray
        Center of mass with shape (arr.ndim). The values are not finite if
        the considered elements sum to zero.
    """
    arr = np.asarray(arr)

    # Only mask when a cutoff was requested. Deriving a default cutoff from
    # arr.min() - 1 wraps around for unsigned integer arrays and has no effect
    # for large floating point values, which silently removed elements.
    if cutoff is not None:
        arr = np.where(arr > cutoff, arr, 0)

    denominator = np.sum(arr)
    # One open index grid per axis, broadcastable against arr.
    grids = np.ogrid[tuple(slice(0, i) for i in arr.shape)]

    center_of_mass = np.array(
        [
            np.sum(np.multiply(arr, grids[dim].astype(float))) / denominator
            for dim in range(arr.ndim)
        ]
    )

    return center_of_mass
2042
+
2043
@classmethod
def match_densities(
    cls,
    target: "Density",
    template: "Density",
    cutoff_target: float = 0,
    cutoff_template: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
) -> Tuple["Density", NDArray, NDArray]:
    """
    Aligns two :py:class:`Density` instances target and template and returns
    the aligned template.

    If voxel sizes of target and template dont match coordinates are scaled
    to the numerically smaller voxel size. Instances are prealigned based on their
    center of mass. Finally :py:class:`tme.matching_optimization.FitRefinement` is
    used to determine translation and rotation to map template to target.

    Parameters
    ----------
    target : Density
        The target map for alignment.
    template : Density
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    cutoff_template : float, optional
        The cutoff value for the template map, by default 0.
    scoring_method : str, optional
        The scoring method to use for alignment. See
        :py:class:`tme.matching_optimization.FitRefinement` for available methods,
        by default "NormalizedCrossCorrelation".

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as :py:class:`Density` object,
        translation in voxels and rotation matrix used for the transformation.

    Notes
    -----
    A message is printed if the voxel sizes of target and template differ.
    The ``sampling_rate`` attribute of the passed ``template`` is overwritten
    with its per-axis expanded version.

    NOTE(review): earlier documentation stated that no densities below
    cutoff_template are present in the returned object. In this method the
    cutoffs are only used to build the point clouds and centers of mass -
    confirm against ``rigid_transform``.
    """
    target_sampling_rate = np.array(target.sampling_rate)
    template_sampling_rate = np.array(template.sampling_rate)

    # Expand the sampling rates so that there is one value per axis.
    target_sampling_rate = np.repeat(
        target_sampling_rate, target.data.ndim // target_sampling_rate.size
    )
    template_sampling_rate = np.repeat(
        template_sampling_rate, template.data.ndim // template_sampling_rate.size
    )

    if not np.allclose(target_sampling_rate, template_sampling_rate):
        print(
            "Voxel size of target and template do not match. "
            "Using smaller voxel size for refinement."
        )

    target_coordinates = target.to_pointcloud(cutoff_target)
    target_weights = target.data[tuple(target_coordinates)]

    template_coordinates = template.to_pointcloud(cutoff_template)
    template_weights = template.data[tuple(template_coordinates)]

    # Bring both point clouds onto the grid of the smaller voxel size.
    refinement_sampling_rate = np.minimum(
        target_sampling_rate, template_sampling_rate
    )
    target_scaling = np.divide(target_sampling_rate, refinement_sampling_rate)
    template_scaling = np.divide(template_sampling_rate, refinement_sampling_rate)
    target_coordinates = target_coordinates * target_scaling[:, None]
    template_coordinates = template_coordinates * template_scaling[:, None]

    # Prealign the template on the target using the centers of mass.
    target_mass_center = cls.center_of_mass(target.data, cutoff_target)
    template_mass_center = cls.center_of_mass(template.data, cutoff_template)
    mass_center_difference = np.subtract(
        target_mass_center, template_mass_center
    ).astype(int)
    template_coordinates += mass_center_difference[:, None]

    matcher = FitRefinement()
    # The score returned by the refinement is not part of the return value.
    translation, rotation_matrix, _ = matcher.refine(
        target_coordinates=target_coordinates,
        template_coordinates=template_coordinates,
        target_weights=target_weights,
        template_weights=template_weights,
        scoring_class=scoring_method,
        sampling_rate=np.ones(template.data.ndim),
    )

    # Add the prealignment shift and undo the coordinate scaling.
    translation += mass_center_difference
    translation = np.divide(translation, template_scaling)

    template.sampling_rate = template_sampling_rate.copy()
    ret = template.rigid_transform(
        rotation_matrix=rotation_matrix, use_geometric_center=False
    )
    ret.origin = target.origin.copy()
    ret.origin = ret.origin + np.multiply(translation, target_sampling_rate)

    return ret, translation, rotation_matrix
2144
+
2145
@classmethod
def match_structure_to_density(
    cls,
    target: "Density",
    template: "Structure",
    cutoff_target: float = 0,
    scoring_method: str = "NormalizedCrossCorrelation",
) -> Tuple["Structure", NDArray, NDArray]:
    """
    Aligns a :py:class:`tme.structure.Structure` template to :py:class:`Density`
    target and returns an aligned :py:class:`tme.structure.Structure` instance.

    The template is converted to a :py:class:`Density` with the sampling rate
    of the target via :py:meth:`Density.from_structure` and aligned using
    :py:meth:`Density.match_densities` with a template cutoff of 0.

    Parameters
    ----------
    target : Density
        The target map for template matching.
    template : Structure
        The template that should be aligned to the target.
    cutoff_target : float, optional
        The cutoff value for the target map, by default 0.
    scoring_method : str, optional
        The scoring method to use for template matching. See
        :py:class:`tme.matching_optimization.FitRefinement` for available methods,
        by default "NormalizedCrossCorrelation".

    Returns
    -------
    Tuple
        Tuple containing template aligned to target as
        :py:class:`tme.structure.Structure` object, translation and rotation
        matrix used for the transformation.

    Notes
    -----
    Translation and rotation are in xyz format, different from
    :py:meth:`match_densities`, which is zyx.
    """
    template_density = cls.from_structure(
        filename_or_structure=template, sampling_rate=target.sampling_rate
    )

    # Only translation and rotation of the density alignment are needed here.
    _, translation, rotation_matrix = cls.match_densities(
        target=target,
        template=template_density,
        cutoff_target=cutoff_target,
        cutoff_template=0,
        scoring_method=scoring_method,
    )
    out = template.copy()
    final_translation = np.add(
        -template_density.origin,
        np.multiply(translation, template_density.sampling_rate),
    )

    # Atom coordinates are in xyz
    final_translation = final_translation[::-1]
    rotation_matrix = rotation_matrix[::-1, ::-1]

    # NOTE(review): the return value is discarded. If Structure.rigid_transform
    # returns a transformed copy instead of modifying out in place, the
    # returned structure is not transformed - confirm against Structure.
    out.rigid_transform(
        translation=final_translation, rotation_matrix=rotation_matrix
    )

    return out, final_translation, rotation_matrix
2215
+
2216
@staticmethod
def align_coordinate_systems(target: "Density", template: "Density") -> "Density":
    """
    Aligns the coordinate system of `target` and `template`.

    A copy of `template` is padded to the shape of `target`, translated by
    the origin difference expressed in units of the target sampling rate
    and finally assigned a copy of the origin of `target`.

    Parameters
    ----------
    target : Density
        The target density whose coordinate system should remain unchanged.
    template : Density
        The template density that will be aligned to match the target's
        coordinate system.

    Raises
    ------
    ValueError
        If the `sampling_rate` of `target` and `template` do not match.

    Returns
    -------
    Density
        A copy of `template` aligned to the coordinate system of `target`.
        The `box_size` and `origin` will match that of `target`.

    See Also
    --------
    :py:meth:`Density.match_densities` : To match aligned template to target.
    """
    rates_agree = np.allclose(target.sampling_rate, template.sampling_rate)
    if not rates_agree:
        raise ValueError("sampling_rate of both maps have to match.")

    # Work on a copy so the passed template is left untouched.
    aligned = template.copy()
    aligned.pad(target.shape, center=True)

    # Offset between both origins, converted using the target sampling rate.
    shift = np.subtract(aligned.origin, target.origin)
    shift = np.divide(shift, target.sampling_rate)

    identity = np.eye(aligned.data.ndim)
    aligned = aligned.rigid_transform(rotation_matrix=identity, translation=shift)
    aligned.origin = target.origin.copy()
    return aligned
2258
+
2259
@staticmethod
def fourier_shell_correlation(density1: "Density", density2: "Density") -> NDArray:
    """
    Computes the Fourier Shell Correlation (FSC) between two instances of `Density`.

    The Fourier transforms of the input maps are divided into shells
    based on their spatial frequency. The correlation between corresponding shells
    in the two maps is computed to give the FSC.

    Parameters
    ----------
    density1 : Density
        An instance of `Density` class for the first map for comparison.
    density2 : Density
        An instance of `Density` class for the second map for comparison.

    Returns
    -------
    NDArray
        An array of shape (N, 2), where N is the number of shells,
        the first column represents the spatial frequency for each shell
        and the second column represents the corresponding FSC. Only shells
        below the largest frequency along a single axis are returned.

    Raises
    ------
    ValueError
        If the data of both maps does not have the same shape or is not
        three-dimensional with equal side lengths.

    References
    ----------
    .. [1]  https://github.com/tdgrant1/denss/blob/master/saxstats/saxstats.py
    """
    shape1, shape2 = density1.data.shape, density2.data.shape
    if shape1 != shape2:
        raise ValueError(
            f"Both maps need to have the same shape, got {shape1} and {shape2}."
        )
    # The frequency grid below is built from a single side length.
    if len(shape1) != 3 or len(set(shape1)) != 1:
        raise ValueError(
            f"Maps need to be three-dimensional and cubic, got shape {shape1}."
        )

    side = shape1[0]
    df = 1.0 / side

    # Radial spatial frequency of every Fourier coefficient.
    qx_ = np.fft.fftfreq(side) * side * df
    qx, qy, qz = np.meshgrid(qx_, qx_, qx_, indexing="ij")
    qr = np.sqrt(qx**2 + qy**2 + qz**2)

    # Shells are as wide as the smallest non-zero frequency.
    qmax = np.max(qr)
    qstep = np.min(qr[qr > 0])
    nbins = int(qmax / qstep)
    qbins = np.linspace(0, nbins * qstep, nbins + 1)
    qbin_labels = np.searchsorted(qbins, qr, "right") - 1

    F1 = np.fft.fftn(density1.data)
    F2 = np.fft.fftn(density2.data)

    # Accumulate cross and auto power per shell.
    qbin_labels = qbin_labels.reshape(-1)
    numerator = np.bincount(
        qbin_labels, weights=np.real(F1 * np.conj(F2)).reshape(-1)
    )
    term1 = np.bincount(qbin_labels, weights=np.abs(F1).reshape(-1) ** 2)
    term2 = np.bincount(qbin_labels, weights=np.abs(F2).reshape(-1) ** 2)
    np.multiply(term1, term2, out=term1)
    denominator = np.sqrt(term1)
    FSC = np.divide(numerator, denominator)

    qidx = np.where(qbins < qx.max())

    return np.vstack((qbins[qidx], FSC[qidx])).T