pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
  2. pytme-0.1.5.data/scripts/match_template.py +744 -0
  3. pytme-0.1.5.data/scripts/postprocess.py +279 -0
  4. pytme-0.1.5.data/scripts/preprocess.py +93 -0
  5. pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
  6. pytme-0.1.5.dist-info/LICENSE +153 -0
  7. pytme-0.1.5.dist-info/METADATA +69 -0
  8. pytme-0.1.5.dist-info/RECORD +63 -0
  9. pytme-0.1.5.dist-info/WHEEL +5 -0
  10. pytme-0.1.5.dist-info/entry_points.txt +6 -0
  11. pytme-0.1.5.dist-info/top_level.txt +2 -0
  12. scripts/__init__.py +0 -0
  13. scripts/estimate_ram_usage.py +81 -0
  14. scripts/match_template.py +744 -0
  15. scripts/match_template_devel.py +788 -0
  16. scripts/postprocess.py +279 -0
  17. scripts/preprocess.py +93 -0
  18. scripts/preprocessor_gui.py +729 -0
  19. tme/__init__.py +6 -0
  20. tme/__version__.py +1 -0
  21. tme/analyzer.py +1144 -0
  22. tme/backends/__init__.py +134 -0
  23. tme/backends/cupy_backend.py +309 -0
  24. tme/backends/matching_backend.py +1154 -0
  25. tme/backends/npfftw_backend.py +763 -0
  26. tme/backends/pytorch_backend.py +526 -0
  27. tme/data/__init__.py +0 -0
  28. tme/data/c48n309.npy +0 -0
  29. tme/data/c48n527.npy +0 -0
  30. tme/data/c48n9.npy +0 -0
  31. tme/data/c48u1.npy +0 -0
  32. tme/data/c48u1153.npy +0 -0
  33. tme/data/c48u1201.npy +0 -0
  34. tme/data/c48u1641.npy +0 -0
  35. tme/data/c48u181.npy +0 -0
  36. tme/data/c48u2219.npy +0 -0
  37. tme/data/c48u27.npy +0 -0
  38. tme/data/c48u2947.npy +0 -0
  39. tme/data/c48u3733.npy +0 -0
  40. tme/data/c48u4749.npy +0 -0
  41. tme/data/c48u5879.npy +0 -0
  42. tme/data/c48u7111.npy +0 -0
  43. tme/data/c48u815.npy +0 -0
  44. tme/data/c48u83.npy +0 -0
  45. tme/data/c48u8649.npy +0 -0
  46. tme/data/c600v.npy +0 -0
  47. tme/data/c600vc.npy +0 -0
  48. tme/data/metadata.yaml +80 -0
  49. tme/data/quat_to_numpy.py +42 -0
  50. tme/data/scattering_factors.pickle +0 -0
  51. tme/density.py +2314 -0
  52. tme/extensions.cpython-311-darwin.so +0 -0
  53. tme/helpers.py +881 -0
  54. tme/matching_data.py +377 -0
  55. tme/matching_exhaustive.py +1553 -0
  56. tme/matching_memory.py +382 -0
  57. tme/matching_optimization.py +1123 -0
  58. tme/matching_utils.py +1180 -0
  59. tme/parser.py +429 -0
  60. tme/preprocessor.py +1291 -0
  61. tme/scoring.py +866 -0
  62. tme/structure.py +1428 -0
  63. tme/types.py +10 -0
@@ -0,0 +1,1123 @@
1
+ """ Implements various methods for non-exhaustive template matching
2
+ based on numerical optimization.
3
+
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
5
+
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
7
+ """
8
+
9
+ from typing import Tuple, Dict
10
+ from abc import ABC, abstractmethod
11
+
12
+ import numpy as np
13
+ from numpy.typing import NDArray
14
+ from scipy.optimize import (
15
+ differential_evolution,
16
+ LinearConstraint,
17
+ basinhopping,
18
+ )
19
+ from scipy.ndimage import laplace
20
+ from scipy.spatial import KDTree
21
+
22
+ from .matching_utils import rigid_transform, euler_to_rotationmatrix
23
+
24
+
25
class MatchCoordinatesToDensity(ABC):
    """
    Base class for scores comparing template coordinates to a target density.

    On construction the target coordinates are rasterized into a density grid
    with :py:meth:`FitRefinement.array_from_coordinates`. Calling the instance
    rigidly transforms the template coordinates, maps them onto that grid and
    returns the negated result of :py:meth:`scoring_function`.

    Parameters
    ----------
    target_coordinates : NDArray
        The coordinates of the target.
    template_coordinates : NDArray
        The coordinates of the template.
    target_weights : NDArray
        The weights of the target.
    template_weights : NDArray
        The weights of the template.
    sampling_rate : NDArray
        The size of the voxel.
    template_mask_coordinates : NDArray, optional
        The coordinates of the template mask. Default is None.
    target_mask_coordinates : NDArray, optional
        The coordinates of the target mask. Default is None.
    **kwargs : dict, optional
        Other keyword arguments. They are accepted but not used here.
    """

    def __init__(
        self,
        target_coordinates: NDArray,
        template_coordinates: NDArray,
        target_weights: NDArray,
        template_weights: NDArray,
        sampling_rate: NDArray,
        template_mask_coordinates: NDArray = None,
        target_mask_coordinates: NDArray = None,
        **kwargs,
    ):
        density, _, density_origin = FitRefinement.array_from_coordinates(
            target_coordinates, target_weights, sampling_rate
        )
        self.target_density = density
        self.target_origin = density_origin
        self.sampling_rate = sampling_rate

        self.template_weights = template_weights
        self.template_coordinates = template_coordinates
        # Scratch buffer that receives the transformed template on every call.
        self.template_coordinates_rotated = np.empty(
            template_coordinates.shape, dtype=np.float32
        )

        # The target mask is rasterized onto the same grid as the target.
        self.target_mask_density = None
        if target_mask_coordinates is not None:
            mask_density, *_ = FitRefinement.array_from_coordinates(
                coordinates=target_mask_coordinates.astype(np.float32),
                weights=np.ones(target_mask_coordinates.shape[1]),
                shape=self.target_density.shape,
                origin=self.target_origin,
                sampling_rate=self.sampling_rate,
            )
            self.target_mask_density = mask_density

        self.template_mask_coordinates = template_mask_coordinates
        self.template_mask_coordinates_rotated = None
        if template_mask_coordinates is not None:
            self.template_mask_coordinates_rotated = np.empty(
                template_mask_coordinates.shape, dtype=np.float32
            )

    def __call__(self, x: NDArray):
        """
        Evaluate the objective for one rigid transformation.

        Parameters
        ----------
        x : NDArray
            Transformation parameters. The first three entries are used as
            translation, the remaining ones are passed to
            ``euler_to_rotationmatrix``.

        Returns
        -------
        float
            The negative score from the scoring function.
        """
        translation = x[:3]
        rotation_matrix = euler_to_rotationmatrix(x[3:])

        rigid_transform(
            coordinates=self.template_coordinates,
            coordinates_mask=self.template_mask_coordinates,
            rotation_matrix=rotation_matrix,
            translation=translation,
            out=self.template_coordinates_rotated,
            out_mask=self.template_mask_coordinates_rotated,
            use_geometric_center=False,
        )

        (
            grid_coordinates,
            grid_coordinates_mask,
            inside,
            inside_mask,
        ) = FitRefinement.map_coordinates_to_array(
            coordinates=self.template_coordinates_rotated,
            coordinates_mask=self.template_mask_coordinates_rotated,
            array_origin=self.target_origin,
            array_shape=self.target_density.shape,
            sampling_rate=self.sampling_rate,
        )

        score = self.scoring_function(
            transformed_coordinates=grid_coordinates,
            transformed_coordinates_mask=grid_coordinates_mask,
            in_volume=inside,
            in_volume_mask=inside_mask,
        )
        return -score

    @abstractmethod
    def scoring_function(*args, **kwargs):
        """
        Computes a scoring metric for a given set of coordinates.

        This function is not intended to be called directly, but should rather
        be defined by classes inheriting from
        :py:class:`MatchCoordinatesToDensity`.
        """
143
+
144
+
145
class MatchCoordinatesToCoordinates(ABC):
    """
    Base class for scores comparing template coordinates to target coordinates.

    Calling the instance rigidly transforms the template coordinates and
    returns the negated result of :py:meth:`scoring_function`.

    Parameters
    ----------
    target_coordinates : NDArray
        The coordinates of the target.
    template_coordinates : NDArray
        The coordinates of the template.
    target_weights : NDArray
        The weights of the target.
    template_weights : NDArray
        The weights of the template.
    template_mask_coordinates : NDArray, optional
        The coordinates of the template mask. Default is None.
    target_mask_coordinates : NDArray, optional
        The coordinates of the target mask. Default is None.
    **kwargs : dict, optional
        Other keyword arguments, e.g. ``sampling_rate``. They are accepted
        but not used here.
    """

    def __init__(
        self,
        target_coordinates: NDArray,
        template_coordinates: NDArray,
        target_weights: NDArray,
        template_weights: NDArray,
        template_mask_coordinates: NDArray = None,
        target_mask_coordinates: NDArray = None,
        **kwargs,
    ):
        # Target side is stored as given.
        self.target_coordinates = target_coordinates
        self.target_weights = target_weights
        self.target_mask_coordinates = target_mask_coordinates

        # Template side plus a scratch buffer for the transformed coordinates.
        self.template_coordinates = template_coordinates
        self.template_weights = template_weights
        self.template_coordinates_rotated = np.empty(
            template_coordinates.shape, dtype=np.float32
        )

        self.template_mask_coordinates = template_mask_coordinates
        self.template_mask_coordinates_rotated = None
        if template_mask_coordinates is not None:
            self.template_mask_coordinates_rotated = np.empty(
                template_mask_coordinates.shape, dtype=np.float32
            )

    def __call__(self, x: NDArray):
        """
        Evaluate the objective for one rigid transformation.

        Parameters
        ----------
        x : NDArray
            Transformation parameters. The first three entries are used as
            translation, the remaining ones are passed to
            ``euler_to_rotationmatrix``.

        Returns
        -------
        float
            The negative score from the scoring function.
        """
        translation = x[:3]
        rotation_matrix = euler_to_rotationmatrix(x[3:])

        rigid_transform(
            coordinates=self.template_coordinates,
            coordinates_mask=self.template_mask_coordinates,
            rotation_matrix=rotation_matrix,
            translation=translation,
            out=self.template_coordinates_rotated,
            out_mask=self.template_mask_coordinates_rotated,
            use_geometric_center=False,
        )

        score = self.scoring_function(
            transformed_coordinates=self.template_coordinates_rotated,
            transformed_coordinates_mask=self.template_mask_coordinates_rotated,
        )
        return -score

    @abstractmethod
    def scoring_function(*args, **kwargs):
        """
        Computes a scoring metric for a given set of coordinates.

        This function is not intended to be called directly, but should rather
        be defined by classes inheriting from
        :py:class:`MatchCoordinatesToCoordinates`.
        """
238
+
239
+
240
class CrossCorrelation(MatchCoordinatesToDensity):
    """
    Cross-Correlation matching score.

    The score is the dot product of the target density sampled at the
    transformed template positions and the template weights:

    .. math::

        \\text{score} = \\text{target_weights} \\cdot \\text{template_weights}

    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Subclasses overwrite this value to normalize the score.
        self.denominator = 1

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        in_volume: NDArray,
        in_volume_mask: NDArray,
    ) -> float:
        """
        Compute the Cross-Correlation score.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        in_volume : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target volume.
        in_volume_mask : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target mask volume. Not used by this score.

        Returns
        -------
        float
            The Cross-Correlation score divided by ``self.denominator``.
        """
        grid_index = tuple(transformed_coordinates[:, in_volume])
        target_values = self.target_density[grid_index]
        template_values = self.template_weights[in_volume]
        return np.dot(target_values, template_values) / self.denominator
290
+
291
+
292
class LaplaceCrossCorrelation(CrossCorrelation):
    """
    Laplace Cross-Correlation matching score.

    Scored like :py:class:`CrossCorrelation`, but both the target density and
    the template weights are Laplace filtered first. The Laplace operator is
    written :math:`\\nabla^{2}`.

    .. math::

        \\text{score} = \\nabla^{2} \\text{target_weights} \\cdot
                        \\nabla^{2} \\text{template_weights}

    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.target_density = laplace(self.target_density)

        # Rasterize the template, filter the grid, then read the filtered
        # values back at the grid positions of the template coordinates.
        template_grid, grid_positions, _ = FitRefinement.array_from_coordinates(
            self.template_coordinates, self.template_weights, self.sampling_rate
        )
        filtered_grid = laplace(template_grid)
        self.template_weights = filtered_grid[tuple(grid_positions)]
314
+
315
+
316
class NormalizedCrossCorrelation(CrossCorrelation):
    """
    Normalized Cross-Correlation matching score.

    The dot product of `target_weights` and `template_weights` is divided by
    the product of their norms, which makes the score invariant to scale.

    .. math::

        \\text{score} = \\frac{\\text{target_weights} \\cdot \\text{template_weights}}
                        {\\text{max(target_norm} \\times \\text{template_norm, eps)}}

    Where:

    .. math::

        \\text{target_norm} = ||\\text{target_weights}||

    .. math::

        \\text{template_norm} = ||\\text{template_weights}||

    Here, :math:`||.||` denotes the L2 (Euclidean) norm.

    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Only non-zero voxels of the target density enter the target norm.
        occupied_density = self.target_density[self.target_density != 0]
        norm_product = np.linalg.norm(occupied_density) * np.linalg.norm(
            self.template_weights
        )
        # Lower-bounded by machine epsilon to avoid a division by zero.
        self.denominator = np.fmax(norm_product, np.finfo(float).eps)
349
+
350
+
351
class NormalizedCrossCorrelationMean(NormalizedCrossCorrelation):
    """
    Class representing the Mean Normalized Cross-Correlation matching score.

    This class extends the Normalized Cross-Correlation by computing the score
    after subtracting the mean from both `target_weights` and `template_weights`.
    This modification enhances the matching score's sensitivity to patterns
    over flat regions in the data.

    Mathematically, the Mean Normalized Cross-Correlation score is computed as:

    .. math::

        \\text{score} = \\frac{(\\text{target_weights} - \\text{mean(target_weights)})
                        \\cdot (\\text{template_weights} -
                        \\text{mean(template_weights)})}
                        {\\text{max(target_norm} \\times \\text{template_norm, eps)}}

    Where:

    .. math::

        \\text{target_norm} = ||\\text{target_weights} - \\text{mean(target_weights)}||

    .. math::

        \\text{template_norm} = ||\\text{template_weights} -
                                \\text{mean(template_weights)}||

    Here, :math:`||.||` denotes the L2 (Euclidean) norm, and :math:`\\text{mean(.)}`
    computes the mean of the respective weights.

    Parameters
    ----------
    **kwargs : dict
        Forwarded to :py:class:`NormalizedCrossCorrelation`. Must contain
        ``target_weights`` and ``template_weights``.

    Raises
    ------
    KeyError
        If ``target_weights`` or ``template_weights`` is missing.
    """

    def __init__(self, **kwargs):
        # Center copies of the weights instead of subtracting in place, so the
        # caller's arrays are left untouched and integer-typed weights do not
        # trigger a casting error.
        for key in ("target_weights", "template_weights"):
            weights = np.asarray(kwargs[key])
            kwargs[key] = weights - weights.mean()
        super().__init__(**kwargs)
389
+
390
+
391
class MaskedCrossCorrelation(MatchCoordinatesToDensity):
    """
    Masked Cross-Correlation matching score.

    Computes the similarity between `target_weights` and `template_weights`
    under their respective masks. The score is normalized, providing a
    measure of similarity even in the presence of missing or masked data.

    The formula for the Masked Cross-Correlation is:

    .. math::
        \\text{numerator} = \\text{dot}(\\text{target_weights},
                            \\text{template_weights}) -
                            \\frac{\\text{sum}(\\text{mask_target}) \\times
                            \\text{sum}(\\text{mask_template})}
                            {\\text{mask_overlap}}

    .. math::
        \\text{denominator1} = \\text{sum}(\\text{mask_target}^2) -
                               \\frac{\\text{sum}(\\text{mask_target})^2}
                               {\\text{mask_overlap}}

    .. math::
        \\text{denominator2} = \\text{sum}(\\text{mask_template}^2) -
                               \\frac{\\text{sum}(\\text{mask_template})^2}
                               {\\text{mask_overlap}}

    .. math::
        \\text{denominator} = \\sqrt{\\text{denominator1} \\times \\text{denominator2}}

    .. math::
        \\text{score} = \\frac{\\text{numerator}}{\\text{denominator}}
                        \\text{ if denominator } \\neq 0
                        \\text{ else } 0

    Where:

    - mask_target and mask_template are the masked target and template
      values respectively.

    - mask_overlap is the sum of the target mask at the transformed template
      mask positions.

    References
    ----------
    .. [1] Masked FFT registration, Dirk Padfield, CVPR 2010 conference
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        in_volume: NDArray,
        in_volume_mask: NDArray,
    ) -> float:
        """
        Compute the Masked Cross-Correlation score.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates.
        in_volume : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target volume.
        in_volume_mask : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target mask volume.

        Returns
        -------
        float
            The Masked Cross-Correlation score, or 0 if the denominator is zero.
        """
        mask_index = tuple(transformed_coordinates_mask[:, in_volume_mask])
        template_index = tuple(transformed_coordinates[:, in_volume])

        # Lower-bounded by machine epsilon so the divisions below are defined.
        overlap = np.fmax(
            np.sum(self.target_mask_density[mask_index]), np.finfo(float).eps
        )

        mask_target = self.target_density[mask_index]
        mask_template = (
            self.template_weights[in_volume] * self.target_mask_density[template_index]
        )
        target_sum = np.sum(mask_target)
        template_sum = np.sum(mask_template)

        # Negative values are clipped to zero before taking the square root.
        target_term = np.fmax(
            np.sum(mask_target**2) - np.square(target_sum) / overlap, 0.0
        )
        template_term = np.fmax(
            np.sum(mask_template**2) - np.square(template_sum) / overlap, 0.0
        )
        denominator = np.sqrt(target_term * template_term)

        correlation = np.dot(
            self.target_density[template_index],
            self.template_weights[in_volume],
        )
        numerator = correlation - target_sum * template_sum / overlap

        if denominator == 0:
            return 0
        return numerator / denominator
512
+
513
+
514
class PartialLeastSquareDifference(MatchCoordinatesToDensity):
    """
    Partial Least Square Difference matching score.

    The Partial Least Square Difference (PLSQ) between the target :math:`f`
    and the template :math:`g` is calculated as:

    .. math::

        \\text{d(f,g)} = \\sum_{i=1}^{n} \\| f(\\mathbf{p}_i) - g(\\mathbf{q}_i) \\|^2

    References
    ----------
    .. [1]  Daven Vasishtan and Maya Topf, "Scoring functions for cryoEM density
            fitting", Journal of Structural Biology, vol. 174, no. 2,
            pp. 333--343, 2011. DOI: https://doi.org/10.1016/j.jsb.2011.01.012
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        in_volume: NDArray,
        in_volume_mask: NDArray,
    ) -> float:
        """
        Compute the Partial Least Square Difference score.

        Template points inside the target volume contribute the squared
        difference between target density and template weight. Points outside
        contribute their squared template weight.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        in_volume : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target volume.
        in_volume_mask : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target mask volume. Not used by this score.

        Returns
        -------
        float
            The negative of the Partial Least Square Difference score.
        """
        target_values = self.target_density[tuple(transformed_coordinates[:, in_volume])]
        residual = target_values - self.template_weights[in_volume]
        outside_weights = self.template_weights[~in_volume]

        squared_difference = np.sum(np.square(residual))
        squared_difference = squared_difference + np.sum(np.square(outside_weights))
        return -squared_difference
577
+
578
+
579
class Chamfer(MatchCoordinatesToCoordinates):
    """
    Class representing the Chamfer matching score.

    The Chamfer distance is computed as:

    .. math::

        \\text{d(f,g)} = \\frac{1}{|X|} \\sum_{\\mathbf{f}_i \\in X}
        \\inf_{\\mathbf{g} \\in Y} ||\\mathbf{f}_i - \\mathbf{g}||_2

    References
    ----------
    .. [1]  Daven Vasishtan and Maya Topf, "Scoring functions for cryoEM density
            fitting", Journal of Structural Biology, vol. 174, no. 2,
            pp. 333--343, 2011. DOI: https://doi.org/10.1016/j.jsb.2011.01.012
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # KDTree expects one point per row, hence the transpose.
        self.target_tree = KDTree(self.target_coordinates.T)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        **kwargs,
    ) -> float:
        """
        Compute the Chamfer distance score.

        Calculates the average distance between each transformed template
        coordinate and its nearest target coordinate.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates. These are the points that are scored;
            previously the argument was ignored in favour of
            ``self.template_coordinates_rotated``.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        **kwargs : dict, optional
            Ignored.

        Returns
        -------
        float
            The negative of the Chamfer distance score.

        """
        nearest_distance, _ = self.target_tree.query(transformed_coordinates.T)
        return -np.mean(nearest_distance)
628
+
629
+
630
class MutualInformation(MatchCoordinatesToDensity):
    """
    Class representing the Mutual Information matching score.

    The Mutual Information (MI) score is calculated as:

    .. math::

        \\text{d(f,g)} = \\sum_{f,g} p(f,g) \\log \\frac{p(f,g)}{p(f)p(g)}

    References
    ----------
    .. [1]  Daven Vasishtan and Maya Topf, "Scoring functions for cryoEM density
            fitting", Journal of Structural Biology, vol. 174, no. 2,
            pp. 333--343, 2011. DOI: https://doi.org/10.1016/j.jsb.2011.01.012

    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        in_volume: NDArray,
        in_volume_mask: NDArray,
    ) -> float:
        """
        Compute the Mutual Information score.

        The joint distribution of target density and template weight is
        estimated with a 2D histogram over the template points that fall
        inside the target volume.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        in_volume : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target volume.
        in_volume_mask : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target mask volume. Not used by this score.

        Returns
        -------
        float
            The Mutual Information score. Zero if no template point lies in
            the target volume.
        """
        joint, _, _ = np.histogram2d(
            self.target_density[tuple(transformed_coordinates[:, in_volume])],
            self.template_weights[in_volume],
        )

        total = joint.sum()
        if total == 0:
            # No samples: the distributions are undefined, report no information.
            return 0.0

        joint /= total
        p_x = joint.sum(axis=1)
        p_y = joint.sum(axis=0)

        # Empty bins contribute nothing (p log p -> 0), so they are skipped.
        # Wherever the joint probability is positive both marginals are too,
        # which keeps the ratio and its logarithm finite.
        occupied = joint > 0
        independent = (p_x[:, None] * p_y[None, :])[occupied]
        log_ratio = np.log(joint[occupied] / independent)
        return np.sum(joint[occupied] * log_ratio)
696
+
697
+
698
class Envelope(MatchCoordinatesToDensity):
    """
    Envelope matching score.

    The Envelope score (ENV) is calculated as:

    .. math::

        \\text{d(f,g)} = \\sum_{\\mathbf{p} \\in P} f'(\\mathbf{p})
        \\cdot g'(\\mathbf{p})

    Parameters
    ----------
    target_threshold : float
        Target density values strictly greater than this threshold are
        labelled -1, all other voxels are labelled 1.
    **kwargs : dict
        Forwarded to :py:class:`MatchCoordinatesToDensity`.

    References
    ----------
    .. [1]  Daven Vasishtan and Maya Topf, "Scoring functions for cryoEM density
            fitting", Journal of Structural Biology, vol. 174, no. 2,
            pp. 333--343, 2011. DOI: https://doi.org/10.1016/j.jsb.2011.01.012
    """

    def __init__(self, target_threshold: float, **kwargs):
        super().__init__(**kwargs)
        above_threshold = self.target_density > target_threshold
        self.target_density = np.where(above_threshold, -1, 1)
        # Voxel counts per label, used to normalize the score.
        self.target_density_present = np.sum(self.target_density == -1)
        self.target_density_absent = np.sum(self.target_density == 1)
        # Every template point carries unit weight for this score.
        self.template_weights = np.ones_like(self.template_weights)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        in_volume: NDArray,
        in_volume_mask: NDArray,
    ) -> float:
        """
        Compute the Envelope score.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed coordinates.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        in_volume : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target volume.
        in_volume_mask : NDArray
            Binary mask indicating which ``transformed_coordinates`` are in the
            target mask volume. Not used by this score.

        Returns
        -------
        float
            The Envelope score.
        """
        labels = self.target_density[tuple(transformed_coordinates[:, in_volume])]

        # Present-density count minus the number of -1 hits among the points.
        covered = (labels == -1).sum()
        unassigned_density = self.target_density_present - covered
        outside_points = np.sum(np.invert(in_volume))

        raw_score = labels.sum() - unassigned_density - 2 * outside_points
        min_score = -self.target_density_present - 2 * self.target_density_absent
        return (raw_score - 2 * min_score) / (
            2 * self.target_density_present - min_score
        )
762
+
763
+
764
class NormalVectorScore(MatchCoordinatesToCoordinates):
    """
    Class representing the Normal Vector matching score.

    The Normal Vector Score (NVS) is calculated as:

    .. math::

        \\text{d(f,g)} = \\frac{1}{N} \\sum_{i=1}^{N}
                        \\frac{
                            {\\vec{f}_i} \\cdot {\\vec{g}_i}
                        }{
                            ||\\vec{f}_i|| \\, ||\\vec{g}_i||
                        }

    References
    ----------
    .. [1]  Daven Vasishtan and Maya Topf, "Scoring functions for cryoEM density
            fitting", Journal of Structural Biology, vol. 174, no. 2,
            pp. 333--343, 2011. DOI: https://doi.org/10.1016/j.jsb.2011.01.012

    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def scoring_function(
        self,
        transformed_coordinates: NDArray,
        transformed_coordinates_mask: NDArray,
        **kwargs,
    ) -> float:
        """
        Compute the Normal Vector Score.

        Computes the average cosine similarity between corresponding columns
        of ``transformed_coordinates`` and ``self.target_coordinates``, as in
        the class formula. Previously the elementwise product was divided by
        the norms of the whole arrays, which is not a per-vector cosine.

        Parameters
        ----------
        transformed_coordinates : NDArray
            Transformed template vectors, one vector per column [d x N]. Must
            have the same shape as ``self.target_coordinates``.
        transformed_coordinates_mask : NDArray
            Mask for the transformed coordinates. Not used by this score.
        **kwargs : dict, optional
            Ignored.

        Returns
        -------
        float
            The Normal Vector Score.
        """
        dot_products = np.sum(transformed_coordinates * self.target_coordinates, axis=0)
        norm_products = np.linalg.norm(transformed_coordinates, axis=0)
        norm_products = norm_products * np.linalg.norm(self.target_coordinates, axis=0)
        # Zero-length vectors get a cosine of zero instead of a division by zero.
        cosines = dot_products / np.fmax(norm_products, np.finfo(float).eps)
        return np.mean(cosines)
819
+
820
+
821
# Maps the name of each built-in matching score to the class implementing it.
MATCHING_OPTIMIZATION_REGISTER = {
    "CrossCorrelation": CrossCorrelation,
    "LaplaceCrossCorrelation": LaplaceCrossCorrelation,
    "NormalizedCrossCorrelationMean": NormalizedCrossCorrelationMean,
    "NormalizedCrossCorrelation": NormalizedCrossCorrelation,
    "MaskedCrossCorrelation": MaskedCrossCorrelation,
    "PartialLeastSquareDifference": PartialLeastSquareDifference,
    "Envelope": Envelope,
    "Chamfer": Chamfer,
    "MutualInformation": MutualInformation,
    "NormalVectorScore": NormalVectorScore,
}
833
+
834
+
835
def register_matching_optimization(match_name: str, match_class: type):
    """
    Registers a class to be used by :py:class:`FitRefinement`.

    Parameters
    ----------
    match_name : str
        Name of the matching instance.
    match_class : type
        Class pointer.

    Raises
    ------
    ValueError
        If any of the required methods is not defined.
    """
    methods_to_check = ("__init__", "__call__", "scoring_function")
    is_class = isinstance(match_class, type)

    for method in methods_to_check:
        if is_class:
            # hasattr() on a class also finds attributes of its metaclass, so
            # every class appears to have ``__call__``. Searching the MRO only
            # accepts methods that instances of the class really provide.
            defined = any(method in vars(klass) for klass in match_class.__mro__)
        else:
            defined = hasattr(match_class, method)

        if not defined:
            raise ValueError(
                f"Method '{method}' is not defined in the provided class or object."
            )
    MATCHING_OPTIMIZATION_REGISTER[match_name] = match_class
859
+
860
+
861
class FitRefinement:
    """
    Refine the fit between target and template coordinates.

    Notes
    -----
    By default scipy.optimize.differential_evolution or scipy.optimize.basinhopping
    are used, which can be unreliable if the initial alignment is very poor. Other
    optimizers can be implemented by subclassing :py:class:`FitRefinement` and
    overriding the :py:meth:`FitRefinement.refine` function.
    """

    @staticmethod
    def map_coordinates_to_array(
        coordinates: NDArray,
        array_shape: NDArray,
        array_origin: NDArray,
        sampling_rate: NDArray,
        coordinates_mask: NDArray = None,
    ) -> Tuple[NDArray, NDArray, NDArray, NDArray]:
        """
        Map coordinates to a volume based on given voxel size and origin.

        Parameters
        ----------
        coordinates : NDArray
            An array representing the coordinates to be mapped [d x N].
        array_shape : NDArray
            The shape of the array to which the coordinates are mapped.
        array_origin : NDArray
            The origin of the array to which the coordinates are mapped.
        sampling_rate : NDArray
            The size of a voxel in the array.
        coordinates_mask : NDArray, optional
            An array representing the mask for the coordinates [d x T].

        Returns
        -------
        tuple
            Returns transformed coordinates, transformed coordinates mask,
            mask for in_volume points, and mask for in_volume points in mask.
            The two mask-related entries are None if ``coordinates_mask``
            is None.

        Notes
        -----
        Positions are converted with ``astype(int)``, which truncates toward
        zero. Fractional positions in the open interval (-1, 0) therefore map
        to index 0 and are reported as inside the volume.
        """
        upper_bound = np.array(array_shape)[:, None]

        def _to_voxel_indices(points: NDArray) -> Tuple[NDArray, NDArray]:
            # astype returns a copy, so the caller's array is never modified
            # by the in-place division below.
            points = points.astype(sampling_rate.dtype)
            np.divide(
                points - array_origin[:, None], sampling_rate[:, None], out=points
            )
            indices = points.astype(int)
            # A point is inside only if every one of its axes is within bounds.
            inside = np.logical_and(indices < upper_bound, indices >= 0).all(axis=0)
            return indices, inside

        transformed_coordinates, in_volume = _to_voxel_indices(coordinates)

        transformed_coordinates_mask, in_volume_mask = None, None
        if coordinates_mask is not None:
            transformed_coordinates_mask, in_volume_mask = _to_voxel_indices(
                coordinates_mask
            )

        return (
            transformed_coordinates,
            transformed_coordinates_mask,
            in_volume,
            in_volume_mask,
        )

    @staticmethod
    def array_from_coordinates(
        coordinates: NDArray,
        weights: NDArray,
        sampling_rate: NDArray,
        origin: NDArray = None,
        shape: NDArray = None,
    ) -> Tuple[NDArray, NDArray, NDArray]:
        """
        Create a volume from coordinates, using given weights and voxel size.

        Parameters
        ----------
        coordinates : NDArray
            An array representing the coordinates [d x N].
        weights : NDArray
            An array representing the weights for each coordinate [N].
        sampling_rate : NDArray
            The size of a voxel in the volume.
        origin : NDArray, optional
            The origin of the volume. Defaults to the per-axis minimum of
            ``coordinates``.
        shape : NDArray, optional
            The shape of the volume. Defaults to the smallest shape that
            contains every computed position.

        Returns
        -------
        tuple
            Returns the generated volume (float32), positions of coordinates,
            and origin.
        """
        if origin is None:
            origin = coordinates.min(axis=1)

        offsets = coordinates - origin[:, None]
        positions = np.divide(offsets, sampling_rate[:, None]).astype(int)

        if shape is None:
            shape = positions.max(axis=1) + 1

        volume = np.zeros(shape, dtype=np.float32)
        # np.add.at accumulates weights of coordinates sharing the same voxel,
        # which plain fancy-index assignment would not do.
        np.add.at(volume, tuple(positions), weights)
        return volume, positions, origin

    def refine(
        self,
        target_coordinates: NDArray,
        target_weights: NDArray,
        template_coordinates: NDArray,
        template_weights: NDArray,
        sampling_rate: float = None,
        translational_uncertainty: Tuple[Tuple[float, float], ...] = None,
        rotational_uncertainty: Tuple[Tuple[float, float], ...] = None,
        scoring_class: str = "CrossCorrelation",
        scoring_class_parameters: Dict = None,
        local_optimization: bool = True,
        maxiter: int = 100,
    ) -> Tuple[NDArray, NDArray, float]:
        """
        Refines the alignment of template coordinates to target coordinates.

        Parameters
        ----------
        target_coordinates : NDArray
            The coordinates of the target.

        target_weights : NDArray
            The weights of the target.

        template_coordinates : NDArray
            The coordinates of the template.

        template_weights : NDArray
            The weights of the template.

        sampling_rate : float or NDArray, optional
            The size of the voxel, either a scalar or an array. It is repeated
            to match the number of coordinate axes. Default is None, which
            corresponds to a sampling rate of one along each axis.

        translational_uncertainty : ((float, float), ...), optional
            (lower, upper) translation bound per axis. Default is None, in
            which case the bounds are centered on the difference between the
            target and template centers of mass and extend by half the target
            extent along each axis.

        rotational_uncertainty : ((float, float), ...), optional
            (lower, upper) rotation bound per axis. Default is None, which
            corresponds to (-90, 90) for each axis.

        scoring_class : str, optional
            The scoring class to be used. Default is "CrossCorrelation".

        scoring_class_parameters : dict, optional
            Additional keyword arguments passed to the scoring class.
            Default is None, which is treated as an empty dictionary.

        local_optimization : bool, optional
            If True, scipy.optimize.basinhopping with COBYLA is used, otherwise
            scipy.optimize.differential_evolution. Default is True.

        maxiter : int, optional
            The maximum number of iterations. Default is 100.

        Returns
        -------
        tuple
            A tuple containing the translation and rotation matrix of the
            refinement, as well as the score of the returned transformation.
            If the optimizer did not improve on the initial placement, the
            identity transformation and the initial score are returned.

        Raises
        ------
        NotImplementedError
            If scoring class is not a part of `MATCHING_OPTIMIZATION_REGISTER`.
            Individual scores can be added via
            :py:meth:`register_matching_optimization`.

        Notes
        -----
        The parameter vector is hard-coded to six entries (three translations
        followed by three rotation angles), i.e. three-dimensional input.

        See Also
        --------
        :py:meth:`register_matching_optimization`
        """
        if scoring_class not in MATCHING_OPTIMIZATION_REGISTER:
            raise NotImplementedError(
                f"Parameter score has to be one of "
                f"{', '.join(MATCHING_OPTIMIZATION_REGISTER.keys())}."
            )
        scoring_class = MATCHING_OPTIMIZATION_REGISTER[scoring_class]

        # None instead of a shared mutable dict() default.
        if scoring_class_parameters is None:
            scoring_class_parameters = {}

        ndim = target_coordinates.shape[0]
        if sampling_rate is None:
            sampling_rate = np.ones(1)
        # Accept plain Python scalars as advertised by the signature; arrays
        # pass through unchanged.
        sampling_rate = np.atleast_1d(sampling_rate)
        sampling_rate = np.repeat(sampling_rate, ndim // sampling_rate.size)

        score = scoring_class(
            target_coordinates=target_coordinates,
            template_coordinates=template_coordinates,
            target_weights=target_weights,
            template_weights=template_weights,
            sampling_rate=sampling_rate,
            **scoring_class_parameters,
        )

        # Objective value of the unmodified placement, used as the baseline
        # the optimizer has to beat.
        initial_score = score(np.zeros(6))

        if translational_uncertainty is None:
            mass_center_target = np.dot(target_coordinates, target_weights)
            mass_center_target /= target_weights.sum()
            mass_center_template = np.dot(template_coordinates, template_weights)
            mass_center_template /= template_weights.sum()

            mass_center_difference = np.ceil(
                np.subtract(mass_center_target, mass_center_template)
            ).astype(int)
            target_extent = np.subtract(
                target_coordinates.max(axis=1), target_coordinates.min(axis=1)
            )
            target_range = np.ceil(np.divide(target_extent, 2)).astype(int)
            translational_uncertainty = tuple(
                (center - start, center + start)
                for center, start in zip(mass_center_difference, target_range)
            )
        if rotational_uncertainty is None:
            rotational_uncertainty = tuple((-90, 90) for _ in range(ndim))

        uncertainty = (*translational_uncertainty, *rotational_uncertainty)
        # Degenerate (0, 0) bounds are widened slightly. Unpacking makes the
        # check independent of whether a bound was given as tuple or list.
        bounds = [
            (low, high) if (low, high) != (0, 0) else (-1e-9, 1e-9)
            for low, high in uncertainty
        ]
        linear_constraint = LinearConstraint(
            np.eye(len(bounds)), np.min(bounds, axis=1), np.max(bounds, axis=1)
        )

        if local_optimization:
            result = basinhopping(
                x0=np.zeros(6),
                func=score,
                niter=maxiter,
                minimizer_kwargs={"method": "COBYLA", "constraints": linear_constraint},
            )
        else:
            result = differential_evolution(
                func=score,
                bounds=bounds,
                constraints=linear_constraint,
                maxiter=maxiter,
            )

        # The objective is minimized and negated when reported; presumably the
        # scoring object returns a negated similarity -- confirm against the
        # registered scoring classes.
        print(f"Initial score: {-initial_score} - Refined score: {-result.fun}")

        best_parameters, best_objective = result.x, result.fun
        if initial_score < result.fun:
            # The optimizer made things worse: fall back to the identity
            # transformation and report the score that belongs to it.
            best_parameters = np.zeros_like(result.x)
            best_objective = initial_score

        translation, rotation = best_parameters[:3], best_parameters[3:]
        rotation_matrix = euler_to_rotationmatrix(rotation)
        return translation, rotation_matrix, -best_objective