pytme 0.1.5__cp311-cp311-macosx_14_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. pytme-0.1.5.data/scripts/estimate_ram_usage.py +81 -0
  2. pytme-0.1.5.data/scripts/match_template.py +744 -0
  3. pytme-0.1.5.data/scripts/postprocess.py +279 -0
  4. pytme-0.1.5.data/scripts/preprocess.py +93 -0
  5. pytme-0.1.5.data/scripts/preprocessor_gui.py +729 -0
  6. pytme-0.1.5.dist-info/LICENSE +153 -0
  7. pytme-0.1.5.dist-info/METADATA +69 -0
  8. pytme-0.1.5.dist-info/RECORD +63 -0
  9. pytme-0.1.5.dist-info/WHEEL +5 -0
  10. pytme-0.1.5.dist-info/entry_points.txt +6 -0
  11. pytme-0.1.5.dist-info/top_level.txt +2 -0
  12. scripts/__init__.py +0 -0
  13. scripts/estimate_ram_usage.py +81 -0
  14. scripts/match_template.py +744 -0
  15. scripts/match_template_devel.py +788 -0
  16. scripts/postprocess.py +279 -0
  17. scripts/preprocess.py +93 -0
  18. scripts/preprocessor_gui.py +729 -0
  19. tme/__init__.py +6 -0
  20. tme/__version__.py +1 -0
  21. tme/analyzer.py +1144 -0
  22. tme/backends/__init__.py +134 -0
  23. tme/backends/cupy_backend.py +309 -0
  24. tme/backends/matching_backend.py +1154 -0
  25. tme/backends/npfftw_backend.py +763 -0
  26. tme/backends/pytorch_backend.py +526 -0
  27. tme/data/__init__.py +0 -0
  28. tme/data/c48n309.npy +0 -0
  29. tme/data/c48n527.npy +0 -0
  30. tme/data/c48n9.npy +0 -0
  31. tme/data/c48u1.npy +0 -0
  32. tme/data/c48u1153.npy +0 -0
  33. tme/data/c48u1201.npy +0 -0
  34. tme/data/c48u1641.npy +0 -0
  35. tme/data/c48u181.npy +0 -0
  36. tme/data/c48u2219.npy +0 -0
  37. tme/data/c48u27.npy +0 -0
  38. tme/data/c48u2947.npy +0 -0
  39. tme/data/c48u3733.npy +0 -0
  40. tme/data/c48u4749.npy +0 -0
  41. tme/data/c48u5879.npy +0 -0
  42. tme/data/c48u7111.npy +0 -0
  43. tme/data/c48u815.npy +0 -0
  44. tme/data/c48u83.npy +0 -0
  45. tme/data/c48u8649.npy +0 -0
  46. tme/data/c600v.npy +0 -0
  47. tme/data/c600vc.npy +0 -0
  48. tme/data/metadata.yaml +80 -0
  49. tme/data/quat_to_numpy.py +42 -0
  50. tme/data/scattering_factors.pickle +0 -0
  51. tme/density.py +2314 -0
  52. tme/extensions.cpython-311-darwin.so +0 -0
  53. tme/helpers.py +881 -0
  54. tme/matching_data.py +377 -0
  55. tme/matching_exhaustive.py +1553 -0
  56. tme/matching_memory.py +382 -0
  57. tme/matching_optimization.py +1123 -0
  58. tme/matching_utils.py +1180 -0
  59. tme/parser.py +429 -0
  60. tme/preprocessor.py +1291 -0
  61. tme/scoring.py +866 -0
  62. tme/structure.py +1428 -0
  63. tme/types.py +10 -0
tme/analyzer.py ADDED
@@ -0,0 +1,1144 @@
1
+ """ Implements classes to analyze score spaces from systematic fitting.
2
+
3
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
+
5
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ """
7
+ from time import sleep
8
+ from typing import Tuple, List, Dict
9
+ from abc import ABC, abstractmethod
10
+ from contextlib import nullcontext
11
+ from multiprocessing import RawValue, Manager, Lock
12
+
13
+ import numpy as np
14
+ from numpy.typing import NDArray
15
+ from scipy.stats import entropy
16
+ from sklearn.cluster import DBSCAN
17
+ from skimage.feature import peak_local_max
18
+
19
+ from .extensions import max_index_by_label, online_statistics
20
+ from .matching_utils import (
21
+ split_numpy_array_slices,
22
+ array_to_memmap,
23
+ generate_tempfile_name,
24
+ )
25
+
26
+ from .backends import backend
27
+
28
+
29
def filter_points_indices(coordinates: NDArray, min_distance: Tuple[int]):
    """
    Return indices of one representative point per ``min_distance`` bucket.

    Each coordinate is divided by ``min_distance`` and truncated to an integer
    bucket index. The per-axis bucket indices are folded into a single key and
    the first occurrence of every key is retained, in input order.

    Parameters
    ----------
    coordinates : NDArray
        Point coordinates with shape (n, d).
    min_distance : int
        Bucket edge length. Although annotated as a tuple, the value is
        compared against zero and used as a scalar divisor.

    Returns
    -------
    NDArray
        Sorted indices into ``coordinates`` of the retained points. All
        indices are returned if ``min_distance`` is not positive.

    Notes
    -----
    NOTE(review): the weight of axis ``i`` is ``(max_bucket_i + 1) ** i``
    rather than a cumulative product of the axis extents, so two different
    buckets can presumably map to the same key when an earlier axis spans more
    buckets than a later one - confirm whether this is intended.
    """
    if min_distance <= 0:
        return backend.arange(coordinates.shape[0])

    buckets = backend.astype(backend.divide(coordinates, min_distance), int)
    bucket_extent = backend.max(buckets, axis=0) + 1
    axis_weights = backend.power(bucket_extent, backend.arange(buckets.shape[1]))

    # Fold the d-dimensional bucket index into a single scalar key per point.
    backend.multiply(buckets, axis_weights, out=buckets)
    bucket_keys = backend.sum(buckets, axis=1)

    # Keep the first point seen for every key and restore the input ordering.
    _, first_seen = backend.unique(bucket_keys, return_index=True)
    return first_seen[backend.argsort(first_seen)]
41
+
42
+
43
def filter_points(coordinates: NDArray, min_distance: Tuple[int]):
    """
    Return the subset of ``coordinates`` kept by :py:func:`filter_points_indices`.

    Parameters
    ----------
    coordinates : NDArray
        Point coordinates with shape (n, d).
    min_distance : int
        Bucket edge length forwarded to :py:func:`filter_points_indices`.

    Returns
    -------
    NDArray
        Rows of ``coordinates`` selected by the bucket filter.
    """
    return coordinates[filter_points_indices(coordinates, min_distance)]
47
+
48
+
49
class PeakCaller(ABC):
    """
    Base class for peak calling algorithms.

    Instances accumulate peaks over repeated calls in ``peak_list``, a list of
    four arrays: peak positions, rotations, peak scores and peak details.

    Parameters
    ----------
    number_of_peaks : int, optional
        Number of candidate peaks to consider.
    min_distance : int, optional
        Minimum distance between peaks.
    **kwargs
        Additional keyword arguments. Accepted but not used by this class.

    Raises
    ------
    ValueError
        If number_of_peaks is less than or equal to zero.
        If min_distance is less than zero.
    """

    def __init__(
        self,
        number_of_peaks: int = 1000,
        min_distance: int = 1,
        **kwargs,
    ):
        number_of_peaks, min_distance = int(number_of_peaks), int(min_distance)
        if number_of_peaks <= 0:
            raise ValueError(
                f"number_of_peaks has to be larger than 0, got {number_of_peaks}"
            )
        if min_distance < 0:
            raise ValueError(f"min_distance has to be non-negative, got {min_distance}")

        self.peak_list = []
        self.min_distance = min_distance
        self.number_of_peaks = number_of_peaks

    def __iter__(self):
        """
        Yield the stored peak positions, rotations, scores and details.

        The stored arrays are converted with ``backend.to_cpu_array`` before
        being yielded. Nothing is yielded if no peaks have been recorded.
        """
        self.peak_list = [backend.to_cpu_array(arr) for arr in self.peak_list]
        yield from self.peak_list

    def __call__(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> None:
        """
        Update the internal parameter store based on input array.

        Parameters
        ----------
        score_space : NDArray
            Array containing the score space.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array.
        **kwargs
            Additional keyword arguments forwarded to :py:meth:`call_peaks`
            and :py:meth:`_update`.
        """
        called = self.call_peaks(
            score_space=score_space, rotation_matrix=rotation_matrix, **kwargs
        )
        # Tolerate implementations that signal "no peaks" with a bare None
        # instead of a (positions, details) tuple.
        if called is None:
            return None
        peak_positions, peak_details = called

        if peak_positions is None:
            return None

        peak_positions = backend.astype(peak_positions, int)
        if peak_positions.shape[0] == 0:
            return None

        if peak_details is None:
            # -1 marks peaks for which the caller provides no details.
            peak_details = backend.to_backend_array([-1] * peak_positions.shape[0])

        # Every peak of this call stems from the same rotation.
        rotations = backend.repeat(
            rotation_matrix.reshape(1, *rotation_matrix.shape),
            peak_positions.shape[0],
            axis=0,
        )

        self._update(
            peak_positions=peak_positions,
            peak_details=peak_details,
            peak_scores=score_space[tuple(peak_positions.T)],
            rotations=rotations,
            **kwargs,
        )

    @abstractmethod
    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        This function is not intended to be called directly, but should rather be
        defined by classes inheriting from :py:class:`PeakCaller` to execute a given
        peak calling algorithm.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array.
        **kwargs
            Additional keyword arguments.

        Returns
        -------
        NDArray
            Array of peak coordinates.
        NDArray
            Array of peak details.
        """

    @classmethod
    def merge(cls, candidates: List[List] = None, **kwargs) -> Tuple:
        """
        Merge multiple instances of :py:class:`PeakCaller`.

        Parameters
        ----------
        candidates : list of lists, optional
            Each element is obtained by invoking list or tuple on an instance,
            i.e. it holds peak positions, rotations, peak scores and peak
            details in that order. Empty elements are skipped. Defaults to no
            candidates.
        **kwargs
            Additional keyword arguments passed to the class constructor and
            to :py:meth:`_update`. ``translation_offset`` is overwritten with
            zeros for every candidate.

        Returns
        -------
        tuple
            Merged peak positions, rotations, peak scores and peak details.
            Empty if no non-empty candidate was provided.
        """
        base = cls(**kwargs)
        for candidate in candidates or ():
            if len(candidate) == 0:
                continue
            peak_positions, rotations, peak_scores, peak_details = candidate
            # Offsets are reset to zero here rather than taken from the caller.
            kwargs["translation_offset"] = backend.zeros(peak_positions.shape[1])
            base._update(
                peak_positions=backend.to_backend_array(peak_positions),
                peak_details=backend.to_backend_array(peak_details),
                peak_scores=backend.to_backend_array(peak_scores),
                rotations=backend.to_backend_array(rotations),
                **kwargs,
            )
        return tuple(base)

    def _update(
        self,
        peak_positions: NDArray,
        peak_details: NDArray,
        peak_scores: NDArray,
        rotations: NDArray,
        **kwargs,
    ) -> None:
        """
        Update internal parameter store.

        Parameters
        ----------
        peak_positions : NDArray
            Position of peaks with shape n x d where n is the number of
            peaks and d the dimension. Modified in place when the
            translation offset is added.
        peak_details : NDArray
            Details associated with each peak.
        peak_scores : NDArray
            Corresponding score obtained at each peak.
        rotations : NDArray
            Rotations used to obtain the score space from which
            the candidates stem.
        translation_offset : NDArray, optional
            Offset of the score_space, occurs e.g. when template matching
            to parts of a tomogram. Passed via ``kwargs``, defaults to zero.
        """
        translation_offset = kwargs.get(
            "translation_offset", backend.zeros(peak_positions.shape[1])
        )
        translation_offset = backend.astype(translation_offset, peak_positions.dtype)

        backend.add(peak_positions, translation_offset, out=peak_positions)
        if not len(self.peak_list):
            # First update: seed the store with the new data and concatenate
            # empty placeholders below so both cases share one code path.
            self.peak_list = [peak_positions, rotations, peak_scores, peak_details]
            peak_scores, peak_details, dim = (), (), peak_positions.shape[1]
            rotations = backend.zeros((0, dim, dim), rotations.dtype)
            peak_positions = backend.zeros((0, dim), peak_positions.dtype)

        peaks = backend.concatenate((self.peak_list[0], peak_positions))
        rotations = backend.concatenate((self.peak_list[1], rotations))
        peak_scores = backend.concatenate((self.peak_list[2], peak_scores))
        peak_details = backend.concatenate((self.peak_list[3], peak_details))

        # Keep at most number_of_peaks best scores, then enforce min_distance.
        top_n = min(backend.size(peak_scores), self.number_of_peaks)
        top_scores, *_ = backend.topk_indices(peak_scores, top_n)

        final_order = top_scores[
            filter_points_indices(peaks[top_scores], self.min_distance)
        ]

        self.peak_list[0] = peaks[final_order,]
        self.peak_list[1] = rotations[final_order,]
        self.peak_list[2] = peak_scores[final_order]
        self.peak_list[3] = peak_details[final_order]
254
+
255
+
256
class PeakCallerSort(PeakCaller):
    """
    A :py:class:`PeakCaller` subclass that takes the ``number_of_peaks``
    highest scores and afterwards thins them out with
    :py:func:`filter_points` using ``min_distance``.
    """

    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array. Unused here.
        **kwargs
            Additional keyword arguments. Unused here.

        Returns
        -------
        NDArray
            Array of peak coordinates.
        None
            This peak caller does not provide peak details.
        """
        flattened = score_space.reshape(-1)
        n_candidates = min(self.number_of_peaks, backend.size(flattened))

        best_flat_indices, *_ = backend.topk_indices(flattened, n_candidates)

        # Convert flat indices back to one coordinate row per candidate.
        per_axis = backend.unravel_index(best_flat_indices, score_space.shape)
        candidate_coordinates = backend.transpose(backend.stack(per_axis))

        return filter_points(candidate_coordinates, self.min_distance), None
296
+
297
+
298
class PeakCallerMaximumFilter(PeakCaller):
    """
    Find local maxima by applying a maximum filter and enforcing a distance
    constraint subsequently. This is similar to the strategy implemented in
    skimage.feature.peak_local_max.
    """

    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array. Unused here.
        **kwargs
            Additional keyword arguments. Unused here.

        Returns
        -------
        NDArray
            Array of peak coordinates.
        None
            This peak caller does not provide peak details.
        """
        candidates = backend.max_filter_coordinates(score_space, self.min_distance)

        # Upper bound on how many candidates are ranked by score.
        n_ranked = min(
            self.number_of_peaks,
            candidates.shape[0] - 1,
            backend.size(score_space) - 1,
        )
        candidate_scores = score_space[tuple(candidates.T)]
        ranking = backend.topk_indices(candidate_scores, n_ranked)

        return candidates[ranking], None
338
+
339
+
340
class PeakCallerFast(PeakCaller):
    """
    Subdivides the score space into squares with edge length ``min_distance``
    and determines the maximum value for each. In a second pass, all local maxima
    that are not the local maximum in a ``min_distance`` square centered around
    them are removed.
    """

    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array. Unused here.
        **kwargs
            Additional keyword arguments. Unused here.

        Returns
        -------
        NDArray or None
            Array of peak coordinates, None if the score space could not be
            subdivided into any subvolume.
        None
            This peak caller does not provide peak details.

        Notes
        -----
        The number of splits per axis is computed by integer division with
        ``min_distance``, hence a ``min_distance`` of zero raises a
        ZeroDivisionError.
        """
        splits = {
            axis: score_space.shape[axis] // self.min_distance
            for axis in range(score_space.ndim)
        }
        slices = split_numpy_array_slices(score_space.shape, splits)

        # Position of the maximum within each subvolume, in subvolume coordinates.
        coordinates = backend.to_backend_array(
            [
                backend.unravel_index(
                    backend.argmax(score_space[subvol]), score_space[subvol].shape
                )
                for subvol in slices
            ]
        )
        # Always hand back a two-tuple so callers can unpack the result, and
        # bail out before the empty array is used for indexing.
        if coordinates.shape[0] == 0:
            return None, None

        # Shift to score_space coordinates and order by decreasing score.
        offset = backend.to_backend_array(
            [tuple(x.start for x in subvol) for subvol in slices]
        )
        backend.add(coordinates, offset, out=coordinates)
        coordinates = coordinates[
            backend.flip(backend.argsort(score_space[tuple(coordinates.T)]), (0,))
        ]

        peaks = filter_points(coordinates, self.min_distance)

        # Second pass: a peak survives only if it is the maximum of the box
        # spanning min_distance around it, clipped to the score space.
        starts = backend.maximum(peaks - self.min_distance, 0)
        stops = backend.minimum(peaks + self.min_distance, score_space.shape)
        slices_list = [
            tuple(slice(*coord) for coord in zip(start_row, stop_row))
            for start_row, stop_row in zip(starts, stops)
        ]

        scores = score_space[tuple(peaks.T)]
        keep = [
            score >= backend.max(score_space[subvol])
            for subvol, score in zip(slices_list, scores)
        ]
        peaks = peaks[keep,]

        return peaks, None
416
+
417
+
418
class PeakCallerRecursiveMasking(PeakCaller):
    """
    Identifies peaks iteratively by selecting the top score and masking
    a region around it.
    """

    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        Exactly ``number_of_peaks`` coordinates are returned. After each
        selection the region from ``peak - min_distance`` up to, but excluding,
        ``peak + min_distance`` is set to zero in a working copy.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores. Left unmodified.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array. Unused here.
        **kwargs
            Additional keyword arguments. Unused here.

        Returns
        -------
        NDArray
            Array of peak coordinates.
        None
            This peak caller does not provide peak details.
        """
        # Mask a private copy: PeakCaller.__call__ reads the peak scores from
        # score_space after this method returns, which would all be zero if
        # the input itself were masked.
        working_scores = backend.zeros(score_space.shape, score_space.dtype)
        backend.add(working_scores, score_space, out=working_scores)

        score_box = tuple(self.min_distance for _ in range(score_space.ndim))
        coordinates = []
        while len(coordinates) < self.number_of_peaks:
            max_coord = backend.unravel_index(
                indices=backend.argmax(working_scores), shape=working_scores.shape
            )
            coordinates.append(max_coord)

            # Clip the masking box to the bounds of the score space.
            start = backend.maximum(backend.subtract(max_coord, score_box), 0)
            stop = backend.minimum(
                backend.add(max_coord, score_box), working_scores.shape
            )
            start, stop = backend.astype(start, int), backend.astype(stop, int)
            coords = tuple(slice(*pos) for pos in zip(start, stop))
            working_scores[coords] = 0

        peaks = backend.to_backend_array(coordinates)
        return peaks, None
463
+
464
+
465
class PeakCallerScipy(PeakCaller):
    """
    Peak calling that delegates to skimage.feature.peak_local_max.
    """

    def call_peaks(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> Tuple[NDArray, NDArray]:
        """
        Call peaks in the score space.

        Parameters
        ----------
        score_space : NDArray
            Data array of scores.
        rotation_matrix : NDArray
            Rotation matrix used to obtain the score array. Unused here.
        **kwargs
            Additional keyword arguments. Unused here.

        Returns
        -------
        NDArray
            Array of peak coordinates.
        None
            This peak caller does not provide peak details.
        """
        local_maxima = peak_local_max(
            score_space,
            min_distance=self.min_distance,
            num_peaks=self.number_of_peaks,
        )
        return local_maxima, None
498
+
499
+
500
class PeakClustering(PeakCallerSort):
    """
    Use DBSCAN clustering to identify more reliable peaks.

    The distance filter of the parent class is disabled by forcing
    ``min_distance`` to zero.
    """

    def __init__(
        self,
        number_of_peaks: int = 1000,
        **kwargs,
    ):
        # Any min_distance passed by the caller is deliberately overridden.
        kwargs["min_distance"] = 0
        super().__init__(number_of_peaks=number_of_peaks, **kwargs)

    @classmethod
    def merge(cls, **kwargs) -> NDArray:
        """
        Merge multiple instances and keep one representative per cluster.

        Peaks are clustered by position with DBSCAN. For every cluster the
        index returned by ``max_index_by_label`` is kept, peaks labelled
        as noise (-1) are dropped.

        Parameters
        ----------
        **kwargs
            Additional keyword arguments passed to :py:meth:`PeakCaller.merge`.

        Returns
        -------
        tuple
            Peak positions, rotations, scores and details of the retained
            peaks. Empty if the parent merge yielded no peaks.
        """
        merged = super().merge(**kwargs)
        if len(merged) == 0:
            # Nothing to cluster, avoid failing on tuple unpacking below.
            return merged
        peaks, rotations, scores, details = merged

        # Rank cluster members by their actual score. Previously the third
        # coordinate of each peak position was used in place of the score.
        scores = np.asarray(scores)
        clusters = DBSCAN(eps=np.finfo(float).eps, min_samples=8).fit(peaks)
        labels = clusters.labels_.astype(int)

        label_max = max_index_by_label(labels=labels, scores=scores)
        label_max.pop(-1, None)
        representatives = set(label_max.values())

        keep = np.array([index in representatives for index in range(peaks.shape[0])])
        peaks = peaks[keep,]
        rotations = rotations[keep,]
        scores = scores[keep]
        details = details[keep]

        return peaks, rotations, scores, details
551
+
552
+
553
class ScoreStatistics(PeakCallerFast):
    """
    Compute basic statistics on score spaces with respect to a reference
    score or value.

    This class is used to evaluate a blurring or scoring method when the correct fit
    is known. Running statistics are kept in ``multiprocessing`` raw values and
    manager lists, and updates are guarded by a lock, so that an instance is
    intended to be shared among multiple processes.

    After instantiation, the class's functionality can be accessed through the
    `__call__` method.

    Parameters
    ----------
    reference_position : tuple of int, optional
        Index of the correct fit in the array passed to call. Defaults to None,
        in which case the center of the score space is used.
    min_distance : float, optional
        Minimum distance for local maxima. Defaults to 10.
    reference_fit : float, optional
        Score of the correct fit. Defaults to None, in which case it is read
        from the score space of the identity rotation.
    number_of_peaks : int, optional
        Number of candidate fits to consider. Defaults to 1.
    """

    def __init__(
        self,
        reference_position: Tuple[int] = None,
        min_distance: float = 10,
        reference_fit: float = None,
        number_of_peaks: int = 1,
    ):
        super().__init__(number_of_peaks=number_of_peaks, min_distance=min_distance)
        self.lock = Lock()

        self.n = RawValue("Q", 0)
        self.rmean = RawValue("d", 0)
        self.ssqd = RawValue("d", 0)
        self.nbetter_or_equal = RawValue("Q", 0)
        self.maximum_value = RawValue("f", 0)
        self.minimum_value = RawValue("f", 2**32)

        # One manager serves all shared lists instead of one process per list.
        manager = Manager()
        self.shannon_entropy = manager.list()
        self.candidate_fits = manager.list()
        self.rotation_names = manager.list()

        self.reference_fit = RawValue("f", 0)
        self.has_reference = RawValue("i", 0)

        self.reference_position = reference_position
        if reference_fit is not None:
            self.reference_fit.value = reference_fit
            self.has_reference.value = 1

    def __call__(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> None:
        """
        Processes the input array and rotation matrix.

        Parameters
        ----------
        score_space : NDArray
            Input data array.
        rotation_matrix : NDArray
            Rotation matrix used to obtain ``score_space``.
        **kwargs
            Additional keyword arguments passed to the peak caller.
        """
        self.set_reference(score_space, rotation_matrix)

        # Blocks until some caller has provided the reference score.
        while not self.has_reference.value:
            print("Stalling processes until reference_fit has been set.")
            sleep(0.5)

        name = "_".join([str(value) for value in rotation_matrix.ravel()])
        n, rmean, ssqd, nbetter_or_equal, max_value, min_value = online_statistics(
            score_space, 0, 0.0, 0.0, self.reference_fit.value
        )

        freq, _ = np.histogram(score_space, bins=100)
        shannon_entropy = entropy(freq / score_space.size)

        peaks, _ = super().call_peaks(
            score_space=score_space, rotation_matrix=rotation_matrix, **kwargs
        )
        scores = score_space[tuple(peaks.T)]
        rotations = np.repeat(
            rotation_matrix.reshape(1, *rotation_matrix.shape),
            peaks.shape[0],
            axis=0,
        )
        # reference_position may be a tuple, which does not support [None, :].
        reference_position = np.asarray(self.reference_position)
        distances = np.linalg.norm(peaks - reference_position[None, :], axis=1)

        self._update(
            peak_positions=peaks,
            rotations=rotations,
            peak_scores=scores,
            peak_details=distances,
            n=n,
            rmean=rmean,
            ssqd=ssqd,
            nbetter_or_equal=nbetter_or_equal,
            max_value=max_value,
            min_value=min_value,
            entropy=shannon_entropy,
            name=name,
        )

    def __iter__(self):
        """
        Yield the internal parameter store.

        The order is peak positions, rotations, peak scores, peak details,
        sample size, running mean, sum of squared differences, number of
        scores better or equal to the reference, maximum, minimum, list of
        entropies, list of rotation names and the reference score.
        """
        param_store = (
            self.peak_list[0],
            self.peak_list[1],
            self.peak_list[2],
            self.peak_list[3],
            self.n.value,
            self.rmean.value,
            self.ssqd.value,
            self.nbetter_or_equal.value,
            self.maximum_value.value,
            self.minimum_value.value,
            list(self.shannon_entropy),
            list(self.rotation_names),
            self.reference_fit.value,
        )
        yield from param_store

    def _update(
        self,
        n: int,
        rmean: float,
        ssqd: float,
        nbetter_or_equal: int,
        max_value: float,
        min_value: float,
        entropy: float,
        name: str,
        **kwargs,
    ) -> None:
        """
        Updates the internal statistics of the analyzer.

        Parameters
        ----------
        n : int
            Sample size.
        rmean : float
            Running mean.
        ssqd : float
            Sum of squared differences.
        nbetter_or_equal : int
            Number of values better or equal to reference.
        max_value : float
            Maximum value.
        min_value : float
            Minimum value.
        entropy : float or list of float
            Shannon entropy. A list, as produced when merging parameter
            stores, is added element-wise.
        name : str or list of str
            Name or label for the data. A list is added element-wise.
        kwargs : dict
            Keyword arguments passed to PeakCaller._update.
        """
        with self.lock:
            super()._update(**kwargs)

            # Pairwise combination of mean and sum of squared differences.
            n_total = self.n.value + n
            if n_total > 0:
                delta = rmean - self.rmean.value
                self.rmean.value += delta * n / n_total
                self.ssqd.value += ssqd + delta * delta * (n * self.n.value) / n_total
                self.n.value = n_total

            self.nbetter_or_equal.value += nbetter_or_equal
            self.minimum_value.value = min(self.minimum_value.value, min_value)
            self.maximum_value.value = max(self.maximum_value.value, max_value)

            # Flatten lists coming from merge instead of nesting them.
            entropies = entropy if isinstance(entropy, (list, tuple)) else [entropy]
            names = name if isinstance(name, (list, tuple)) else [name]
            self.shannon_entropy.extend(entropies)
            self.rotation_names.extend(names)

    @classmethod
    def merge(cls, param_stores: List[Tuple]) -> Tuple:
        """
        Merges multiple instances of :py:class:`ScoreStatistics`.

        Parameters
        ----------
        param_stores : list of tuple
            Internal parameter stores. Each is obtained by running
            `tuple(instance)`.

        Returns
        -------
        tuple
            Merged parameter store in the order produced by `tuple(instance)`.
            The reference score is taken from the last parameter store.

        Raises
        ------
        ValueError
            If ``param_stores`` is empty.
        """
        if len(param_stores) == 0:
            raise ValueError("param_stores has to contain at least one element.")

        base = cls(reference_position=np.zeros(3, int))
        for param_store in param_stores:
            base._update(
                peak_positions=param_store[0],
                rotations=param_store[1],
                peak_scores=param_store[2],
                peak_details=param_store[3],
                n=param_store[4],
                rmean=param_store[5],
                ssqd=param_store[6],
                nbetter_or_equal=param_store[7],
                max_value=param_store[8],
                min_value=param_store[9],
                entropy=param_store[10],
                name=param_store[11],
            )
            base.reference_fit.value = param_store[12]
        return tuple(base)

    def set_reference(self, score_space: NDArray, rotation_matrix: NDArray) -> None:
        """
        Sets the reference for the analyzer based on the input array
        and rotation matrix.

        The reference is only set if ``rotation_matrix`` is close to the
        identity. In that case the reference score is read from
        ``score_space`` at ``reference_position``, which defaults to the
        center of ``score_space``.

        Parameters
        ----------
        score_space : NDArray
            Input data array.
        rotation_matrix : NDArray
            Rotation matrix for setting reference.
        """
        is_ref = np.allclose(
            rotation_matrix,
            np.eye(rotation_matrix.shape[0], dtype=rotation_matrix.dtype),
        )
        if not is_ref:
            return None

        reference_position = self.reference_position
        if reference_position is None:
            reference_position = np.divide(score_space.shape, 2).astype(int)
        self.reference_position = reference_position
        self.reference_fit.value = score_space[tuple(reference_position)]
        self.has_reference.value = 1
790
+
791
+
792
+ class MaxScoreOverRotations:
793
+ """
794
+ Obtain the maximum translation score over various rotations.
795
+
796
+ Attributes
797
+ ----------
798
+ score_space : NDArray
799
+ The score space for the observed rotations.
800
+ rotations : NDArray
801
+ The rotation identifiers for each score.
802
+ translation_offset : NDArray, optional
803
+ The offset applied during translation.
804
+ observed_rotations : int
805
+ Count of observed rotations.
806
+ use_memmap : bool, optional
807
+ Whether to offload internal data arrays to disk
808
+ thread_safe: bool, optional
809
+ Whether access to internal data arrays should be thread safe
810
+ """
811
+
812
def __init__(
    self,
    score_space_shape: Tuple[int],
    score_space_dtype: type,
    translation_offset: NDArray = None,
    shared_memory_handler: object = None,
    rotation_space_dtype: type = int,
    use_memmap: bool = False,
    thread_safe: bool = True,
    **kwargs,
):
    """
    Allocate the score and rotation arrays and the rotation bookkeeping.

    Parameters
    ----------
    score_space_shape : tuple of int
        Shape of the score space.
    score_space_dtype : type
        Data type of the score array, which is initialized with zeros.
    translation_offset : NDArray, optional
        Offset of the score space. Defaults to zero along each axis.
    shared_memory_handler : object, optional
        Handler passed to ``backend.arr_to_sharedarr``.
    rotation_space_dtype : type, optional
        Data type of the rotation index array, which is initialized with -1.
    use_memmap : bool, optional
        Whether to offload internal data arrays to disk when iterating.
    thread_safe : bool, optional
        Whether a manager lock and a manager dictionary are used to guard
        and store the observed rotations. Otherwise a no-op context and a
        plain dictionary are used.
    **kwargs
        Additional keyword arguments. Unused here.
    """
    score_space_shape = tuple(int(x) for x in score_space_shape)
    self.score_space = backend.arr_to_sharedarr(
        backend.zeros(shape=score_space_shape, dtype=score_space_dtype),
        shared_memory_handler=shared_memory_handler,
    )
    self.rotations = backend.arr_to_sharedarr(
        backend.full(score_space_shape, dtype=rotation_space_dtype, fill_value=-1),
        shared_memory_handler,
    )
    if translation_offset is None:
        translation_offset = backend.zeros(len(score_space_shape))

    self.translation_offset = backend.astype(translation_offset, int)
    self.score_space_shape = score_space_shape
    self.rotation_space_dtype = rotation_space_dtype
    self.score_space_dtype = score_space_dtype

    self.use_memmap = use_memmap
    if thread_safe:
        # A single manager serves both proxies, avoiding a second server
        # process per instance.
        manager = Manager()
        self.lock = manager.Lock()
        self.observed_rotations = manager.dict()
    else:
        self.lock = nullcontext()
        self.observed_rotations = {}
843
+
844
def __iter__(self):
    """
    Yield scores, translation offset, rotation indices and rotation mapping.

    The score and rotation arrays are converted to numpy arrays. If
    ``use_memmap`` is set they are written to disk and reopened as read-only
    memory maps, otherwise copies are yielded.
    """
    shape = self.score_space_shape
    scores = backend.sharedarr_to_arr(
        shape=shape, dtype=self.score_space_dtype, shm=self.score_space
    )
    rotation_indices = backend.sharedarr_to_arr(
        shape=shape, dtype=self.rotation_space_dtype, shm=self.rotations
    )
    scores = backend.to_numpy_array(scores)
    rotation_indices = backend.to_numpy_array(rotation_indices)

    if not self.use_memmap:
        # Avoid invalidation by shared memory handler with copy
        scores = scores.copy()
        rotation_indices = rotation_indices.copy()
    else:
        scores_path = array_to_memmap(scores)
        rotations_path = array_to_memmap(rotation_indices)
        scores = np.memmap(
            scores_path, mode="r", dtype=scores.dtype, shape=scores.shape
        )
        rotation_indices = np.memmap(
            rotations_path,
            mode="r",
            dtype=rotation_indices.dtype,
            shape=rotation_indices.shape,
        )

    # Assemble everything up front so all values are materialized before
    # the first element is handed out.
    param_store = (
        scores,
        backend.to_numpy_array(self.translation_offset),
        rotation_indices,
        dict(self.observed_rotations),
    )
    yield from param_store
885
+
886
def __call__(
    self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
) -> None:
    """
    Update internal parameter store based on `score_space`.

    Wherever ``score_space`` exceeds the stored score, the stored score is
    replaced and the index of ``rotation_matrix`` is recorded.

    Parameters
    ----------
    score_space : ndarray
        Numpy array containing the score space.
    rotation_matrix : ndarray
        Square matrix describing the current rotation.
    **kwargs
        Arbitrary keyword arguments.
    """
    with self.lock:
        # Rotations are identified by their byte representation and numbered
        # in order of first appearance.
        rotation_key = backend.tobytes(rotation_matrix)
        rotation_index = self.observed_rotations.setdefault(
            rotation_key, len(self.observed_rotations)
        )

        stored_scores = backend.sharedarr_to_arr(
            shape=self.score_space_shape,
            dtype=self.score_space_dtype,
            shm=self.score_space,
        )
        stored_rotations = backend.sharedarr_to_arr(
            shape=self.score_space_shape,
            dtype=self.rotation_space_dtype,
            shm=self.rotations,
        )

        improved = score_space > stored_scores
        stored_scores[improved] = score_space[improved]
        stored_rotations[improved] = rotation_index
919
+
920
@classmethod
def merge(cls, param_stores: List[Tuple], **kwargs) -> Tuple[NDArray]:
    """
    Merges multiple instances of :py:class:`MaxScoreOverRotations`.

    Parameters
    ----------
    param_stores : list of tuples
        Internal parameter stores, each obtained by running
        `tuple(instance)`, i.e. ``(scores, offset, rotations,
        rotation_mapping)``. Entries that are None are skipped. When more
        than one entry is passed, every processed entry is replaced by None
        in the list itself so its arrays can be released early.
    **kwargs
        Arbitrary keyword arguments. ``use_memmap`` (default False) makes
        the merged arrays file-backed :py:class:`numpy.memmap` objects; in
        that mode the input arrays are closed via their ``_mmap`` handle and
        are therefore expected to be memmaps as well.

    Returns
    -------
    tuple or None
        Max aggregated translation scores, translation offset (always
        zero for a merged result), corresponding rotation indices
        (-1 where no score was recorded) and the mapping from rotation
        matrix key to rotation index. A single-element `param_stores` is
        returned unchanged; None is returned if every entry is None.
    """
    if len(param_stores) == 1:
        return param_stores[0]

    # Pass 1: determine the bounding shape, the output dtypes and a
    # rotation index space shared by all stores.
    new_rotation_mapping, base_max = {}, None
    scores_out_dtype, rotations_out_dtype = None, None
    for param_store in param_stores:
        if param_store is None:
            continue
        score_space, offset, rotations, rotation_mapping = param_store
        if base_max is None:
            base_max = np.zeros(score_space.ndim, int)
            scores_out_dtype = score_space.dtype
            rotations_out_dtype = rotations.dtype
        np.maximum(base_max, np.add(offset, score_space.shape), out=base_max)

        for key in rotation_mapping:
            new_rotation_mapping.setdefault(key, len(new_rotation_mapping))

    if base_max is None:
        return None

    base_max = tuple(int(x) for x in base_max)
    use_memmap = kwargs.get("use_memmap", False)
    if use_memmap:
        scores_out_filename = generate_tempfile_name()
        rotations_out_filename = generate_tempfile_name()

        scores_out = np.memmap(
            scores_out_filename, mode="w+", shape=base_max, dtype=scores_out_dtype
        )
        rotations_out = np.memmap(
            rotations_out_filename,
            mode="w+",
            shape=base_max,
            dtype=rotations_out_dtype,
        )
        # A fresh memmap is zero-filled; mark every position as "no rotation
        # recorded" so the result matches the in-memory branch below.
        rotations_out.fill(-1)
        rotations_out.flush()
    else:
        scores_out = np.zeros(base_max, dtype=scores_out_dtype)
        rotations_out = np.full(base_max, fill_value=-1, dtype=rotations_out_dtype)

    # Pass 2: fold each store into the output arrays.
    for i, param_store in enumerate(param_stores):
        if param_store is None:
            continue

        if use_memmap:
            # Re-open per iteration; the handles are dropped again at the
            # end of the loop body after flushing.
            scores_out = np.memmap(
                scores_out_filename,
                mode="r+",
                shape=base_max,
                dtype=scores_out_dtype,
            )
            rotations_out = np.memmap(
                rotations_out_filename,
                mode="r+",
                shape=base_max,
                dtype=rotations_out_dtype,
            )
        score_space, offset, rotations, rotation_mapping = param_store
        stops = np.add(offset, score_space.shape).astype(int)
        indices = tuple(slice(*pos) for pos in zip(offset, stops))

        # Basic slicing yields a view, so the masked assignment below
        # writes through to the output array.
        indices_update = score_space > scores_out[indices]
        scores_out[indices][indices_update] = score_space[indices_update]

        # Translate this store's local rotation indices to merged indices.
        lookup_table = np.arange(
            len(rotation_mapping) + 1, dtype=rotations_out.dtype
        )
        for key, value in rotation_mapping.items():
            lookup_table[value] = new_rotation_mapping[key]

        updated_rotations = rotations[indices_update]
        if len(updated_rotations):
            rotations_out[indices][indices_update] = lookup_table[updated_rotations]

        if use_memmap:
            score_space._mmap.close()
            rotations._mmap.close()
            scores_out.flush()
            rotations_out.flush()
            scores_out, rotations_out = None, None

        # Drop references so the consumed arrays can be freed.
        param_stores[i] = None
        score_space, rotations = None, None

    if use_memmap:
        scores_out = np.memmap(
            scores_out_filename, mode="r", shape=base_max, dtype=scores_out_dtype
        )
        rotations_out = np.memmap(
            rotations_out_filename,
            mode="r",
            shape=base_max,
            dtype=rotations_out_dtype,
        )
    return (
        scores_out,
        np.zeros(scores_out.ndim, dtype=int),
        rotations_out,
        new_rotation_mapping,
    )
1040
+
1041
+
1042
class MemmapHandler:
    """
    Accumulate score spaces into numpy memmap files, one file per rotation.

    This is useful in cases where not the entire score space is sampled at once.

    Parameters
    ----------
    path_translation : dict
        Translation between rotation matrix and memmap file path. Keys are
        the underscore-joined string form of the flattened rotation matrix.
    shape : tuple of int
        Shape of the memmap array.
    dtype : type
        Numpy dtype of the memmap array.
    mode : str, optional
        Mode to open the memmap array with.
    indices : tuple of slice, optional
        Slices specifying which parts of the memmap array will be updated by `__call__`.
    **kwargs
        Arbitrary keyword arguments.
    """

    def __init__(
        self,
        path_translation: Dict,
        shape: Tuple[int],
        dtype: type,
        mode: str = "r+",
        indices: Tuple[slice] = None,
        **kwargs,
    ):
        # Open every target file once up front with the requested mode; the
        # resulting memmap objects are not kept.
        opened = []
        for target in list(path_translation.values()):
            opened.append(np.memmap(target, mode=mode, shape=shape, dtype=dtype))
        del opened

        self._path_translation = path_translation
        self.lock = Lock()
        self.shape = shape
        self.dtype = dtype
        self._indices = indices

    def __call__(
        self, score_space: NDArray, rotation_matrix: NDArray, **kwargs
    ) -> None:
        """
        Add `score_space` to the memmap file belonging to `rotation_matrix`.

        Parameters
        ----------
        score_space : ndarray
            Numpy array containing the score space.
        rotation_matrix : ndarray
            Square matrix describing the current rotation.
        **kwargs
            Arbitrary keyword arguments.
        """
        target_path = self._rotation_matrix_to_filepath(rotation_matrix)
        on_disk = np.memmap(
            target_path, mode="r+", shape=self.shape, dtype=self.dtype
        )
        # Does not really need a lock because processes operate on different rotations
        with self.lock:
            on_disk[self._indices] += score_space
            on_disk.flush()

    def __iter__(self):
        """Yield a single None; this handler exposes no parameter store."""
        yield None

    @classmethod
    def merge(cls, *args, **kwargs) -> None:
        """
        Placeholder merge method. Does nothing.
        """
        return None

    def update_indices(self, indices: Tuple[slice]) -> None:
        """
        Change which parts of the memmap array will be updated.

        Parameters
        ----------
        indices : tuple of slice
            Slices specifying which parts of the memmap array will be
            updated by `__call__`.
        """
        self._indices = indices

    def _rotation_matrix_to_filepath(self, rotation_matrix: NDArray) -> str:
        """
        Look up the memmap file path registered for `rotation_matrix`.

        Parameters
        ----------
        rotation_matrix : ndarray
            Rotation matrix whose file path is requested.

        Returns
        -------
        str
            Entry of the path translation for the matrix, keyed by its
            flattened elements converted to strings and joined with "_".
        """
        elements = rotation_matrix.ravel().astype(str)
        lookup_key = "_".join(elements)
        return self._path_translation[lookup_key]