pytme 0.2.9.post1__cp311-cp311-macosx_15_0_arm64.whl → 0.3.0__cp311-cp311-macosx_15_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. pytme-0.3.0.data/scripts/estimate_memory_usage.py +76 -0
  2. pytme-0.3.0.data/scripts/match_template.py +1106 -0
  3. {pytme-0.2.9.post1.data → pytme-0.3.0.data}/scripts/postprocess.py +320 -190
  4. {pytme-0.2.9.post1.data → pytme-0.3.0.data}/scripts/preprocess.py +21 -31
  5. {pytme-0.2.9.post1.data → pytme-0.3.0.data}/scripts/preprocessor_gui.py +85 -19
  6. pytme-0.3.0.data/scripts/pytme_runner.py +771 -0
  7. {pytme-0.2.9.post1.dist-info → pytme-0.3.0.dist-info}/METADATA +21 -20
  8. pytme-0.3.0.dist-info/RECORD +126 -0
  9. {pytme-0.2.9.post1.dist-info → pytme-0.3.0.dist-info}/entry_points.txt +2 -1
  10. pytme-0.3.0.dist-info/licenses/LICENSE +339 -0
  11. scripts/estimate_memory_usage.py +76 -0
  12. scripts/eval.py +93 -0
  13. scripts/extract_candidates.py +224 -0
  14. scripts/match_template.py +349 -378
  15. pytme-0.2.9.post1.data/scripts/match_template.py → scripts/match_template_filters.py +213 -148
  16. scripts/postprocess.py +320 -190
  17. scripts/preprocess.py +21 -31
  18. scripts/preprocessor_gui.py +85 -19
  19. scripts/pytme_runner.py +771 -0
  20. scripts/refine_matches.py +625 -0
  21. tests/preprocessing/test_frequency_filters.py +28 -14
  22. tests/test_analyzer.py +41 -36
  23. tests/test_backends.py +1 -0
  24. tests/test_matching_cli.py +109 -54
  25. tests/test_matching_data.py +5 -5
  26. tests/test_matching_exhaustive.py +1 -2
  27. tests/test_matching_optimization.py +4 -9
  28. tests/test_matching_utils.py +1 -1
  29. tests/test_orientations.py +0 -1
  30. tme/__version__.py +1 -1
  31. tme/analyzer/__init__.py +2 -0
  32. tme/analyzer/_utils.py +26 -21
  33. tme/analyzer/aggregation.py +395 -222
  34. tme/analyzer/base.py +127 -0
  35. tme/analyzer/peaks.py +189 -204
  36. tme/analyzer/proxy.py +123 -0
  37. tme/backends/__init__.py +4 -3
  38. tme/backends/_cupy_utils.py +25 -24
  39. tme/backends/_jax_utils.py +20 -18
  40. tme/backends/cupy_backend.py +13 -26
  41. tme/backends/jax_backend.py +24 -23
  42. tme/backends/matching_backend.py +4 -3
  43. tme/backends/mlx_backend.py +4 -3
  44. tme/backends/npfftw_backend.py +34 -30
  45. tme/backends/pytorch_backend.py +18 -4
  46. tme/cli.py +126 -0
  47. tme/density.py +9 -7
  48. tme/extensions.cpython-311-darwin.so +0 -0
  49. tme/filters/__init__.py +3 -3
  50. tme/filters/_utils.py +36 -10
  51. tme/filters/bandpass.py +229 -188
  52. tme/filters/compose.py +5 -4
  53. tme/filters/ctf.py +516 -254
  54. tme/filters/reconstruction.py +91 -32
  55. tme/filters/wedge.py +196 -135
  56. tme/filters/whitening.py +37 -42
  57. tme/matching_data.py +28 -39
  58. tme/matching_exhaustive.py +31 -27
  59. tme/matching_optimization.py +5 -4
  60. tme/matching_scores.py +25 -15
  61. tme/matching_utils.py +193 -27
  62. tme/memory.py +4 -3
  63. tme/orientations.py +22 -9
  64. tme/parser.py +114 -33
  65. tme/preprocessor.py +6 -5
  66. tme/rotations.py +10 -7
  67. tme/structure.py +4 -3
  68. pytme-0.2.9.post1.data/scripts/estimate_ram_usage.py +0 -97
  69. pytme-0.2.9.post1.dist-info/RECORD +0 -119
  70. pytme-0.2.9.post1.dist-info/licenses/LICENSE +0 -153
  71. scripts/estimate_ram_usage.py +0 -97
  72. tests/data/Maps/.DS_Store +0 -0
  73. tests/data/Structures/.DS_Store +0 -0
  74. {pytme-0.2.9.post1.dist-info → pytme-0.3.0.dist-info}/WHEEL +0 -0
  75. {pytme-0.2.9.post1.dist-info → pytme-0.3.0.dist-info}/top_level.txt +0 -0
tme/matching_utils.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Utility functions for template matching.
1
+ """
2
+ Utility functions for template matching.
2
3
 
3
- Copyright (c) 2023 European Molecular Biology Laboratory
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  import os
@@ -519,7 +520,7 @@ def apply_convolution_mode(
519
520
  elif convolution_mode == "same":
520
521
  return func(arr, s1)
521
522
  elif convolution_mode == "valid":
522
- valid_shape = [s1[i] - s2[i] + s2[i] % 2 for i in range(arr.ndim)]
523
+ valid_shape = [s1[i] - s2[i] + 1 for i in range(arr.ndim)]
523
524
  return func(arr, valid_shape)
524
525
 
525
526
 
@@ -724,13 +725,15 @@ def create_mask(mask_type: str, sigma_decay: float = 0, **kwargs) -> NDArray:
724
725
  mask_type : str
725
726
  Type of the mask to be created. Can be one of:
726
727
 
727
- +---------+----------------------------------------------------------+
728
- | box | Box mask (see :py:meth:`box_mask`) |
729
- +---------+----------------------------------------------------------+
730
- | tube | Cylindrical mask (see :py:meth:`tube_mask`) |
731
- +---------+----------------------------------------------------------+
732
- | ellipse | Ellipsoidal mask (see :py:meth:`elliptical_mask`) |
733
- +---------+----------------------------------------------------------+
728
+ +----------+---------------------------------------------------------+
729
+ | box | Box mask (see :py:meth:`box_mask`) |
730
+ +----------+---------------------------------------------------------+
731
+ | tube | Cylindrical mask (see :py:meth:`tube_mask`) |
732
+ +----------+---------------------------------------------------------+
733
+ | membrane | Cylindrical mask (see :py:meth:`membrane_mask`) |
734
+ +----------+---------------------------------------------------------+
735
+ | ellipse | Ellipsoidal mask (see :py:meth:`elliptical_mask`) |
736
+ +----------+---------------------------------------------------------+
734
737
  sigma_decay : float, optional
735
738
  Smoothing along mask edges using a Gaussian filter, 0 by default.
736
739
  kwargs : dict
@@ -746,16 +749,16 @@ def create_mask(mask_type: str, sigma_decay: float = 0, **kwargs) -> NDArray:
746
749
  ValueError
747
750
  If the mask_type is invalid.
748
751
  """
749
- mapping = {"ellipse": elliptical_mask, "box": box_mask, "tube": tube_mask}
752
+ mapping = {
753
+ "ellipse": elliptical_mask,
754
+ "box": box_mask,
755
+ "tube": tube_mask,
756
+ "membrane": membrane_mask,
757
+ }
750
758
  if mask_type not in mapping:
751
759
  raise ValueError(f"mask_type has to be one of {','.join(mapping.keys())}")
752
760
 
753
- mask = mapping[mask_type](**kwargs)
754
- if sigma_decay > 0:
755
- mask_filter = gaussian_filter(mask.astype(np.float32), sigma=sigma_decay)
756
- mask = np.add(mask, (1 - mask) * mask_filter)
757
- mask[mask < np.exp(-np.square(sigma_decay))] = 0
758
-
761
+ mask = mapping[mask_type](**kwargs, sigma_decay=sigma_decay)
759
762
  return mask
760
763
 
761
764
 
@@ -764,6 +767,8 @@ def elliptical_mask(
764
767
  radius: Tuple[float],
765
768
  center: Optional[Tuple[float]] = None,
766
769
  orientation: Optional[NDArray] = None,
770
+ sigma_decay: float = 0.0,
771
+ cutoff_sigma: float = 3,
767
772
  ) -> NDArray:
768
773
  """
769
774
  Creates an ellipsoidal mask.
@@ -825,9 +830,14 @@ def elliptical_mask(
825
830
  )
826
831
  indices = indices.reshape(*return_shape)
827
832
 
828
- mask = np.linalg.norm(indices / radius, axis=0)
829
- mask = (mask <= 1).astype(int)
830
-
833
+ dist = np.linalg.norm(indices / radius, axis=0)
834
+ if sigma_decay > 0:
835
+ sigma_decay = 2 * (sigma_decay / np.mean(radius)) ** 2
836
+ mask = np.maximum(0, dist - 1)
837
+ mask = np.exp(-(mask**2) / sigma_decay)
838
+ mask *= mask > np.exp(-(cutoff_sigma**2) / 2)
839
+ else:
840
+ mask = (dist <= 1).astype(int)
831
841
  return mask
832
842
 
833
843
 
@@ -925,7 +935,13 @@ def tube_mask2(
925
935
  return mask
926
936
 
927
937
 
928
- def box_mask(shape: Tuple[int], center: Tuple[int], height: Tuple[int]) -> np.ndarray:
938
+ def box_mask(
939
+ shape: Tuple[int],
940
+ center: Tuple[int],
941
+ height: Tuple[int],
942
+ sigma_decay: float = 0.0,
943
+ cutoff_sigma: float = 0.0,
944
+ ) -> np.ndarray:
929
945
  """
930
946
  Creates a box mask centered around the provided center point.
931
947
 
@@ -962,6 +978,11 @@ def box_mask(shape: Tuple[int], center: Tuple[int], height: Tuple[int]) -> np.nd
962
978
 
963
979
  out = np.zeros(shape)
964
980
  out[slice_indices] = 1
981
+
982
+ if sigma_decay > 0:
983
+ mask_filter = gaussian_filter(out.astype(np.float32), sigma=sigma_decay)
984
+ out = np.add(out, (1 - out) * mask_filter)
985
+ out *= out > np.exp(-(cutoff_sigma**2) / 2)
965
986
  return out
966
987
 
967
988
 
@@ -972,6 +993,8 @@ def tube_mask(
972
993
  inner_radius: float,
973
994
  outer_radius: float,
974
995
  height: int,
996
+ sigma_decay: float = 0.0,
997
+ **kwargs,
975
998
  ) -> NDArray:
976
999
  """
977
1000
  Creates a tube mask.
@@ -1027,6 +1050,7 @@ def tube_mask(
1027
1050
  shape=circle_shape,
1028
1051
  radius=inner_radius,
1029
1052
  center=circle_center,
1053
+ sigma_decay=sigma_decay,
1030
1054
  )
1031
1055
  if outer_radius > 0:
1032
1056
  outer_circle = create_mask(
@@ -1034,6 +1058,7 @@ def tube_mask(
1034
1058
  shape=circle_shape,
1035
1059
  radius=outer_radius,
1036
1060
  center=circle_center,
1061
+ sigma_decay=sigma_decay,
1037
1062
  )
1038
1063
  circle = outer_circle - inner_circle
1039
1064
  circle = np.expand_dims(circle, axis=symmetry_axis)
@@ -1054,9 +1079,106 @@ def tube_mask(
1054
1079
  return tube
1055
1080
 
1056
1081
 
1082
+ def membrane_mask(
1083
+ shape: Tuple[int],
1084
+ radius: float,
1085
+ thickness: float,
1086
+ separation: float,
1087
+ symmetry_axis: int = 2,
1088
+ center: Optional[Tuple[float]] = None,
1089
+ sigma_decay: float = 0.5,
1090
+ cutoff_sigma: float = 3,
1091
+ **kwargs,
1092
+ ) -> NDArray:
1093
+ """
1094
+ Creates a membrane mask consisting of two parallel disks with Gaussian intensity profile.
1095
+ Uses efficient broadcasting approach: flat disk mask × height profile.
1096
+
1097
+ Parameters
1098
+ ----------
1099
+ shape : tuple of ints
1100
+ Shape of the mask to be created.
1101
+ radius : float
1102
+ Radius of the membrane disks.
1103
+ thickness : float
1104
+ Thickness of each disk in the membrane.
1105
+ separation : float
1106
+ Distance between the centers of the two disks.
1107
+ symmetry_axis : int, optional
1108
+ The axis perpendicular to the membrane disks, defaults to 2.
1109
+ center : tuple of floats, optional
1110
+ Center of the membrane (midpoint between the two disks), defaults to shape // 2.
1111
+ sigma_decay : float, optional
1112
+ Controls edge sharpness relative to radius, defaults to 0.5.
1113
+ cutoff_sigma : float, optional
1114
+ Cutoff for height profile in standard deviations, defaults to 3.
1115
+
1116
+ Returns
1117
+ -------
1118
+ NDArray
1119
+ The created membrane mask with Gaussian intensity profile.
1120
+
1121
+ Raises
1122
+ ------
1123
+ ValueError
1124
+ If ``thickness`` is negative.
1125
+ If ``separation`` is negative.
1126
+ If ``center`` and ``shape`` do not have the same length.
1127
+ If ``symmetry_axis`` is out of bounds.
1128
+
1129
+ Examples
1130
+ --------
1131
+ >>> from tme.matching_utils import membrane_mask
1132
+ >>> mask = membrane_mask(shape=(50,50,50), radius=10, thickness=2, separation=15)
1133
+ """
1134
+ shape = np.asarray(shape, dtype=int)
1135
+
1136
+ if center is None:
1137
+ center = np.divide(shape, 2).astype(float)
1138
+
1139
+ center = np.asarray(center, dtype=np.float32)
1140
+ center = np.repeat(center, shape.size // center.size)
1141
+
1142
+ if thickness < 0:
1143
+ raise ValueError("thickness must be non-negative.")
1144
+ if separation < 0:
1145
+ raise ValueError("separation must be non-negative.")
1146
+ if symmetry_axis >= len(shape):
1147
+ raise ValueError(f"symmetry_axis must be less than {len(shape)}.")
1148
+ if center.size != shape.size:
1149
+ raise ValueError("Length of center has to be either one or match shape.")
1150
+
1151
+ disk_mask = elliptical_mask(
1152
+ shape=[x for i, x in enumerate(shape) if i != symmetry_axis],
1153
+ radius=radius,
1154
+ sigma_decay=sigma_decay,
1155
+ cutoff_sigma=cutoff_sigma,
1156
+ )
1157
+
1158
+ axial_coord = np.arange(shape[symmetry_axis]) - center[symmetry_axis]
1159
+ height_profile = np.zeros((shape[symmetry_axis],), dtype=np.float32)
1160
+ for leaflet_pos in [-separation / 2, separation / 2]:
1161
+ leaflet_profile = np.exp(
1162
+ -((axial_coord - leaflet_pos) ** 2) / (2 * (thickness / 3) ** 2)
1163
+ )
1164
+ cutoff_threshold = np.exp(-(cutoff_sigma**2) / 2)
1165
+ leaflet_profile *= leaflet_profile > cutoff_threshold
1166
+
1167
+ height_profile = np.maximum(height_profile, leaflet_profile)
1168
+
1169
+ disk_mask = disk_mask.reshape(
1170
+ [x if i != symmetry_axis else 1 for i, x in enumerate(shape)]
1171
+ )
1172
+ height_profile = height_profile.reshape(
1173
+ [1 if i != symmetry_axis else x for i, x in enumerate(shape)]
1174
+ )
1175
+
1176
+ return disk_mask * height_profile
1177
+
1178
+
1057
1179
  def scramble_phases(
1058
1180
  arr: NDArray,
1059
- noise_proportion: float = 0.5,
1181
+ noise_proportion: float = 1.0,
1060
1182
  seed: int = 42,
1061
1183
  normalize_power: bool = False,
1062
1184
  ) -> NDArray:
@@ -1068,7 +1190,7 @@ def scramble_phases(
1068
1190
  arr : NDArray
1069
1191
  Input data.
1070
1192
  noise_proportion : float, optional
1071
- Proportion of scrambled phases, 0.5 by default.
1193
+ Proportion of scrambled phases, 1.0 by default.
1072
1194
  seed : int, optional
1073
1195
  The seed for the random phase scrambling, 42 by default.
1074
1196
  normalize_power : bool, optional
@@ -1079,15 +1201,22 @@ def scramble_phases(
1079
1201
  NDArray
1080
1202
  Phase scrambled version of ``arr``.
1081
1203
  """
1204
+ from tme.filters._utils import fftfreqn
1205
+
1082
1206
  np.random.seed(seed)
1083
1207
  noise_proportion = max(min(noise_proportion, 1), 0)
1084
1208
 
1085
1209
  arr_fft = np.fft.fftn(arr)
1086
1210
  amp, ph = np.abs(arr_fft), np.angle(arr_fft)
1087
1211
 
1088
- ph_noise = np.random.permutation(ph)
1089
- ph_new = ph * (1 - noise_proportion) + ph_noise * noise_proportion
1090
- ret = np.real(np.fft.ifftn(amp * np.exp(1j * ph_new)))
1212
+ # Scrambling up to nyquist gives more uniform noise distribution
1213
+ mask = np.fft.ifftshift(
1214
+ fftfreqn(arr_fft.shape, sampling_rate=1, compute_euclidean_norm=True) <= 0.5
1215
+ )
1216
+
1217
+ ph_noise = np.random.permutation(ph[mask])
1218
+ ph[mask] = ph[mask] * (1 - noise_proportion) + ph_noise * noise_proportion
1219
+ ret = np.real(np.fft.ifftn(amp * np.exp(1j * ph)))
1091
1220
 
1092
1221
  if normalize_power:
1093
1222
  np.divide(ret - ret.min(), ret.max() - ret.min(), out=ret)
@@ -1150,3 +1279,40 @@ def compute_extraction_box(
1150
1279
  keep = be.multiply(keep, clamp_change == 0)
1151
1280
 
1152
1281
  return obs_beg_clamp, obs_end_clamp, cand_beg, cand_end, keep
1282
+
1283
+
1284
+ class TqdmParallel(Parallel):
1285
+ """
1286
+ A minimal Parallel implementation using tqdm for progress reporting.
1287
+
1288
+ Parameters:
1289
+ -----------
1290
+ tqdm_args : dict, optional
1291
+ Dictionary of arguments passed to tqdm.tqdm
1292
+ *args, **kwargs:
1293
+ Arguments to pass to joblib.Parallel
1294
+ """
1295
+
1296
+ def __init__(self, tqdm_args: Dict = {}, *args, **kwargs):
1297
+ from tqdm import tqdm
1298
+
1299
+ super().__init__(*args, **kwargs)
1300
+ self.pbar = tqdm(**tqdm_args)
1301
+
1302
+ def __call__(self, iterable, *args, **kwargs):
1303
+ self.n_tasks = len(iterable) if hasattr(iterable, "__len__") else None
1304
+ return super().__call__(iterable, *args, **kwargs)
1305
+
1306
+ def print_progress(self):
1307
+ if self.n_tasks is None:
1308
+ return super().print_progress()
1309
+
1310
+ if self.n_tasks != self.pbar.total:
1311
+ self.pbar.total = self.n_tasks
1312
+ self.pbar.refresh()
1313
+
1314
+ self.pbar.n = self.n_completed_tasks
1315
+ self.pbar.refresh()
1316
+
1317
+ if self.n_completed_tasks >= self.n_tasks:
1318
+ self.pbar.close()
tme/memory.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Compute memory consumption of template matching components.
1
+ """
2
+ Compute memory consumption of template matching components.
2
3
 
3
- Copyright (c) 2023 European Molecular Biology Laboratory
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  from abc import ABC, abstractmethod
tme/orientations.py CHANGED
@@ -1,10 +1,11 @@
1
- #!python3
2
- """ Handle template matching orientations and conversion between formats.
1
+ """
2
+ Handle template matching orientations and conversion between formats.
3
3
 
4
- Copyright (c) 2024 European Molecular Biology Laboratory
4
+ Copyright (c) 2024 European Molecular Biology Laboratory
5
5
 
6
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
7
7
  """
8
+
8
9
  from typing import List, Tuple
9
10
  from dataclasses import dataclass
10
11
  from string import ascii_lowercase, ascii_uppercase
@@ -14,7 +15,7 @@ import numpy as np
14
15
  from .parser import StarParser
15
16
  from .matching_utils import compute_extraction_box
16
17
 
17
- # Exceeds available numpy dimensions for default installations.
18
+ # Exceeds available numpy dimensions for default installations
18
19
  NAMES = ["x", "y", "z", *ascii_lowercase[:-3], *ascii_uppercase]
19
20
 
20
21
 
@@ -81,7 +82,7 @@ class Orientations:
81
82
  self.translations = np.array(self.translations).astype(np.float32)
82
83
  self.rotations = np.array(self.rotations).astype(np.float32)
83
84
  self.scores = np.array(self.scores).astype(np.float32)
84
- self.details = np.array(self.details).astype(np.float32)
85
+ self.details = np.array(self.details)
85
86
  n_orientations = set(
86
87
  [
87
88
  self.translations.shape[0],
@@ -324,6 +325,7 @@ class Orientations:
324
325
  "_rlnAngleRot",
325
326
  "_rlnAngleTilt",
326
327
  "_rlnAnglePsi",
328
+ "_rlnClassNumber",
327
329
  ]
328
330
  if source_path is not None:
329
331
  header.append("_rlnMicrographName")
@@ -339,6 +341,7 @@ class Orientations:
339
341
  for index, (translation, rotation, score, detail) in enumerate(self):
340
342
  line = [str(x) for x in translation]
341
343
  line.extend([str(x) for x in rotation])
344
+ line.extend([str(detail)])
342
345
 
343
346
  if source_path is not None:
344
347
  line.append(source_path)
@@ -489,9 +492,12 @@ class Orientations:
489
492
  def _from_star(
490
493
  cls, filename: str, delimiter: str = "\t"
491
494
  ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
492
- ret = StarParser(filename, delimiter=delimiter)
495
+ parser = StarParser(filename, delimiter=delimiter)
496
+
497
+ ret = parser.get("data_particles", None)
498
+ if ret is None:
499
+ ret = parser.get("data_", None)
493
500
 
494
- ret = ret.get("data_particles", None)
495
501
  if ret is None:
496
502
  raise ValueError(f"No data_particles section found in {filename}.")
497
503
 
@@ -500,13 +506,20 @@ class Orientations:
500
506
  )
501
507
  translation = translation.astype(np.float32).T
502
508
 
509
+ default_angle = np.zeros(translation.shape[0], dtype=np.float32)
510
+ for x in ("_rlnAngleRot", "_rlnAngleTilt", "_rlnAnglePsi"):
511
+ if x not in ret:
512
+ ret[x] = default_angle
513
+
503
514
  rotation = np.vstack(
504
515
  (ret["_rlnAngleRot"], ret["_rlnAngleTilt"], ret["_rlnAnglePsi"])
505
516
  )
506
517
  rotation = rotation.astype(np.float32).T
507
518
 
508
519
  default = np.zeros(translation.shape[0])
509
- return translation, rotation, default, default
520
+
521
+ scores = ret.get("_pytmeScore", default)
522
+ return translation, rotation, scores, default
510
523
 
511
524
  @staticmethod
512
525
  def _from_tbl(
tme/parser.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Implements parsers for atomic structure file formats.
1
+ """
2
+ Implements parsers for atomic structure file formats.
2
3
 
3
- Copyright (c) 2023 European Molecular Biology Laboratory
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  import re
@@ -15,7 +16,14 @@ from typing import List, Dict, Union
15
16
 
16
17
  import numpy as np
17
18
 
18
- __all__ = ["PDBParser", "MMCIFParser", "GROParser", "StarParser", "XMLParser"]
19
+ __all__ = [
20
+ "PDBParser",
21
+ "MMCIFParser",
22
+ "GROParser",
23
+ "StarParser",
24
+ "XMLParser",
25
+ "MDOCParser",
26
+ ]
19
27
 
20
28
 
21
29
  class Parser(ABC):
@@ -84,6 +92,34 @@ class Parser(ABC):
84
92
  """
85
93
  return key in self._data
86
94
 
95
+ def __repr__(self) -> str:
96
+ """
97
+ String representation of the Parser showing available keys and their lengths.
98
+
99
+ Returns
100
+ -------
101
+ str
102
+ A formatted string showing each key and the length of its value.
103
+ """
104
+ if not self._data:
105
+ return f"{self.__class__.__name__}(empty)"
106
+
107
+ lines = [f"{self.__class__.__name__}:"]
108
+ try:
109
+ for key, value in sorted(self._data.items()):
110
+ if isinstance(value, (list, tuple)):
111
+ lines.append(f" {key}: length {len(value)}")
112
+ elif isinstance(value, dict):
113
+ lines.append(f" {key}: dict with {len(value)} keys")
114
+ elif isinstance(value, str):
115
+ lines.append(f" {key}: str")
116
+ else:
117
+ lines.append(f" {key}: {type(value).__name__}")
118
+ except Exception:
119
+ pass
120
+
121
+ return "\n".join(lines)
122
+
87
123
  def get(self, key, default=None):
88
124
  """
89
125
  Retrieve a value from the internal data using a given key. If the
@@ -247,19 +283,6 @@ class MMCIFParser(Parser):
247
283
  """
248
284
 
249
285
  def parse_input(self, lines: deque) -> Dict:
250
- """
251
- Parse a list of lines from an MMCIF file and convert the data into a dictionary.
252
-
253
- Parameters
254
- ----------
255
- lines : deque of str
256
- The lines of an MMCIF file to parse.
257
-
258
- Returns
259
- -------
260
- dict
261
- A dictionary containing the parsed data from the MMCIF file.
262
- """
263
286
  lines = self._consolidate_strings(lines)
264
287
  blocks = self._split_in_blocks(lines)
265
288
  mmcif_dict = {}
@@ -448,21 +471,6 @@ class GROParser(Parser):
448
471
  """
449
472
 
450
473
  def parse_input(self, lines, **kwargs) -> Dict:
451
- """
452
- Parse a list of lines from a GRO file and convert the data into a dictionary.
453
-
454
- Parameters
455
- ----------
456
- lines : deque of str
457
- The lines of a GRO file to parse.
458
- kwargs : Dict, optional
459
- Optional keyword arguments.
460
-
461
- Returns
462
- -------
463
- dict
464
- A dictionary containing the parsed data from the GRO file.
465
- """
466
474
  data = {
467
475
  "title": [],
468
476
  "num_atoms": [],
@@ -560,7 +568,7 @@ class StarParser(MMCIFParser):
560
568
  .. [1] https://www.iucr.org/__data/assets/file/0013/11416/star.5.html
561
569
  """
562
570
 
563
- def parse_input(self, lines: List[str], delimiter: str = "\t") -> Dict:
571
+ def parse_input(self, lines: List[str], delimiter: str = None) -> Dict:
564
572
  pattern = re.compile(r"\s*#.*")
565
573
 
566
574
  ret, category, block = {}, None, []
@@ -683,3 +691,76 @@ class XMLParser(Parser):
683
691
  pass
684
692
 
685
693
  return value_str
694
+
695
+
696
+ class MDOCParser(Parser):
697
+ """
698
+ Convert MDOC file (SerialEM metadata) into a dictionary representation.
699
+
700
+ MDOC files contain global parameters and per-tilt metadata for cryo-ET
701
+ tilt series, with sections marked by [ZValue = N] for individual tilts.
702
+ """
703
+
704
+ def parse_input(self, lines: deque, **kwargs) -> Dict:
705
+ data = {}
706
+ global_params = {}
707
+ in_zvalue_section = False
708
+ zvalue_pattern = re.compile(r"\[ZValue\s*=\s*(\d+)\]")
709
+ section_pattern = re.compile(r"\[T\s*=\s*(.*?)\]")
710
+
711
+ if not lines:
712
+ return data
713
+
714
+ while lines:
715
+ line = lines.popleft().strip()
716
+
717
+ if not line:
718
+ continue
719
+
720
+ # Check for ZValue section header
721
+ zvalue_match = zvalue_pattern.match(line)
722
+ if zvalue_match:
723
+ in_zvalue_section = True
724
+
725
+ zvalue = int(zvalue_match.group(1))
726
+ if "ZValue" not in data:
727
+ data["ZValue"] = []
728
+ data["ZValue"].append(zvalue)
729
+ continue
730
+
731
+ # Check for T section header (comments/metadata)
732
+ section_match = section_pattern.match(line)
733
+ if section_match:
734
+ section_content = section_match.group(1)
735
+ if "sections" not in global_params:
736
+ global_params["sections"] = []
737
+ global_params["sections"].append(section_content)
738
+ continue
739
+
740
+ # Parse key-value pairs
741
+ if "=" in line:
742
+ try:
743
+ key, value = line.split("=", 1)
744
+ key = key.strip()
745
+ value = value.strip()
746
+
747
+ try:
748
+ if "." not in value and "e" not in value.lower():
749
+ parsed_value = int(value)
750
+ else:
751
+ parsed_value = float(value)
752
+ except ValueError:
753
+ parsed_value = value
754
+
755
+ if not in_zvalue_section:
756
+ global_params[key] = parsed_value
757
+ else:
758
+ if key not in data:
759
+ data[key] = []
760
+ data[key].append(parsed_value)
761
+
762
+ except ValueError:
763
+ continue
764
+
765
+ data.update(global_params)
766
+ return data
tme/preprocessor.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Implements Preprocessor class for filtering operations.
1
+ """
2
+ Implements Preprocessor class for filtering operations.
2
3
 
3
- Copyright (c) 2023 European Molecular Biology Laboratory
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  import os
@@ -669,9 +670,9 @@ class Preprocessor:
669
670
  NDArray
670
671
  Bandpass filtered.
671
672
  """
672
- from .filters import BandPassFilter
673
+ from .filters import BandPassReconstructed
673
674
 
674
- return BandPassFilter(
675
+ return BandPassReconstructed(
675
676
  sampling_rate=sampling_rate,
676
677
  lowpass=lowpass,
677
678
  highpass=highpass,
tme/rotations.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Implements various means of generating rotation matrices.
1
+ """
2
+ Implements various means of generating rotation matrices.
2
3
 
3
- Copyright (c) 2023-2025 European Molecular Biology Laboratory
4
+ Copyright (c) 2023-2025 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  import yaml
@@ -183,12 +184,14 @@ def euler_to_rotationmatrix(angles: Tuple[float], seq: str = "zyz") -> NDArray:
183
184
  NDArray
184
185
  The generated rotation matrix.
185
186
  """
187
+ angles = np.asarray(angles)
188
+
186
189
  n_angles = len(angles)
187
- angle_convention = seq[:n_angles]
188
- if n_angles == 1:
189
- angles = (angles, 0, 0)
190
+ if angles.ndim == 2:
191
+ n_angles = angles.shape[1]
192
+
190
193
  rotation_matrix = Rotation.from_euler(
191
- seq=angle_convention, angles=angles, degrees=True
194
+ seq=seq[:n_angles], angles=angles, degrees=True
192
195
  )
193
196
  return rotation_matrix.as_matrix().astype(np.float32)
194
197
 
tme/structure.py CHANGED
@@ -1,8 +1,9 @@
1
- """ Implements class Structure to represent atomic structures.
1
+ """
2
+ Implements class Structure to represent atomic structures.
2
3
 
3
- Copyright (c) 2023 European Molecular Biology Laboratory
4
+ Copyright (c) 2023 European Molecular Biology Laboratory
4
5
 
5
- Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
+ Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
6
7
  """
7
8
 
8
9
  import warnings