meteor-maps 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
meteor/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ __all__ = ["__version__", "version"]
2
+
3
+ from ._version import __version__, version
meteor/_version.py ADDED
@@ -0,0 +1,16 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ TYPE_CHECKING = False
4
+ if TYPE_CHECKING:
5
+ from typing import Tuple, Union
6
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
7
+ else:
8
+ VERSION_TUPLE = object
9
+
10
+ version: str
11
+ __version__: str
12
+ __version_tuple__: VERSION_TUPLE
13
+ version_tuple: VERSION_TUPLE
14
+
15
+ __version__ = version = '0.2.2'
16
+ __version_tuple__ = version_tuple = (0, 2, 2)
meteor/diffmaps.py ADDED
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+
5
+ import numpy as np
6
+ import reciprocalspaceship as rs
7
+
8
+ from .rsmap import Map, _assert_is_map
9
+ from .settings import DEFAULT_KPARAMS_TO_SCAN, MAP_SAMPLING
10
+ from .utils import filter_common_indices
11
+ from .validate import ScalarMaximizer, negentropy
12
+
13
+
14
def set_common_crystallographic_metadata(map1: Map, map2: Map, *, output: Map) -> None:
    """
    Copy the unit cell and spacegroup carried by `map1` onto `output`.

    The `cell` and `spacegroup` attributes are handled independently and in that order. An
    attribute is copied only if `map1` has it. If `map2` has the same attribute and the two
    values compare unequal, an error is raised instead of copying. Attributes absent from `map1`
    are skipped, whether or not `map2` carries them.

    Parameters
    ----------
    map1: Map
        the map whose metadata is copied to `output`
    map2: Map
        the map whose metadata must agree with `map1` wherever both define it
    output: Map
        the map receiving the metadata; modified in place

    Raises
    ------
    AttributeError
        if both maps define `cell` (or both define `spacegroup`) and the values differ
    """
    if hasattr(map1, "cell"):
        cells_disagree = hasattr(map2, "cell") and (map1.cell != map2.cell)
        if cells_disagree:
            msg = f"`map1.cell` {map1.cell} != `map2.cell` {map2.cell}"
            raise AttributeError(msg)
        output.cell = map1.cell

    if hasattr(map1, "spacegroup"):
        spacegroups_disagree = hasattr(map2, "spacegroup") and (
            map1.spacegroup != map2.spacegroup
        )
        if spacegroups_disagree:
            msg = (
                f"`map1.spacegroup` {map1.spacegroup} != "
                f"`map2.spacegroup` {map2.spacegroup}"
            )
            raise AttributeError(msg)
        output.spacegroup = map1.spacegroup
27
+
28
+
29
def compute_difference_map(derivative: Map, native: Map) -> Map:
    """
    Build the complex difference map (derivative - native) from two structure factor sets.

    Both inputs are first passed through `filter_common_indices`. Their complex structure factors
    are then subtracted, and the result is wrapped in a new `Map` indexed like the filtered
    native map. Cell and spacegroup are carried over through
    `set_common_crystallographic_metadata`.

    If both filtered inputs report uncertainties, the uncertainty of the difference is
    propagated as the quadrature sum sqrt(sigma_derivative^2 + sigma_native^2).

    Parameters
    ----------
    derivative: Map
        the derivative amplitudes, phases, and optionally uncertainties
    native: Map
        the native amplitudes, phases, and optionally uncertainties

    Returns
    -------
    diffmap: Map
        map corresponding to the complex difference (derivative - native)
    """
    for candidate in (derivative, native):
        _assert_is_map(candidate, require_uncertainties=False)

    # presumably restricts both maps to the reflections they share -- see `filter_common_indices`
    derivative, native = filter_common_indices(derivative, native)  # type: ignore[assignment]

    diffmap = Map.from_structurefactor(
        derivative.complex_amplitudes - native.complex_amplitudes,
        index=native.index,
    )
    set_common_crystallographic_metadata(derivative, native, output=diffmap)

    both_have_uncertainties = derivative.has_uncertainties and native.has_uncertainties
    if both_have_uncertainties:
        # independent errors add in quadrature
        diffmap.set_uncertainties(np.sqrt(derivative.uncertainties**2 + native.uncertainties**2))

    return diffmap
67
+
68
+
69
def compute_kweights(difference_map: Map, *, k_parameter: float) -> rs.DataSeries:
    """
    Compute a per-reflection weight from DeltaF and its uncertainty.

    The weight for each reflection is

        w = 1 / (1 + sigma^2 / <sigma^2> + k * |DeltaF|^2 / <|DeltaF|^2>)

    where <.> denotes the mean over the reflections in `difference_map`.

    Parameters
    ----------
    difference_map: Map
        A map of structure factor differences (DeltaF); must carry uncertainties.
    k_parameter: float
        Scale factor applied to the normalized squared-amplitude term.

    Returns
    -------
    weights: rs.DataSeries
        The computed weights; larger uncertainties and larger differences give lower weights.
    """
    _assert_is_map(difference_map, require_uncertainties=True)

    # each term is a squared quantity normalized by its own mean
    normalized_variance = difference_map.uncertainties**2 / (difference_map.uncertainties**2).mean()
    normalized_power = difference_map.amplitudes**2 / (difference_map.amplitudes**2).mean()

    denominator = 1 + normalized_variance + k_parameter * normalized_power
    return 1.0 / denominator
94
+
95
+
96
def compute_kweighted_difference_map(derivative: Map, native: Map, *, k_parameter: float) -> Map:
    """
    Compute the k-weighted (derivative - native) structure factor map.

    The plain difference map is obtained from `compute_difference_map`. Per-reflection weights
    from `compute_kweights` are then multiplied into both its amplitudes and its uncertainties.

    Assumes amplitudes have already been scaled prior to invoking this function.

    Parameters
    ----------
    derivative: Map
        the derivative amplitudes, phases, uncertainties
    native: Map
        the native amplitudes, phases, uncertainties
    k_parameter: float
        the k-weighting parameter forwarded to `compute_kweights`

    Returns
    -------
    diffmap: Map
        the k-weighted difference map
    """
    # uncertainties are mandatory for weighting, so fail before doing any work
    for candidate in (derivative, native):
        _assert_is_map(candidate, require_uncertainties=True)

    kweighted_map = compute_difference_map(derivative, native)

    # weights are computed from the unweighted map, then applied to both columns
    kweights = compute_kweights(kweighted_map, k_parameter=k_parameter)
    kweighted_map.amplitudes *= kweights
    kweighted_map.uncertainties *= kweights

    return kweighted_map
128
+
129
+
130
def max_negentropy_kweighted_difference_map(
    derivative: Map,
    native: Map,
    *,
    k_parameter_values_to_scan: np.ndarray | Sequence[float] = DEFAULT_KPARAMS_TO_SCAN,
) -> tuple[Map, float]:
    """
    Compute the k-weighted difference map whose real-space negentropy is maximal.

    Each candidate k_parameter is scored by building the k-weighted difference map, converting it
    to a real-space map sampled at `MAP_SAMPLING`, and evaluating the negentropy of the resulting
    grid. The scan over candidates is delegated to `ScalarMaximizer.optimize_over_explicit_values`.
    The map is then recomputed once with the best-scoring value. Assumes that scaling has already
    been applied to the amplitudes before calling this function.

    Parameters
    ----------
    derivative: Map
        the derivative amplitudes, phases, uncertainties
    native: Map
        the native amplitudes, phases, uncertainties
    k_parameter_values_to_scan : np.ndarray | Sequence[float]
        The candidate k-weighting parameters to evaluate; defaults to `DEFAULT_KPARAMS_TO_SCAN`
        from the package settings.

    Returns
    -------
    kweighted_dataset: Map
        the k-weighted difference map computed with the optimal k_parameter

    opt_k_parameter: float
        optimized k-weighting parameter
    """
    _assert_is_map(derivative, require_uncertainties=True)
    _assert_is_map(native, require_uncertainties=True)

    def negentropy_objective(k_parameter: float) -> float:
        # score one candidate: weighted map -> real-space grid -> negentropy
        kweighted_dataset = compute_kweighted_difference_map(
            derivative,
            native,
            k_parameter=k_parameter,
        )
        k_weighted_map = kweighted_dataset.to_ccp4_map(map_sampling=MAP_SAMPLING)
        k_weighted_map_array = np.array(k_weighted_map.grid)
        return negentropy(k_weighted_map_array)

    maximizer = ScalarMaximizer(objective=negentropy_objective)
    maximizer.optimize_over_explicit_values(arguments_to_scan=k_parameter_values_to_scan)
    opt_k_parameter = float(maximizer.argument_optimum)

    # the objective discards its maps, so rebuild the winning one for the caller
    kweighted_dataset = compute_kweighted_difference_map(
        derivative,
        native,
        k_parameter=opt_k_parameter,
    )

    return kweighted_dataset, opt_k_parameter
meteor/io.py ADDED
@@ -0,0 +1,62 @@
1
+ """https://www.ccp4.ac.uk/html/mtzformat.html
2
+ https://www.globalphasing.com/buster/wiki/index.cgi?MTZcolumns
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import re
8
+
9
+ from .settings import (
10
+ COMPUTED_AMPLITUDE_COLUMNS,
11
+ COMPUTED_PHASE_COLUMNS,
12
+ OBSERVED_AMPLITUDE_COLUMNS,
13
+ OBSERVED_INTENSITY_COLUMNS,
14
+ OBSERVED_UNCERTAINTY_COLUMNS,
15
+ )
16
+
17
+
18
class AmbiguousMtzColumnError(ValueError):
    """Raised when an MTZ column name cannot be inferred: no candidate, or more than one, matches."""
19
+
20
+
21
+ def _infer_mtz_column(columns_to_search: list[str], columns_to_look_for: list[str]) -> str:
22
+ # the next line consumes ["FOO", "BAR", "BAZ"] and produces regex strings like "^(FOO|BAR|BAZ)$"
23
+ regex = re.compile(f"^({'|'.join(columns_to_look_for)})$")
24
+ matches = [
25
+ regex.match(column) for column in columns_to_search if regex.match(column) is not None
26
+ ]
27
+
28
+ if len(matches) == 0:
29
+ msg = "cannot infer MTZ column name; "
30
+ msg += f"cannot find any of {columns_to_look_for} in {columns_to_search}"
31
+ raise AmbiguousMtzColumnError(msg)
32
+ if len(matches) > 1:
33
+ msg = "cannot infer MTZ column name; "
34
+ msg += f">1 instance of {columns_to_look_for} in {columns_to_search}"
35
+ raise AmbiguousMtzColumnError(msg)
36
+
37
+ [match] = matches
38
+ if match is None:
39
+ msg = "`None` not filtered during regex matching"
40
+ raise RuntimeError(msg)
41
+
42
+ return match.group(0)
43
+
44
+
45
def find_observed_intensity_column(mtz_columns: list[str]) -> str:
    """
    Pick the observed-intensity column out of a list of MTZ column names.

    Delegates to `_infer_mtz_column` with the `OBSERVED_INTENSITY_COLUMNS` candidates, which
    raises `AmbiguousMtzColumnError` unless exactly one entry of `mtz_columns` matches.
    """
    return _infer_mtz_column(
        columns_to_search=mtz_columns,
        columns_to_look_for=OBSERVED_INTENSITY_COLUMNS,
    )
47
+
48
+
49
def find_observed_amplitude_column(mtz_columns: list[str]) -> str:
    """
    Pick the observed-amplitude column out of a list of MTZ column names.

    Delegates to `_infer_mtz_column` with the `OBSERVED_AMPLITUDE_COLUMNS` candidates, which
    raises `AmbiguousMtzColumnError` unless exactly one entry of `mtz_columns` matches.
    """
    return _infer_mtz_column(
        columns_to_search=mtz_columns,
        columns_to_look_for=OBSERVED_AMPLITUDE_COLUMNS,
    )
51
+
52
+
53
def find_observed_uncertainty_column(mtz_columns: list[str]) -> str:
    """
    Pick the observed-uncertainty column out of a list of MTZ column names.

    Delegates to `_infer_mtz_column` with the `OBSERVED_UNCERTAINTY_COLUMNS` candidates, which
    raises `AmbiguousMtzColumnError` unless exactly one entry of `mtz_columns` matches.
    """
    return _infer_mtz_column(
        columns_to_search=mtz_columns,
        columns_to_look_for=OBSERVED_UNCERTAINTY_COLUMNS,
    )
55
+
56
+
57
def find_computed_amplitude_column(mtz_columns: list[str]) -> str:
    """
    Pick the computed-amplitude column out of a list of MTZ column names.

    Delegates to `_infer_mtz_column` with the `COMPUTED_AMPLITUDE_COLUMNS` candidates, which
    raises `AmbiguousMtzColumnError` unless exactly one entry of `mtz_columns` matches.
    """
    return _infer_mtz_column(
        columns_to_search=mtz_columns,
        columns_to_look_for=COMPUTED_AMPLITUDE_COLUMNS,
    )
59
+
60
+
61
def find_computed_phase_column(mtz_columns: list[str]) -> str:
    """
    Pick the computed-phase column out of a list of MTZ column names.

    Delegates to `_infer_mtz_column` with the `COMPUTED_PHASE_COLUMNS` candidates, which
    raises `AmbiguousMtzColumnError` unless exactly one entry of `mtz_columns` matches.
    """
    return _infer_mtz_column(
        columns_to_search=mtz_columns,
        columns_to_look_for=COMPUTED_PHASE_COLUMNS,
    )
meteor/iterative.py ADDED
@@ -0,0 +1,259 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Callable
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import structlog
8
+
9
+ from .rsmap import Map
10
+ from .settings import (
11
+ DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION,
12
+ ITERATIVE_TV_CONVERGENCE_TOLERANCE,
13
+ ITERATIVE_TV_MAX_ITERATIONS,
14
+ )
15
+ from .tv import TvDenoiseResult, tv_denoise_difference_map
16
+ from .utils import (
17
+ average_phase_diff_in_degrees,
18
+ complex_array_to_rs_dataseries,
19
+ filter_common_indices,
20
+ )
21
+
22
+ log = structlog.get_logger()
23
+
24
+
25
+ def _project_derivative_on_experimental_set(
26
+ *,
27
+ native: np.ndarray,
28
+ derivative_amplitudes: np.ndarray,
29
+ difference: np.ndarray,
30
+ ) -> np.ndarray:
31
+ """
32
+ Project the `derivative` structure factor onto the set of experimentally observed amplitudes.
33
+
34
+ Specifically, we change the amplitude of the complex-valued `derivative` to ensure that both
35
+
36
+ difference = derivative - native
37
+
38
+ and that the modulus |derivative| is equal to the specified (user-input) `derivative_amplitudes`
39
+
40
+ Parameters
41
+ ----------
42
+ native: np.ndarray
43
+ The experimentally observed native amplitudes and computed phases, as a complex array.
44
+
45
+ derivative_amplitudes: np.ndarray
46
+ An array of the experimentally observed derivative amplitudes. Typically real-valued, but
47
+ a complex-valued array with arbitrary phase can be passed (phases discarded).
48
+
49
+ difference: np.ndarray
50
+ The estimated complex structure factor difference, derivative-minus-native.
51
+
52
+ Returns
53
+ -------
54
+ projected_derivative: np.ndarray
55
+ The complex-valued derivative structure factors, with experimental amplitude and phase
56
+ adjusted to ensure that difference = derivative - native.
57
+ """
58
+ projected_derivative = difference + native
59
+ projected_derivative *= np.abs(derivative_amplitudes) / np.abs(projected_derivative)
60
+ return projected_derivative
61
+
62
+
63
def _complex_derivative_from_iterative_tv(  # noqa: PLR0913
    *,
    native: np.ndarray,
    initial_derivative: np.ndarray,
    tv_denoise_function: Callable[[np.ndarray], tuple[np.ndarray, TvDenoiseResult]],
    convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE,
    max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS,
    verbose: bool = False,
) -> tuple[np.ndarray, pd.DataFrame]:
    """
    Estimate the derivative phases using the iterative TV algorithm.

    This function contains the algorithm logic. Each iteration denoises the current difference,
    projects `denoised difference + native` back onto the derivative amplitudes, and measures
    how far the derivative phases moved.

    Parameters
    ----------
    native: np.ndarray
        The complex native structure factors, usually experimental amplitudes and calculated phases

    initial_derivative: np.ndarray
        The complex derivative structure factors, usually with experimental amplitudes and
        estimated phases (often calculated from the native structure)

    tv_denoise_function: Callable[[np.ndarray], tuple[np.ndarray, TvDenoiseResult]]
        A function capable of applying the TV denoising operation to *Fourier space* objects. This
        function should therefore map one complex np.ndarray to a denoised complex np.ndarray and
        the TvDenoiseResult for that TV run.

    convergence_tolerance: float
        Iteration stops once the average phase change between successive derivative estimates,
        in degrees, drops below this value. Defaults to `ITERATIVE_TV_CONVERGENCE_TOLERANCE`.

    max_iterations: int
        Upper bound on the number of iterations; the loop stops once this many have run, even
        without convergence. At least one iteration is always performed. Defaults to
        `ITERATIVE_TV_MAX_ITERATIONS`.

    verbose: bool
        Log or not.

    Returns
    -------
    estimated_complex_derivative: np.ndarray
        The derivative SFs, with the same amplitudes but phases altered to minimize the TV.

    metadata: pd.DataFrame
        Information about the algorithm run as a function of iteration. For each step, includes:
        the iteration number, the tv_weight used, the negentropy (after the TV step), and the
        average phase change in degrees.
    """
    derivative = np.copy(initial_derivative)
    difference = initial_derivative - native

    converged: bool = False
    num_iterations: int = 0
    metadata: list[dict[str, float]] = []

    while not converged:
        difference_tvd, tv_metadata = tv_denoise_function(difference)

        # the amplitudes are re-imposed every iteration; only the phases can drift
        updated_derivative = _project_derivative_on_experimental_set(
            native=native,
            derivative_amplitudes=np.abs(derivative),
            difference=difference_tvd,
        )

        phase_change = average_phase_diff_in_degrees(derivative, updated_derivative)
        derivative = updated_derivative
        difference = derivative - native

        converged = phase_change < convergence_tolerance
        num_iterations += 1

        metadata.append(
            {
                "iteration": num_iterations,
                "tv_weight": tv_metadata.optimal_tv_weight,
                "negentropy_after_tv": tv_metadata.optimal_negentropy,
                "average_phase_change": phase_change,
            },
        )
        if verbose:
            log.info(
                f"  iteration {num_iterations:04d}",  # noqa: G004
                phase_change=round(phase_change, 4),
                negentropy=round(tv_metadata.optimal_negentropy, 4),
                tv_weight=tv_metadata.optimal_tv_weight,
            )

        # `>=` (not `>`) so that at most `max_iterations` iterations run, as documented
        if num_iterations >= max_iterations:
            break

    return derivative, pd.DataFrame(metadata)
153
+
154
+
155
def iterative_tv_phase_retrieval(  # noqa: PLR0913
    initial_derivative: Map,
    native: Map,
    *,
    convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE,
    max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS,
    tv_weights_to_scan: list[float] = DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION,
    verbose: bool = False,
) -> tuple[Map, pd.DataFrame]:
    """
    Estimate derivative phases with the iterative TV algorithm.

    Here is a brief pseudocode sketch of the algorithm. Structure factors F below are complex
    unless explicitly annotated |*|.

        Input: |F|, |Fh|, phi_c
        Note: F = |F| * exp{ phi_c } is the native/dark data,
             |Fh| represents the derivative/triggered/light data

        Initialize:
         - D_F = ( |Fh| - |F| ) * exp{ phi_c }

        while not converged:
            D_rho = FT{ D_F }                       Fourier transform
            D_rho' = TV{ D_rho }                    TV denoise: apply real space prior
            D_F' = FT-1{ D_rho' }                   back Fourier transform
            Fh' = (D_F' + F) * [|Fh| / |D_F' + F|]  Fourier space projection onto experimental set
            D_F = Fh' - F

    At every iteration the TV weight is chosen by `tv_denoise_difference_map`, which is handed
    `tv_weights_to_scan`. The algorithm iterates until the change in the derivative phases drops
    below the specified threshold, or the iteration cap is hit.

    Parameters
    ----------
    initial_derivative: Map
        the derivative amplitudes, and initial guess for the phases

    native: Map
        the native amplitudes, phases

    convergence_tolerance: float
        Iteration stops once the average change in the derivative phases between iterations
        drops below this value. Defaults to `ITERATIVE_TV_CONVERGENCE_TOLERANCE`.

    max_iterations: int
        Cap on the number of iterations. Defaults to `ITERATIVE_TV_MAX_ITERATIONS`.

    tv_weights_to_scan : list[float], optional
        The TV regularization weights (lambda values) offered to the denoiser at each iteration.
        Defaults to `DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION`.

    verbose: bool
        Log or not.

    Returns
    -------
    output_map: Map
        A copy of the (index-filtered) `initial_derivative` whose phases are replaced by the
        estimated derivative phases.

    metadata: pd.DataFrame
        Information about the algorithm run as a function of iteration. For each step, includes:
        the tv_weight used, the negentropy (after the TV step), and the average phase change in
        degrees.
    """
    # hotfix #52
    initial_derivative, native = filter_common_indices(initial_derivative, native)  # type: ignore[assignment]

    if verbose:
        log.info(
            "convergence criteria:",
            phase_tolerance=convergence_tolerance,
            max_iterations=max_iterations,
        )

    # Adapter handed to the array-only core loop: it rebuilds a Map around the bare complex
    # array (HKL index, cell, and spacegroup come from `native`) so the denoiser can run on it.
    def denoise_complex_difference(difference: np.ndarray) -> tuple[np.ndarray, TvDenoiseResult]:
        diffmap = Map.from_structurefactor(difference, index=native.index)
        diffmap.cell = native.cell
        diffmap.spacegroup = native.spacegroup

        denoised_map, tv_metadata = tv_denoise_difference_map(
            diffmap,
            weights_to_scan=tv_weights_to_scan,
            full_output=True,
        )
        return denoised_map.complex_amplitudes, tv_metadata

    # run the array-level algorithm on the bare complex structure factors
    native_complex = native.complex_amplitudes
    derivative_complex = initial_derivative.complex_amplitudes
    retrieved_complex_derivative, metadata = _complex_derivative_from_iterative_tv(
        native=native_complex,
        initial_derivative=derivative_complex,
        tv_denoise_function=denoise_complex_difference,
        convergence_tolerance=convergence_tolerance,
        max_iterations=max_iterations,
        verbose=verbose,
    )

    # only the phases of the result are kept; the amplitudes come from the input map
    _amplitudes, retrieved_phases = complex_array_to_rs_dataseries(
        retrieved_complex_derivative,
        index=initial_derivative.index,
    )

    output_map = initial_derivative.copy()
    output_map.phases = retrieved_phases

    return output_map, metadata