PyPI - meteor-maps - Versions diffs - 0.2.2__py3-none-any.whl - Mend

meteor-maps 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

meteor/__init__.py +3 -0
meteor/_version.py +16 -0
meteor/diffmaps.py +183 -0
meteor/io.py +62 -0
meteor/iterative.py +259 -0
meteor/rsmap.py +430 -0
meteor/scale.py +209 -0
meteor/scripts/__init__.py +0 -0
meteor/scripts/common.py +335 -0
meteor/scripts/compute_difference_map.py +168 -0
meteor/scripts/compute_iterative_tv_map.py +113 -0
meteor/settings.py +49 -0
meteor/sfcalc.py +29 -0
meteor/testing.py +78 -0
meteor/tv.py +213 -0
meteor/utils.py +167 -0
meteor/validate.py +163 -0
meteor_maps-0.2.2.dist-info/LICENSE +21 -0
meteor_maps-0.2.2.dist-info/METADATA +18 -0
meteor_maps-0.2.2.dist-info/RECORD +23 -0
meteor_maps-0.2.2.dist-info/WHEEL +5 -0
meteor_maps-0.2.2.dist-info/entry_points.txt +3 -0
meteor_maps-0.2.2.dist-info/top_level.txt +1 -0

meteor/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+# Public names of the package: only the two version identifiers are exported,
+# both re-exported from the `_version` module.
+__all__ = ["__version__", "version"]
+from ._version import __version__, version

meteor/_version.py ADDED Viewed

@@ -0,0 +1,16 @@
+# file generated by setuptools_scm
+# don't change, don't track in version control
+# TYPE_CHECKING is hard-coded to False here, so at runtime only the `else` branch
+# executes: `typing` is not imported and VERSION_TUPLE is bound to `object`.
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+# annotation-only declarations; the values are assigned on the last two lines
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+# the same version is exposed under both the dunder and the plain names
+__version__ = version = '0.2.2'
+__version_tuple__ = version_tuple = (0, 2, 2)

meteor/diffmaps.py ADDED Viewed

@@ -0,0 +1,183 @@
+from __future__ import annotations
+from collections.abc import Sequence
+import numpy as np
+import reciprocalspaceship as rs
+from .rsmap import Map, _assert_is_map
+from .settings import DEFAULT_KPARAMS_TO_SCAN, MAP_SAMPLING
+from .utils import filter_common_indices
+from .validate import ScalarMaximizer, negentropy
+def set_common_crystallographic_metadata(map1: Map, map2: Map, *, output: Map) -> None:
+    if hasattr(map1, "cell"):
+        if hasattr(map2, "cell") and (map1.cell != map2.cell):
+            msg = f"`map1.cell` {map1.cell} != `map2.cell` {map2.cell}"
+            raise AttributeError(msg)
+        output.cell = map1.cell
+    if hasattr(map1, "spacegroup"):
+        if hasattr(map2, "spacegroup") and (map1.spacegroup != map2.spacegroup):
+            msg = f"`map1.spacegroup` {map1.spacegroup} != "
+            msg += f"`map2.spacegroup` {map2.spacegroup}"
+            raise AttributeError(msg)
+        output.spacegroup = map1.spacegroup
+def compute_difference_map(derivative: Map, native: Map) -> Map:
+    """
+    Computes amplitude and phase differences between native and derivative structure factor sets.
+    It converts the amplitude and phase pairs from both the native and derivative structure factor
+    sets into complex numbers, computes the difference, and then converts the result back
+    into amplitudes and phases.
+    If uncertainty columns are provided for both native and derivative data, it also propagates the
+    uncertainty of the difference in amplitudes.
+    Parameters
+    ----------
+    derivative: Map
+        the derivative amplitudes, phases, uncertainties
+    native: Map
+        the native amplitudes, phases, uncertainties
+    Returns
+    -------
+    diffmap: Map
+        map corresponding to the complex difference (derivative - native)
+    """
+    _assert_is_map(derivative, require_uncertainties=False)
+    _assert_is_map(native, require_uncertainties=False)
+    derivative, native = filter_common_indices(derivative, native)  # type: ignore[assignment]
+    delta_complex = derivative.complex_amplitudes - native.complex_amplitudes
+    delta = Map.from_structurefactor(delta_complex, index=native.index)
+    set_common_crystallographic_metadata(derivative, native, output=delta)
+    if derivative.has_uncertainties and native.has_uncertainties:
+        prop_uncertainties = np.sqrt(derivative.uncertainties**2 + native.uncertainties**2)
+        delta.set_uncertainties(prop_uncertainties)
+    return delta
+def compute_kweights(difference_map: Map, *, k_parameter: float) -> rs.DataSeries:
+    """
+    Compute weights for each structure factor based on DeltaF and its uncertainty.
+    Parameters
+    ----------
+    difference_map: Map
+        A map of structure factor differences (DeltaF).
+    k_parameter: float
+        A scaling factor applied to the squared `df` values in the weight calculation.
+    Returns
+    -------
+    weights: rs.DataSeries
+        A series of computed weights, where higher uncertainties and larger differences lead to
+        lower weights.
+    """
+    _assert_is_map(difference_map, require_uncertainties=True)
+    inverse_weights = (
+        1
+        + (difference_map.uncertainties**2 / (difference_map.uncertainties**2).mean())
+        + k_parameter * (difference_map.amplitudes**2 / (difference_map.amplitudes**2).mean())
+    )
+    return 1.0 / inverse_weights
+def compute_kweighted_difference_map(derivative: Map, native: Map, *, k_parameter: float) -> Map:
+    """
+    Compute k-weighted derivative - native structure factor map.
+    This function first computes the standard difference map using `compute_difference_map`.
+    Then, it applies k-weighting to the amplitude differences based on the provided `k_parameter`.
+    Assumes amplitudes have already been scaled prior to invoking this function.
+    Parameters
+    ----------
+    derivative: Map
+        the derivative amplitudes, phases, uncertainties
+    native: Map
+        the native amplitudes, phases, uncertainties
+    Returns
+    -------
+    diffmap: Map
+        the k-weighted difference map
+    """
+    # require uncertainties at the beginning
+    _assert_is_map(derivative, require_uncertainties=True)
+    _assert_is_map(native, require_uncertainties=True)
+    difference_map = compute_difference_map(derivative, native)
+    weights = compute_kweights(difference_map, k_parameter=k_parameter)
+    difference_map.amplitudes *= weights
+    difference_map.uncertainties *= weights
+    return difference_map
+def max_negentropy_kweighted_difference_map(
+    derivative: Map,
+    native: Map,
+    *,
+    k_parameter_values_to_scan: np.ndarray | Sequence[float] = DEFAULT_KPARAMS_TO_SCAN,
+) -> rs.DataSet:
+    """
+    Compute k-weighted differences between native and derivative amplitudes and phases.
+    Determines an "optimal" k_parameter, between 0.0 and 1.0, that maximizes the resulting
+    difference map negentropy. Assumes that scaling has already been applied to the amplitudes
+    before calling this function.
+    Parameters
+    ----------
+    derivative: Map
+        the derivative amplitudes, phases, uncertainties
+    native: Map
+        the native amplitudes, phases, uncertainties
+    k_parameter_values_to_scan : np.ndarray | Sequence[float]
+        The values to scan to optimize the k-weighting parameter, by default is 0.00, 0.01 ... 1.00
+    Returns
+    -------
+    kweighted_dataset: rs.DataSet
+        dataset with added columns
+    opt_k_parameter: float
+        optimized k-weighting parameter
+    """
+    _assert_is_map(derivative, require_uncertainties=True)
+    _assert_is_map(native, require_uncertainties=True)
+    def negentropy_objective(k_parameter: float) -> float:
+        kweighted_dataset = compute_kweighted_difference_map(
+            derivative,
+            native,
+            k_parameter=k_parameter,
+        )
+        k_weighted_map = kweighted_dataset.to_ccp4_map(map_sampling=MAP_SAMPLING)
+        k_weighted_map_array = np.array(k_weighted_map.grid)
+        return negentropy(k_weighted_map_array)
+    maximizer = ScalarMaximizer(objective=negentropy_objective)
+    maximizer.optimize_over_explicit_values(arguments_to_scan=k_parameter_values_to_scan)
+    opt_k_parameter = float(maximizer.argument_optimum)
+    kweighted_dataset = compute_kweighted_difference_map(
+        derivative,
+        native,
+        k_parameter=opt_k_parameter,
+    )
+    return kweighted_dataset, opt_k_parameter

meteor/io.py ADDED Viewed

@@ -0,0 +1,62 @@
+"""https://www.ccp4.ac.uk/html/mtzformat.html
+https://www.globalphasing.com/buster/wiki/index.cgi?MTZcolumns
+"""
+from __future__ import annotations
+import re
+from .settings import (
+    COMPUTED_AMPLITUDE_COLUMNS,
+    COMPUTED_PHASE_COLUMNS,
+    OBSERVED_AMPLITUDE_COLUMNS,
+    OBSERVED_INTENSITY_COLUMNS,
+    OBSERVED_UNCERTAINTY_COLUMNS,
+)
+class AmbiguousMtzColumnError(ValueError): ...
+def _infer_mtz_column(columns_to_search: list[str], columns_to_look_for: list[str]) -> str:
+    # the next line consumes ["FOO", "BAR", "BAZ"] and produces regex strings like "^(FOO|BAR|BAZ)$"
+    regex = re.compile(f"^({'|'.join(columns_to_look_for)})$")
+    matches = [
+        regex.match(column) for column in columns_to_search if regex.match(column) is not None
+    ]
+    if len(matches) == 0:
+        msg = "cannot infer MTZ column name; "
+        msg += f"cannot find any of {columns_to_look_for} in {columns_to_search}"
+        raise AmbiguousMtzColumnError(msg)
+    if len(matches) > 1:
+        msg = "cannot infer MTZ column name; "
+        msg += f">1 instance of {columns_to_look_for} in {columns_to_search}"
+        raise AmbiguousMtzColumnError(msg)
+    [match] = matches
+    if match is None:
+        msg = "`None` not filtered during regex matching"
+        raise RuntimeError(msg)
+    return match.group(0)
+def find_observed_intensity_column(mtz_columns: list[str]) -> str:
+    """
+    Find the single entry of `mtz_columns` that matches `OBSERVED_INTENSITY_COLUMNS`.
+    Delegates to `_infer_mtz_column`, which raises `AmbiguousMtzColumnError` when no entry or
+    more than one entry matches.
+    """
+    return _infer_mtz_column(mtz_columns, OBSERVED_INTENSITY_COLUMNS)
+def find_observed_amplitude_column(mtz_columns: list[str]) -> str:
+    """
+    Find the single entry of `mtz_columns` that matches `OBSERVED_AMPLITUDE_COLUMNS`.
+    Delegates to `_infer_mtz_column`, which raises `AmbiguousMtzColumnError` when no entry or
+    more than one entry matches.
+    """
+    return _infer_mtz_column(mtz_columns, OBSERVED_AMPLITUDE_COLUMNS)
+def find_observed_uncertainty_column(mtz_columns: list[str]) -> str:
+    """
+    Find the single entry of `mtz_columns` that matches `OBSERVED_UNCERTAINTY_COLUMNS`.
+    Delegates to `_infer_mtz_column`, which raises `AmbiguousMtzColumnError` when no entry or
+    more than one entry matches.
+    """
+    return _infer_mtz_column(mtz_columns, OBSERVED_UNCERTAINTY_COLUMNS)
+def find_computed_amplitude_column(mtz_columns: list[str]) -> str:
+    """
+    Find the single entry of `mtz_columns` that matches `COMPUTED_AMPLITUDE_COLUMNS`.
+    Delegates to `_infer_mtz_column`, which raises `AmbiguousMtzColumnError` when no entry or
+    more than one entry matches.
+    """
+    return _infer_mtz_column(mtz_columns, COMPUTED_AMPLITUDE_COLUMNS)
+def find_computed_phase_column(mtz_columns: list[str]) -> str:
+    """
+    Find the single entry of `mtz_columns` that matches `COMPUTED_PHASE_COLUMNS`.
+    Delegates to `_infer_mtz_column`, which raises `AmbiguousMtzColumnError` when no entry or
+    more than one entry matches.
+    """
+    return _infer_mtz_column(mtz_columns, COMPUTED_PHASE_COLUMNS)

meteor/iterative.py ADDED Viewed

@@ -0,0 +1,259 @@
+from __future__ import annotations
+from collections.abc import Callable
+import numpy as np
+import pandas as pd
+import structlog
+from .rsmap import Map
+from .settings import (
+    DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION,
+    ITERATIVE_TV_CONVERGENCE_TOLERANCE,
+    ITERATIVE_TV_MAX_ITERATIONS,
+)
+from .tv import TvDenoiseResult, tv_denoise_difference_map
+from .utils import (
+    average_phase_diff_in_degrees,
+    complex_array_to_rs_dataseries,
+    filter_common_indices,
+)
+# module-level structlog logger; used below for the optional `verbose` progress messages
+log = structlog.get_logger()
+def _project_derivative_on_experimental_set(
+    *,
+    native: np.ndarray,
+    derivative_amplitudes: np.ndarray,
+    difference: np.ndarray,
+) -> np.ndarray:
+    """
+    Project the `derivative` structure factor onto the set of experimentally observed amplitudes.
+    Specifically, we change the amplitude of the complex-valued `derivative` to ensure that both
+        difference = derivative - native
+    and that the modulus |derivative| is equal to the specified (user-input) `derivative_amplitudes`
+    Parameters
+    ----------
+    native: np.ndarray
+        The experimentally observed native amplitudes and computed phases, as a complex array.
+    derivative_amplitudes: np.ndarray
+        An array of the experimentally observed derivative amplitudes. Typically real-valued, but
+        a complex-valued array with arbitrary phase can be passed (phases discarded).
+    difference: np.ndarray
+        The estimated complex structure factor difference, derivative-minus-native.
+    Returns
+    -------
+    projected_derivative: np.ndarray
+        The complex-valued derivative structure factors, with experimental amplitude and phase
+        adjusted to ensure that difference = derivative - native.
+    """
+    projected_derivative = difference + native
+    projected_derivative *= np.abs(derivative_amplitudes) / np.abs(projected_derivative)
+    return projected_derivative
+def _complex_derivative_from_iterative_tv(  # noqa: PLR0913
+    *,
+    native: np.ndarray,
+    initial_derivative: np.ndarray,
+    tv_denoise_function: Callable[[np.ndarray], tuple[np.ndarray, TvDenoiseResult]],
+    convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE,
+    max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS,
+    verbose: bool = False,
+) -> tuple[np.ndarray, pd.DataFrame]:
+    """
+    Estimate the derivative phases using the iterative TV algorithm.
+    This function contains the algorithm logic.
+    Parameters
+    ----------
+    native: np.ndarray
+        The complex native structure factors, usually experimental amplitudes and calculated phases
+    initial_complex_derivative : np.ndarray
+        The complex derivative structure factors, usually with experimental amplitudes and esimated
+        phases (often calculated from the native structure)
+    tv_denoise_function: Callable[[np.ndarray], tuple[np.ndarray, TvDenoiseResult]]
+        A function capable of applying the TV denoising operation to *Fourier space* objects. This
+        function should therefore map one complex np.ndarray to a denoised complex np.ndarray and
+        the TvDenoiseResult for that TV run.
+    convergance_tolerance: float
+        If the change in the estimated derivative SFs drops below this value (phase, per-component)
+        then return. Default 1e-4.
+    max_iterations: int
+        If this number of iterations is reached, stop early. Default 1000.
+    verbose: bool
+        Log or not.
+    Returns
+    -------
+    estimated_complex_derivative: np.ndarray
+        The derivative SFs, with the same amplitudes but phases altered to minimize the TV.
+    metadata: pd.DataFrame
+        Information about the algorithm run as a function of iteration. For each step, includes:
+        the tv_weight used, the negentropy (after the TV step), and the average phase change in
+        degrees.
+    """
+    derivative = np.copy(initial_derivative)
+    difference = initial_derivative - native
+    converged: bool = False
+    num_iterations: int = 0
+    metadata: list[dict[str, float]] = []
+    while not converged:
+        difference_tvd, tv_metadata = tv_denoise_function(difference)
+        updated_derivative = _project_derivative_on_experimental_set(
+            native=native,
+            derivative_amplitudes=np.abs(derivative),
+            difference=difference_tvd,
+        )
+        phase_change = average_phase_diff_in_degrees(derivative, updated_derivative)
+        derivative = updated_derivative
+        difference = derivative - native
+        converged = phase_change < convergence_tolerance
+        num_iterations += 1
+        metadata.append(
+            {
+                "iteration": num_iterations,
+                "tv_weight": tv_metadata.optimal_tv_weight,
+                "negentropy_after_tv": tv_metadata.optimal_negentropy,
+                "average_phase_change": phase_change,
+            },
+        )
+        if verbose:
+            log.info(
+                f"  iteration {num_iterations:04d}",  # noqa: G004
+                phase_change=round(phase_change, 4),
+                negentropy=round(tv_metadata.optimal_negentropy, 4),
+                tv_weight=tv_metadata.optimal_tv_weight,
+            )
+        if num_iterations > max_iterations:
+            break
+    return derivative, pd.DataFrame(metadata)
+def iterative_tv_phase_retrieval(  # noqa: PLR0913
+    initial_derivative: Map,
+    native: Map,
+    *,
+    convergence_tolerance: float = ITERATIVE_TV_CONVERGENCE_TOLERANCE,
+    max_iterations: int = ITERATIVE_TV_MAX_ITERATIONS,
+    tv_weights_to_scan: list[float] = DEFAULT_TV_WEIGHTS_TO_SCAN_AT_EACH_ITERATION,
+    verbose: bool = False,
+) -> tuple[Map, pd.DataFrame]:
+    """
+    Here is a brief pseudocode sketch of the alogrithm. Structure factors F below are complex unless
+    explicitly annotated |*|.
+        Input: |F|, |Fh|, phi_c
+        Note: F = |F| * exp{ phi_c } is the native/dark data,
+             |Fh| represents the derivative/triggered/light data
+        Initialize:
+         - D_F = ( |Fh| - |F| ) * exp{ phi_c }
+        while not converged:
+            D_rho = FT{ D_F }                       Fourier transform
+            D_rho' = TV{ D_rho }                    TV denoise: apply real space prior
+            D_F' = FT-1{ D_rho' }                   back Fourier transform
+            Fh' = (D_F' + F) * [|Fh| / |D_F' + F|]  Fourier space projection onto experimental set
+            D_F = Fh' - F
+    Where the TV weight parameter is determined using golden section optimization. The algorithm
+    iterates until the changes in the derivative phase drop below a specified threshold.
+    Parameters
+    ----------
+    initial_derivative: Map
+        the derivative amplitudes, and initial guess for the phases
+    native: Map
+        the native amplitudes, phases
+    convergance_tolerance: float
+        If the change in the estimated derivative SFs drops below this value (phase, per-component)
+        then return. Default 1e-4.
+    max_iterations: int
+        If this number of iterations is reached, stop early. Default 1000.
+    tv_weights_to_scan : list[float], optional
+        A list of TV regularization weights (λ values) to be scanned for optimal results,
+        by default [0.001, 0.01, 0.1, 1.0].
+    verbose: bool
+        Log or not.
+    Returns
+    -------
+    output_map: Map
+        The estimated derivative phases, along with the input amplitudes and input computed phases.
+    metadata: pd.DataFrame
+        Information about the algorithm run as a function of iteration. For each step, includes:
+        the tv_weight used, the negentropy (after the TV step), and the average phase change in
+        degrees.
+    """
+    # hotfix #52
+    initial_derivative, native = filter_common_indices(initial_derivative, native)  # type: ignore[assignment]
+    # clean TV denoising interface that is crystallographically intelligent
+    # maintains state for the HKL index, spacegroup, and cell information
+    def tv_denoise_closure(difference: np.ndarray) -> tuple[np.ndarray, TvDenoiseResult]:
+        diffmap = Map.from_structurefactor(difference, index=native.index)
+        diffmap.cell = native.cell
+        diffmap.spacegroup = native.spacegroup
+        denoised_map, tv_metadata = tv_denoise_difference_map(
+            diffmap,
+            weights_to_scan=tv_weights_to_scan,
+            full_output=True,
+        )
+        return denoised_map.complex_amplitudes, tv_metadata
+    # estimate the derivative phases using the iterative TV algorithm
+    if verbose:
+        log.info(
+            "convergence criteria:",
+            phase_tolerance=convergence_tolerance,
+            max_iterations=max_iterations,
+        )
+    it_tv_complex_derivative, metadata = _complex_derivative_from_iterative_tv(
+        native=native.complex_amplitudes,
+        initial_derivative=initial_derivative.complex_amplitudes,
+        tv_denoise_function=tv_denoise_closure,
+        convergence_tolerance=convergence_tolerance,
+        max_iterations=max_iterations,
+        verbose=verbose,
+    )
+    _, derivative_phases = complex_array_to_rs_dataseries(
+        it_tv_complex_derivative,
+        index=initial_derivative.index,
+    )
+    # combine the determined derivative phases with the input to generate a complete output
+    output_dataset = initial_derivative.copy()
+    output_dataset.phases = derivative_phases
+    return output_dataset, metadata