PyPI - pxmeter - Versions diffs - 0.1.5__tar.gz → 0.1.6__tar.gz - Mend

pxmeter 0.1.5.tar.gz → 0.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

{pxmeter-0.1.5/pxmeter.egg-info → pxmeter-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pxmeter
-Version: 0.1.5
+Version: 0.1.6
 Summary: PXMeter is a comprehensive toolkit for evaluating the quality of         structures generated by biomolecular structure prediction models.
 Author: Bytedance Inc.
 Author-email: ai4s-bio@bytedance.com

{pxmeter-0.1.5 → pxmeter-0.1.6}/README.md RENAMED Viewed

@@ -124,7 +124,6 @@ If you use PXMeter in your research, please cite the following:
 ## 🚧 Limitations
-- PXMeter supports chain/atom permutations but not residue-level permutations. As a result, the accuracy of evaluation for branched chains, such as glycans, cannot be fully guaranteed.
 - It is recommended to use CIF files from the RCSB PDB as references, as they ensure content accuracy. All development and testing were conducted exclusively on CIF files from this source.

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/parser.py RENAMED Viewed

@@ -360,6 +360,13 @@ class MMCIFParser:
             # First obtain all altlocs, then filter them
             tmp_altloc = "all"
+        if tmp_altloc == "all":
+            logging.warning(
+                "Bond computation is not supported with `altloc='all'`."
+                "include_bonds will be set to False."
+            )
+            include_bonds = False
         if assembly_id is None:
             atom_array = pdbx.get_structure(
                 pdbx_file=self.cif,

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/struct.py RENAMED Viewed

@@ -96,6 +96,41 @@ class Structure:
             cif_block=cif_parser.cif.block,
         )
+    @classmethod
+    def from_atom_array(
+        cls,
+        atom_array: AtomArray,
+        entity_poly_seq: dict[str, str],
+        entity_poly_type: dict[str, str],
+        entry_id: str = "",
+        exptl_methods: tuple[str] = tuple(),
+        cif_block: dict = None,
+    ) -> "Structure":
+        """
+        Create a Structure object from an already-built AtomArray.
+        Unlike the file-based constructor, nothing is parsed here: every argument is
+        forwarded to the class constructor unchanged, and the unique chain/atom IDs
+        are derived from ``atom_array`` via ``get_unique_chain_id`` and
+        ``get_unique_atom_id``.
+        Args:
+            atom_array (AtomArray): Atoms of the structure.
+            entity_poly_seq (dict[str, str]): Polymer sequence lookup
+                (presumably entity ID -> sequence; confirm against callers).
+            entity_poly_type (dict[str, str]): Polymer type lookup
+                (presumably entity ID -> polymer type; confirm against callers).
+            entry_id (str): Entry identifier. Defaults to "".
+            exptl_methods (tuple[str]): Experimental methods. Defaults to an empty tuple.
+            cif_block (dict, optional): CIF block to attach to the structure.
+                Defaults to None.
+        Returns:
+            Structure: Structure object.
+        """
+        return cls(
+            atom_array=atom_array,
+            entity_poly_seq=entity_poly_seq,
+            entity_poly_type=entity_poly_type,
+            uni_chain_id=get_unique_chain_id(atom_array),
+            uni_atom_id=get_unique_atom_id(atom_array),
+            entry_id=entry_id,
+            exptl_methods=exptl_methods,
+            cif_block=cif_block,
+        )
     def _get_hydrogens_mask(self) -> np.ndarray:
         """
         Get mask of hydrogens.

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/utils.py RENAMED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 from collections import Counter
+from datetime import datetime
 import biotite.sequence as seq
 import biotite.sequence.align as align
@@ -266,3 +267,59 @@ def get_mol_graph_matches(
         if num >= max_matches:
             break
     return matches
+def get_res_graph_matches(
+    res_graph1: nx.Graph, res_graph2: nx.Graph, max_matches: int = 1000
+) -> list[dict]:
+    """
+    Find subgraph isomorphisms between two residue-level graphs.
+    This function enumerates mappings where a subgraph of `res_graph1` is isomorphic to
+    (i.e., can be relabeled to match) `res_graph2`. Two nodes are considered equivalent
+    only if both their ``"res_name"`` and their ``"atom_names"`` node attributes are
+    equal; all other node or edge attributes are ignored.
+    Enumeration stops once `max_matches` mappings have been collected.
+    Args:
+        res_graph1 (nx.Graph): The source (typically larger) residue graph.
+            Node attributes required: ``"res_name"`` (e.g., "ALA", "NAG") and
+            ``"atom_names"``.
+        res_graph2 (nx.Graph): The target (typically smaller) residue graph to match against.
+            Node attributes required: ``"res_name"`` and ``"atom_names"``.
+        max_matches (int, optional): Maximum number of mappings to return. Values
+            less than or equal to zero yield an empty list. Defaults to ``1000``.
+    Returns:
+        list[dict]: A list of node-mapping dicts. Each dict maps node IDs from `res_graph1`
+        (keys) to node IDs in `res_graph2` (values) representing one subgraph isomorphism.
+    """
+    # Imported locally so this function does not depend on the module's import block.
+    from itertools import islice
+    isomatcher = nx.algorithms.isomorphism.GraphMatcher(
+        res_graph1,
+        res_graph2,
+        node_match=lambda x, y: (x["res_name"] == y["res_name"])
+        and (x["atom_names"] == y["atom_names"]),
+    )
+    # The iterator is lazy, so islice stops the enumeration as soon as the cap is
+    # reached. Clamping at zero keeps a non-positive cap from raising in islice and
+    # makes it return no matches (the old append-then-check loop returned one).
+    return list(islice(isomatcher.subgraph_isomorphisms_iter(), max(max_matches, 0)))
+def is_valid_date_format(date_string: str) -> bool:
+    """
+    Check if the date string is in the format yyyy-mm-dd.
+    The check is delegated to ``datetime.strptime`` with ``"%Y-%m-%d"``, so the value
+    must also be a real calendar date (e.g. "2024-02-30" is rejected). Note that
+    ``strptime`` tolerates non-zero-padded month/day, so "2024-1-5" is accepted.
+    Args:
+        date_string (str): The date string to check. Non-string values are treated
+            as invalid rather than raising.
+    Returns:
+        bool: True if the date string is in the format yyyy-mm-dd, False otherwise.
+    """
+    try:
+        datetime.strptime(date_string, "%Y-%m-%d")
+    except (TypeError, ValueError):
+        # ValueError: wrong layout or impossible date.
+        # TypeError: the argument is not a string at all (e.g. None).
+        return False
+    return True

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/mapping.py RENAMED Viewed

@@ -37,6 +37,7 @@ from pxmeter.data.utils import (
 )
 from pxmeter.permutation.atom import AtomPermutation
 from pxmeter.permutation.chain import ChainPermutation
+from pxmeter.permutation.residue import ResiduePermutation
 class MappingCIF:
@@ -968,25 +969,20 @@ class MappingResult:
     ref_struct: Structure
     model_struct: Structure
-    ref_indices: np.ndarray
-    model_indices: np.ndarray
+    mapped_ref_struct: Structure
+    mapped_model_struct: Structure
     chain_mapping: dict[str, str]
     chain_mapping_anchors: dict[str, str]
     model_to_ref_entity_id: dict[str, str]
     def get_mapped_structures(self) -> tuple[Structure, Structure]:
         """
-        Selects and returns substructures from reference and model structures based on specified indices.
+        Returns the mapped reference and model structures.
         Returns:
-            tuple: A tuple containing two substructures:
-                - sele_ref_struct: The selected substructure from the reference structure.
-                - sele_model_struct: The selected substructure from the model structure.
+            tuple[Structure, Structure]: A tuple containing the mapped reference and model structures.
         """
-        sele_ref_struct = self.ref_struct.select_substructure(self.ref_indices)
-        sele_model_struct = self.model_struct.select_substructure(self.model_indices)
-        return sele_ref_struct, sele_model_struct
+        return self.mapped_ref_struct, self.mapped_model_struct
     @classmethod
     def from_cifs(
@@ -997,6 +993,7 @@ class MappingResult:
         ref_altloc: str = "first",
         ref_model: int = 1,
         model_chain_id_to_lig_mol: dict[str, Chem.Mol] | None = None,
+        chain_mapping: dict[str, str] | None = None,
         mapping_config: ConfigDict = RUN_CONFIG.mapping,
     ) -> "MappingResult":
         """
@@ -1010,6 +1007,8 @@ class MappingResult:
             ref_model (int): Model number for the reference structure. Defaults to 1.
             model_chain_id_to_lig_mol (dict[str, Chem.Mol], optional): Mapping of model chain IDs
                 to ligand molecules. Defaults to None.
+            chain_mapping (dict[str, str], optional): Mapping of model chain IDs to reference chain IDs.
+                            Defaults to None.
             mapping_config (ConfigDict, optional): Configuration for the mapping process.
                             Defaults to RUN_CONFIG.mapping.
@@ -1035,23 +1034,51 @@ class MappingResult:
             model_to_ref_entity_id,
             enumerate_all_anchors=mapping_config.enumerate_all_anchors,
         )
-        chain_mapping, chain_mapping_anchors = chain_perm.get_heurisitic_chain_mapping()
+        if not chain_mapping:
+            (
+                chain_mapping,
+                chain_mapping_anchors,
+            ) = chain_perm.get_heurisitic_chain_mapping()
+        else:
+            chain_mapping_anchors = {}
         (
             chain_perm_ref_indices,
             chain_perm_model_indices,
         ) = chain_perm.get_permuted_indices(chain_mapping)
+        chain_permed_ref_struct = map_cif.ref_struct.select_substructure(
+            chain_perm_ref_indices
+        )
+        chain_permed_model_struct = map_cif.model_struct.select_substructure(
+            chain_perm_model_indices
+        )
+        residue_perm = ResiduePermutation(
+            chain_permed_ref_struct,
+            chain_permed_model_struct,
+        )
+        residue_permuted_indices = residue_perm.run()
+        chain_permed_model_struct.reset_atom_array_annot(
+            "coord",
+            chain_permed_model_struct.atom_array.coord[residue_permuted_indices],
+        )
         atom_perm = AtomPermutation(
-            map_cif.ref_struct.select_substructure(chain_perm_ref_indices),
-            map_cif.model_struct.select_substructure(chain_perm_model_indices),
+            chain_permed_ref_struct,
+            chain_permed_model_struct,
         )
         atom_permuted_indices = atom_perm.run()
+        permed_model_struct = chain_permed_model_struct.select_substructure(
+            atom_permuted_indices
+        )
         return cls(
             ref_struct=map_cif.ref_struct,
             model_struct=map_cif.model_struct,
-            ref_indices=chain_perm_ref_indices,
-            model_indices=chain_perm_model_indices[atom_permuted_indices],
+            mapped_ref_struct=chain_permed_ref_struct,
+            mapped_model_struct=permed_model_struct,
             chain_mapping=chain_mapping,
             chain_mapping_anchors=chain_mapping_anchors,
             model_to_ref_entity_id=model_to_ref_entity_id,

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/permutation/atom.py RENAMED Viewed

@@ -24,8 +24,8 @@ class AtomPermutation:
     Generating and applying atom permutations based on a reference structure.
     Args:
-            ref_struct (Structure): The reference structure used for permutation generation.
-            model_struct (Structure): The model structure used for permutation application.
+        ref_struct (Structure): The reference structure used for permutation generation.
+        model_struct (Structure): The model structure used for permutation application.
     """
     def __init__(self, ref_struct: Structure, model_struct: Structure):

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/permutation/chain.py RENAMED Viewed

@@ -29,11 +29,11 @@ class ChainPermutation:
     already aligned chains.
     Args:
-            ref_struct (Structure): Reference structure object
-            model_struct (Structure): Model structure object
-            model_to_ref_entity_id (dict[str, str]): Mapping of model entity IDs
-                                    to reference entity IDs
-            enumerate_all_anchors (bool): Whether to enumerate all anchor chains.
+        ref_struct (Structure): Reference structure object
+        model_struct (Structure): Model structure object
+        model_to_ref_entity_id (dict[str, str]): Mapping of model entity IDs
+                                to reference entity IDs
+        enumerate_all_anchors (bool): Whether to enumerate all anchor chains.
     """
     def __init__(

pxmeter-0.1.6/pxmeter/permutation/residue.py ADDED Viewed

@@ -0,0 +1,267 @@
+# Copyright 2025 ByteDance and/or its affiliates.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import networkx as nx
+import numpy as np
+from pxmeter.data.struct import Structure
+from pxmeter.data.utils import get_res_graph_matches
+from pxmeter.metrics.rmsd import align_src_to_tar, apply_transform, rmsd
+class ResiduePermutation:
+    """
+    Find, for branched non-polymer chains, the residue ordering of the model that
+    best matches the reference.
+    Candidate orderings are the residue-graph automorphisms enumerated on the model
+    structure; the reference structure supplies the target coordinates used to score
+    them.
+    NOTE(review): the same boolean masks are applied to both structures, so this
+    appears to assume that ``ref_struct`` and ``model_struct`` are already atom-wise
+    aligned (same length and order) - confirm against the caller.
+    Args:
+        ref_struct (Structure): The reference structure used to score permutations.
+        model_struct (Structure): The model structure whose residues are permuted.
+    """
+    def __init__(self, ref_struct: Structure, model_struct: Structure):
+        self.ref_struct = ref_struct
+        self.model_struct = model_struct
+    @staticmethod
+    def _calc_residue_centers(
+        res_ids: np.ndarray, coords: np.ndarray
+    ) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Compute the geometric center (mean coordinate) for each unique residue ID.
+        Args:
+            res_ids (np.ndarray): Array of residue IDs, shape (N_atom,)
+            coords (np.ndarray): Cartesian coordinates, shape (N_atom, 3)
+        Returns:
+            Tuple[np.ndarray, np.ndarray]: (uniq_res_ids, centers) where
+            uniq_res_ids shape (N_res,), centers shape (N_res, 3) in the same order.
+            ``uniq_res_ids`` comes from ``np.unique`` and is therefore sorted ascending.
+        Raises:
+            ValueError: If ``res_ids`` is not 1-D, ``coords`` is not (N, 3), or their
+                lengths differ.
+        """
+        res_ids = np.asarray(res_ids)
+        coords = np.asarray(coords)
+        if (
+            res_ids.ndim != 1
+            or coords.ndim != 2
+            or coords.shape[1] != 3
+            or len(res_ids) != len(coords)
+        ):
+            raise ValueError("Shape mismatch: res_ids (N,), coords (N, 3) required.")
+        # inv[k] is the row of uniq_ids that atom k belongs to.
+        uniq_ids, inv = np.unique(res_ids, return_inverse=True)
+        centers = np.zeros((len(uniq_ids), 3), dtype=float)
+        # Number of atoms per residue.
+        counts = np.bincount(inv).astype(float)
+        # Sum each coordinate axis per residue, then divide by the atom count.
+        for i in range(3):
+            centers[:, i] = np.bincount(inv, weights=coords[:, i])
+        centers /= counts[:, None]
+        return uniq_ids, centers
+    @staticmethod
+    def _get_branch_residue_permutations(
+        struct: Structure, chain_id: str
+    ) -> np.ndarray | None:
+        """
+        Detect branch-like connectivity within a chain using non-adjacent residue bonds and,
+        if the induced residue-level graph is a single tree, return residue permutations
+        corresponding to its graph automorphisms.
+        The procedure:
+        1) Filter atoms by ``chain_id`` and collect inter-residue bonds from the chain's
+            atom-level ``BondList``.
+        2) If any inter-residue bond connects residues whose numeric IDs differ by more than 1
+            (``|res_id_i - res_id_j| > 1``), mark the chain as having a branch-like connection.
+        3) Lift inter-residue bonds to a residue-level undirected graph G (nodes = ``res_id``,
+            edges = covalent connections between residues).
+        4) If G is disconnected, contains cycles, or does not contain residue 1, return
+            ``None`` (only tree-shaped branches rooted at residue 1 are supported).
+        5) Annotate nodes with ``res_name`` and ``atom_names`` (atom names joined by "_"),
+            tag residue 1 as the root by appending "_root" to its ``res_name``, and
+            enumerate matches of G onto itself via ``get_res_graph_matches``. Keep the
+            matches whose first entry (lowest node id) maps to residue 1.
+        Args:
+            struct (Structure): A structure object exposing ``uni_chain_id`` and an
+                ``atom_array`` with fields ``res_id``, ``res_name``, ``atom_name`` and
+                ``bonds``; and where ``atom_array.bonds[mask].as_array()`` yields an
+                integer array whose first two columns are atom indices relative to the
+                selected chain.
+            chain_id (str): The target chain identifier matched against ``uni_chain_id``.
+        Returns:
+            np.ndarray | None: ``None`` if the chain has no atoms, if no branch-like
+            non-adjacent residue bond is detected, if the residue graph is not a single
+            tree containing residue 1, or if fewer than two permutations are kept (i.e.
+            there is nothing to choose between). Otherwise an integer array of shape
+            ``(K, N)`` where each row encodes one automorphism as a permutation of the ``N``
+            residue nodes (ordered by ascending source node id). ``K`` is the number of
+            automorphisms kept (enumeration is capped internally at 1000).
+        """
+        mask = struct.uni_chain_id == chain_id
+        arr = struct.atom_array
+        if not np.any(mask):
+            return
+        bond_arr = arr.bonds[mask].as_array()
+        # Translate both bond endpoints from chain-local atom indices to residue IDs.
+        res_id_i = arr.res_id[mask][bond_arr[:, 0]]
+        res_id_j = arr.res_id[mask][bond_arr[:, 1]]
+        res_id_pairs = set(tuple(zip(res_id_i, res_id_j)))
+        has_branch = False
+        nodes_adj = set()
+        for i, j in res_id_pairs:
+            if i == j:
+                # Intra-residue bond: irrelevant for the residue-level graph.
+                continue
+            nodes_adj.add((i, j))
+            if abs(i - j) > 1:
+                has_branch = True
+        if has_branch:
+            # NOTE(review): G is built from edges only, so a residue without any
+            # inter-residue bond is not a node of G and N can be smaller than the
+            # number of residues in the chain - confirm callers can handle that.
+            G = nx.Graph()
+            G.add_edges_from(nodes_adj)
+            if (
+                nx.number_connected_components(G) > 1
+                or len(nx.cycle_basis(G)) > 0
+                or (1 not in G.nodes)
+            ):
+                return
+            attrs = {}
+            for node in G.nodes:
+                node_res_name = arr.res_name[mask][arr.res_id[mask] == node][0]
+                node_atom_names = "_".join(
+                    arr.atom_name[mask][arr.res_id[mask] == node]
+                )
+                if node == 1:
+                    # Do not permute the root residue
+                    node_res_name += "_root"
+                attrs[node] = {"res_name": node_res_name, "atom_names": node_atom_names}
+            nx.set_node_attributes(G, attrs)
+            matches = get_res_graph_matches(G, G, max_matches=1000)
+            perm = []
+            for match in matches:
+                # Order the mapping by source node id so each row is a permutation
+                # of the ascending residue IDs.
+                sorted_result = sorted(match.items(), key=lambda x: x[0])
+                match_values = [i[1] for i in sorted_result]
+                if match_values[0] != 1:
+                    continue
+                perm.append(match_values)
+            # A single surviving permutation leaves nothing to choose from, so the
+            # function falls through and returns None in that case.
+            if len(perm) > 1:
+                perm = np.array(perm)
+                return perm
+    def _get_optimal_perm_ids_for_chain(self, chain_id: str) -> np.ndarray | None:
+        """
+        Compute the residue-ID permutation for a branch-like chain that best aligns
+        the model to the reference, measured by centroid RMSD.
+        Steps:
+            1) Detects residue-level graph automorphisms for the chain (if the chain
+                exhibits non-adjacent inter-residue bonds indicating a branch-like tree).
+            2) Treats residue 1 as fixed (root) to define the rigid alignment between
+                the model and the reference using root-atom coordinates.
+            3) For each candidate permutation of residue IDs, applies the rigid
+                transform to model residue centroids and computes RMSD to the reference
+                residue centroids.
+            4) Returns the residue-ID permutation that minimizes this RMSD.
+        Args:
+            chain_id (str): Target chain identifier to evaluate.
+        Returns:
+            np.ndarray | None: If the chain has a valid branch-like tree and at least
+                one non-trivial automorphism, returns an integer array of shape (N,)
+                containing the residue IDs in the selected order (matching the
+                original residue numbering). Returns ``None`` if no branch-like structure
+                is detected, no valid permutations are found, or no root atoms are
+                selected.
+        """
+        perm = self._get_branch_residue_permutations(self.model_struct, chain_id)
+        if perm is None:
+            return
+        chain_mask = self.model_struct.uni_chain_id == chain_id
+        # Use the residue 1 as the root
+        root_coord_mask = chain_mask & (self.model_struct.atom_array.res_id == 1)
+        model_root = self.model_struct.atom_array.coord[root_coord_mask]
+        # NOTE(review): the mask is computed on the model but also indexes the
+        # reference; presumably both atom arrays are aligned atom-by-atom - confirm.
+        ref_root = self.ref_struct.atom_array.coord[root_coord_mask]
+        if (len(model_root) == 0) or (len(ref_root) == 0):
+            return
+        assert model_root.shape == ref_root.shape
+        # Presumably returns the rotation/translation that superimposes the model root
+        # onto the reference root - see pxmeter.metrics.rmsd.
+        rot, trans = align_src_to_tar(model_root, ref_root)
+        _ref_ids, ref_centers = self._calc_residue_centers(
+            self.ref_struct.atom_array.res_id[chain_mask],
+            self.ref_struct.atom_array.coord[chain_mask],
+        )
+        model_ids, model_centers = self._calc_residue_centers(
+            self.model_struct.atom_array.res_id[chain_mask],
+            self.model_struct.atom_array.coord[chain_mask],
+        )
+        # Residue ID -> row index in model_centers.
+        model_pos = {rid: i for i, rid in enumerate(model_ids)}
+        best_perm = None
+        best_rmsd = np.inf
+        for ids in perm:
+            # Row k of model_mat is the centroid of the model residue that this
+            # candidate places at position k.
+            ordered = np.array([model_pos[i] for i in ids], dtype=int)
+            model_mat = model_centers[ordered]
+            transformed = apply_transform(model_mat, rot, trans)
+            v = rmsd(transformed, ref_centers)
+            # Strict "<" keeps the earliest candidate on ties.
+            if v < best_rmsd:
+                best_rmsd = v
+                best_perm = ids
+        return best_perm
+    def run(self):
+        """
+        Reorder model atoms within non-polymer chains according to the
+        RMSD-optimal residue-ID permutation per chain.
+        For each non-polymer entity and its chains:
+            - Detect branch-like residue graphs and enumerate automorphisms.
+            - Select the permutation of residue IDs that minimizes centroid RMSD
+                to the reference (via rigid alignment anchored at residue 1).
+            - Stably reorder atom indices of that chain so atoms follow the selected
+                residue-ID order (preserving within-residue atom order).
+        Chains for which no permutation is selected keep their original indices.
+        Returns:
+            np.ndarray: A 1-D integer array of length ``len(self.model_struct.atom_array)``,
+                representing the remapped atom indices.
+        """
+        # Start from the identity mapping; only permuted chains are overwritten.
+        model_index = np.arange(len(self.model_struct.atom_array))
+        model_entity_id_to_chain_ids = self.model_struct.get_entity_id_to_chain_ids()
+        for entity_id, chain_ids in model_entity_id_to_chain_ids.items():
+            if entity_id in self.model_struct.entity_poly_type:
+                # Skip polymer
+                continue
+            for chain_id in chain_ids:
+                optimal_perm_ids = self._get_optimal_perm_ids_for_chain(chain_id)
+                if optimal_perm_ids is None:
+                    continue
+                chain_mask = self.model_struct.uni_chain_id == chain_id
+                model_chain_index = model_index[chain_mask]
+                # Concatenate the chain's atom indices residue by residue, following
+                # the selected residue-ID order.
+                sorted_atom_index = np.concatenate(
+                    [
+                        model_chain_index[
+                            self.model_struct.atom_array.res_id[model_chain_index] == i
+                        ]
+                        for i in optimal_perm_ids
+                    ]
+                )
+                model_index[chain_mask] = sorted_atom_index
+        return model_index

{pxmeter-0.1.5 → pxmeter-0.1.6/pxmeter.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pxmeter
-Version: 0.1.5
+Version: 0.1.6
 Summary: PXMeter is a comprehensive toolkit for evaluating the quality of         structures generated by biomolecular structure prediction models.
 Author: Bytedance Inc.
 Author-email: ai4s-bio@bytedance.com

{pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/SOURCES.txt RENAMED Viewed

@@ -32,4 +32,5 @@ pxmeter/metrics/rmsd.py
 pxmeter/metrics/rmsd_metrics.py
 pxmeter/permutation/__init__.py
 pxmeter/permutation/atom.py
-pxmeter/permutation/chain.py
+pxmeter/permutation/chain.py
+pxmeter/permutation/residue.py

{pxmeter-0.1.5 → pxmeter-0.1.6}/setup.py RENAMED Viewed

@@ -20,7 +20,7 @@ with open("requirements.txt") as f:
 setup(
     name="pxmeter",
     python_requires=">=3.11",
-    version="0.1.5",
+    version="0.1.6",
     description="PXMeter is a comprehensive toolkit for evaluating the quality of \
         structures generated by biomolecular structure prediction models.",
     author="Bytedance Inc.",