pxmeter 0.1.4__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {pxmeter-0.1.4/pxmeter.egg-info → pxmeter-0.1.6}/PKG-INFO +1 -1
  2. {pxmeter-0.1.4 → pxmeter-0.1.6}/README.md +0 -1
  3. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/configs/data_config.py +10 -15
  4. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/parser.py +7 -0
  5. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/struct.py +35 -0
  6. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/utils.py +57 -0
  7. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/mapping.py +42 -15
  8. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/permutation/atom.py +2 -2
  9. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/permutation/chain.py +5 -5
  10. pxmeter-0.1.6/pxmeter/permutation/residue.py +267 -0
  11. {pxmeter-0.1.4 → pxmeter-0.1.6/pxmeter.egg-info}/PKG-INFO +1 -1
  12. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter.egg-info/SOURCES.txt +2 -1
  13. {pxmeter-0.1.4 → pxmeter-0.1.6}/setup.py +1 -1
  14. {pxmeter-0.1.4 → pxmeter-0.1.6}/LICENSE +0 -0
  15. {pxmeter-0.1.4 → pxmeter-0.1.6}/MANIFEST.in +0 -0
  16. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/__init__.py +0 -0
  17. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/calc_metric.py +0 -0
  18. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/cli.py +0 -0
  19. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/configs/__init__.py +0 -0
  20. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/configs/run_config.py +0 -0
  21. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/constants.py +0 -0
  22. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/__init__.py +0 -0
  23. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/ccd.py +0 -0
  24. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/data/writer.py +0 -0
  25. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/eval.py +0 -0
  26. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/metrics/__init__.py +0 -0
  27. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/metrics/clashes.py +0 -0
  28. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/metrics/lddt_metrics.py +0 -0
  29. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/metrics/rmsd.py +0 -0
  30. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/metrics/rmsd_metrics.py +0 -0
  31. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/permutation/__init__.py +0 -0
  32. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter/utils.py +0 -0
  33. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter.egg-info/dependency_links.txt +0 -0
  34. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter.egg-info/entry_points.txt +0 -0
  35. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter.egg-info/requires.txt +0 -0
  36. {pxmeter-0.1.4 → pxmeter-0.1.6}/pxmeter.egg-info/top_level.txt +0 -0
  37. {pxmeter-0.1.4 → pxmeter-0.1.6}/requirements.txt +0 -0
  38. {pxmeter-0.1.4 → pxmeter-0.1.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pxmeter
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
5
5
  Author: Bytedance Inc.
6
6
  Author-email: ai4s-bio@bytedance.com
@@ -124,7 +124,6 @@ If you use PXMeter in your research, please cite the following:
124
124
 
125
125
 
126
126
  ## 🚧 Limitations
127
- - PXMeter supports chain/atom permutations but not residue-level permutations. As a result, the accuracy of evaluation for branched chains, such as glycans, cannot be fully guaranteed.
128
127
  - It is recommended to use CIF files from the RCSB PDB as references, as they ensure content accuracy. All development and testing were conducted exclusively on CIF files from this source.
129
128
 
130
129
 
@@ -12,13 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import gzip
15
16
  import json
16
17
  import logging
17
18
  import os
18
- import subprocess as sp
19
19
  from pathlib import Path
20
20
 
21
21
  import gemmi
22
+ import requests
22
23
 
23
24
  logging.basicConfig(level=logging.INFO)
24
25
 
@@ -30,27 +31,21 @@ def download_ccd_cif(output_path: Path):
30
31
  Args:
31
32
  output_path (Path): The output path for saving the downloaded CCD CIF file.
32
33
  """
33
- output_path.parent.mkdir(parents=True, exist_ok=True)
34
+ output_path.mkdir(parents=True, exist_ok=True)
34
35
 
35
36
  logging.info("Downloading CCD CIF file from rcsb.org ...")
36
37
 
37
- output_cif_gz = output_path / "components.cif.gz"
38
- if output_cif_gz.exists():
39
- logging.info("Remove old zipped CCD CIF file: %s", output_cif_gz)
40
- output_cif_gz.unlink()
41
-
42
- output_cif = output_cif_gz.with_suffix("")
38
+ output_cif = output_path / "components.cif"
43
39
  if output_cif.exists():
44
40
  logging.info("Remove old CCD CIF file: %s", output_cif)
45
41
  output_cif.unlink()
46
42
 
47
- sp.run(
48
- f"wget https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz -P {output_path}",
49
- shell=True,
50
- check=True,
51
- )
52
-
53
- sp.run(f"gunzip -d {output_cif_gz}", shell=True, check=True)
43
+ url = "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
44
+ with requests.get(url, stream=True, timeout=60) as r:
45
+ r.raise_for_status()
46
+ with gzip.GzipFile(fileobj=r.raw) as f_in, output_cif.open("wb") as f_out:
47
+ for chunk in iter(lambda: f_in.read(8192), b""):
48
+ f_out.write(chunk)
54
49
 
55
50
  logging.info("Download CCD CIF file successfully: %s", output_cif)
56
51
 
@@ -360,6 +360,13 @@ class MMCIFParser:
360
360
  # First obtain all altlocs, then filter them
361
361
  tmp_altloc = "all"
362
362
 
363
+ if tmp_altloc == "all":
364
+ logging.warning(
365
+ "Bond computation is not supported with `altloc='all'`."
366
+ "include_bonds will be set to False."
367
+ )
368
+ include_bonds = False
369
+
363
370
  if assembly_id is None:
364
371
  atom_array = pdbx.get_structure(
365
372
  pdbx_file=self.cif,
@@ -96,6 +96,41 @@ class Structure:
96
96
  cif_block=cif_parser.cif.block,
97
97
  )
98
98
 
99
+ @classmethod
100
+ def from_atom_array(
101
+ cls,
102
+ atom_array: AtomArray,
103
+ entity_poly_seq: dict[str, str],
104
+ entity_poly_type: dict[str, str],
105
+ entry_id: str = "",
106
+ exptl_methods: tuple[str] = tuple(),
107
+ cif_block: dict = None,
108
+ ) -> "Structure":
109
+ """
110
+ Create a Structure object from MMCIF.
111
+
112
+ Args:
113
+ mmcif (Path or str): Path to MMCIF file.
114
+ model (int): Model number. Defaults to 1.
115
+ altloc (str): It could be one of "all", "first", "occupancy", "A", "B", etc.
116
+ Defaults to "first".
117
+ assembly_id (str, optional): Assembly ID. Defaults to None.
118
+ include_bonds (bool): Whether to include bonds in the AtomArray. Defaults to True.
119
+
120
+ Returns:
121
+ Structure: Structure object.
122
+ """
123
+ return cls(
124
+ atom_array=atom_array,
125
+ entity_poly_seq=entity_poly_seq,
126
+ entity_poly_type=entity_poly_type,
127
+ uni_chain_id=get_unique_chain_id(atom_array),
128
+ uni_atom_id=get_unique_atom_id(atom_array),
129
+ entry_id=entry_id,
130
+ exptl_methods=exptl_methods,
131
+ cif_block=cif_block,
132
+ )
133
+
99
134
  def _get_hydrogens_mask(self) -> np.ndarray:
100
135
  """
101
136
  Get mask of hydrogens.
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from collections import Counter
16
+ from datetime import datetime
16
17
 
17
18
  import biotite.sequence as seq
18
19
  import biotite.sequence.align as align
@@ -266,3 +267,59 @@ def get_mol_graph_matches(
266
267
  if num >= max_matches:
267
268
  break
268
269
  return matches
270
+
271
+
272
+ def get_res_graph_matches(
273
+ res_graph1: nx.Graph, res_graph2: nx.Graph, max_matches: int = 1000
274
+ ) -> list[dict]:
275
+ """
276
+ Find subgraph isomorphisms between two residue-level graphs using residue names.
277
+
278
+ This function enumerates mappings where a subgraph of `res_graph1` is isomorphic to
279
+ (i.e., can be relabeled to match) `res_graph2`. Node equivalence is determined
280
+ solely by the `"res_name"` node attribute; all other node or edge attributes are ignored.
281
+ Enumeration stops once `max_matches` mappings have been collected.
282
+
283
+ Args:
284
+ res_graph1 (nx.Graph): The source (typically larger) residue graph.
285
+ Node attribute required: ``"res_name"`` (e.g., "ALA", "NAG").
286
+ res_graph2 (nx.Graph): The target (typically smaller) residue graph to match against.
287
+ Node attribute required: ``"res_name"``.
288
+ max_matches (int, optional): Maximum number of mappings to return. Defaults to ``1000``.
289
+
290
+ Returns:
291
+ list[dict]: A list of node-mapping dicts. Each dict maps node IDs from `res_graph1`
292
+ (keys) to node IDs in `res_graph2` (values) representing one subgraph isomorphism.
293
+ """
294
+ isomatcher = nx.algorithms.isomorphism.GraphMatcher(
295
+ res_graph1,
296
+ res_graph2,
297
+ node_match=lambda x, y: (x["res_name"] == y["res_name"])
298
+ and (x["atom_names"] == y["atom_names"]),
299
+ )
300
+
301
+ matches = []
302
+ num = 0
303
+ for i in isomatcher.subgraph_isomorphisms_iter():
304
+ matches.append(i)
305
+ num += 1
306
+ if num >= max_matches:
307
+ break
308
+ return matches
309
+
310
+
311
+ def is_valid_date_format(date_string: str) -> bool:
312
+ """
313
+ Check if the date string is in the format yyyy-mm-dd.
314
+
315
+ Args:
316
+ date_string (str): The date string to check.
317
+
318
+ Returns:
319
+ bool: True if the date string is in the format yyyy-mm-dd, False otherwise.
320
+ """
321
+ try:
322
+ datetime.strptime(date_string, "%Y-%m-%d")
323
+ return True
324
+ except ValueError:
325
+ return False
@@ -37,6 +37,7 @@ from pxmeter.data.utils import (
37
37
  )
38
38
  from pxmeter.permutation.atom import AtomPermutation
39
39
  from pxmeter.permutation.chain import ChainPermutation
40
+ from pxmeter.permutation.residue import ResiduePermutation
40
41
 
41
42
 
42
43
  class MappingCIF:
@@ -968,25 +969,20 @@ class MappingResult:
968
969
 
969
970
  ref_struct: Structure
970
971
  model_struct: Structure
971
- ref_indices: np.ndarray
972
- model_indices: np.ndarray
972
+ mapped_ref_struct: Structure
973
+ mapped_model_struct: Structure
973
974
  chain_mapping: dict[str, str]
974
975
  chain_mapping_anchors: dict[str, str]
975
976
  model_to_ref_entity_id: dict[str, str]
976
977
 
977
978
  def get_mapped_structures(self) -> tuple[Structure, Structure]:
978
979
  """
979
- Selects and returns substructures from reference and model structures based on specified indices.
980
+ Returns the mapped reference and model structures.
980
981
 
981
982
  Returns:
982
- tuple: A tuple containing two substructures:
983
- - sele_ref_struct: The selected substructure from the reference structure.
984
- - sele_model_struct: The selected substructure from the model structure.
983
+ tuple[Structure, Structure]: A tuple containing the mapped reference and model structures.
985
984
  """
986
-
987
- sele_ref_struct = self.ref_struct.select_substructure(self.ref_indices)
988
- sele_model_struct = self.model_struct.select_substructure(self.model_indices)
989
- return sele_ref_struct, sele_model_struct
985
+ return self.mapped_ref_struct, self.mapped_model_struct
990
986
 
991
987
  @classmethod
992
988
  def from_cifs(
@@ -997,6 +993,7 @@ class MappingResult:
997
993
  ref_altloc: str = "first",
998
994
  ref_model: int = 1,
999
995
  model_chain_id_to_lig_mol: dict[str, Chem.Mol] | None = None,
996
+ chain_mapping: dict[str, str] | None = None,
1000
997
  mapping_config: ConfigDict = RUN_CONFIG.mapping,
1001
998
  ) -> "MappingResult":
1002
999
  """
@@ -1010,6 +1007,8 @@ class MappingResult:
1010
1007
  ref_model (int): Model number for the reference structure. Defaults to 1.
1011
1008
  model_chain_id_to_lig_mol (dict[str, Chem.Mol], optional): Mapping of model chain IDs
1012
1009
  to ligand molecules. Defaults to None.
1010
+ chain_mapping (dict[str, str], optional): Mapping of model chain IDs to reference chain IDs.
1011
+ Defaults to None.
1013
1012
  mapping_config (ConfigDict, optional): Configuration for the mapping process.
1014
1013
  Defaults to RUN_CONFIG.mapping.
1015
1014
 
@@ -1035,23 +1034,51 @@ class MappingResult:
1035
1034
  model_to_ref_entity_id,
1036
1035
  enumerate_all_anchors=mapping_config.enumerate_all_anchors,
1037
1036
  )
1038
- chain_mapping, chain_mapping_anchors = chain_perm.get_heurisitic_chain_mapping()
1037
+
1038
+ if not chain_mapping:
1039
+ (
1040
+ chain_mapping,
1041
+ chain_mapping_anchors,
1042
+ ) = chain_perm.get_heurisitic_chain_mapping()
1043
+ else:
1044
+ chain_mapping_anchors = {}
1045
+
1039
1046
  (
1040
1047
  chain_perm_ref_indices,
1041
1048
  chain_perm_model_indices,
1042
1049
  ) = chain_perm.get_permuted_indices(chain_mapping)
1043
1050
 
1051
+ chain_permed_ref_struct = map_cif.ref_struct.select_substructure(
1052
+ chain_perm_ref_indices
1053
+ )
1054
+ chain_permed_model_struct = map_cif.model_struct.select_substructure(
1055
+ chain_perm_model_indices
1056
+ )
1057
+
1058
+ residue_perm = ResiduePermutation(
1059
+ chain_permed_ref_struct,
1060
+ chain_permed_model_struct,
1061
+ )
1062
+ residue_permuted_indices = residue_perm.run()
1063
+ chain_permed_model_struct.reset_atom_array_annot(
1064
+ "coord",
1065
+ chain_permed_model_struct.atom_array.coord[residue_permuted_indices],
1066
+ )
1067
+
1044
1068
  atom_perm = AtomPermutation(
1045
- map_cif.ref_struct.select_substructure(chain_perm_ref_indices),
1046
- map_cif.model_struct.select_substructure(chain_perm_model_indices),
1069
+ chain_permed_ref_struct,
1070
+ chain_permed_model_struct,
1047
1071
  )
1048
1072
  atom_permuted_indices = atom_perm.run()
1073
+ permed_model_struct = chain_permed_model_struct.select_substructure(
1074
+ atom_permuted_indices
1075
+ )
1049
1076
 
1050
1077
  return cls(
1051
1078
  ref_struct=map_cif.ref_struct,
1052
1079
  model_struct=map_cif.model_struct,
1053
- ref_indices=chain_perm_ref_indices,
1054
- model_indices=chain_perm_model_indices[atom_permuted_indices],
1080
+ mapped_ref_struct=chain_permed_ref_struct,
1081
+ mapped_model_struct=permed_model_struct,
1055
1082
  chain_mapping=chain_mapping,
1056
1083
  chain_mapping_anchors=chain_mapping_anchors,
1057
1084
  model_to_ref_entity_id=model_to_ref_entity_id,
@@ -24,8 +24,8 @@ class AtomPermutation:
24
24
  Generating and applying atom permutations based on a reference structure.
25
25
 
26
26
  Args:
27
- ref_struct (Structure): The reference structure used for permutation generation.
28
- model_struct (Structure): The model structure used for permutation application.
27
+ ref_struct (Structure): The reference structure used for permutation generation.
28
+ model_struct (Structure): The model structure used for permutation application.
29
29
  """
30
30
 
31
31
  def __init__(self, ref_struct: Structure, model_struct: Structure):
@@ -29,11 +29,11 @@ class ChainPermutation:
29
29
  already aligned chains.
30
30
 
31
31
  Args:
32
- ref_struct (Structure): Reference structure object
33
- model_struct (Structure): Model structure object
34
- model_to_ref_entity_id (dict[str, str]): Mapping of model entity IDs
35
- to reference entity IDs
36
- enumerate_all_anchors (bool): Whether to enumerate all anchor chains.
32
+ ref_struct (Structure): Reference structure object
33
+ model_struct (Structure): Model structure object
34
+ model_to_ref_entity_id (dict[str, str]): Mapping of model entity IDs
35
+ to reference entity IDs
36
+ enumerate_all_anchors (bool): Whether to enumerate all anchor chains.
37
37
  """
38
38
 
39
39
  def __init__(
@@ -0,0 +1,267 @@
1
+ # Copyright 2025 ByteDance and/or its affiliates.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import networkx as nx
16
+ import numpy as np
17
+
18
+ from pxmeter.data.struct import Structure
19
+ from pxmeter.data.utils import get_res_graph_matches
20
+ from pxmeter.metrics.rmsd import align_src_to_tar, apply_transform, rmsd
21
+
22
+
23
+ class ResiduePermutation:
24
+ """
25
+ Generating and applying residue permutations based on a reference structure.
26
+
27
+ Args:
28
+ ref_struct (Structure): The reference structure used for permutation generation.
29
+ model_struct (Structure): The model structure used for permutation application.
30
+ """
31
+
32
+ def __init__(self, ref_struct: Structure, model_struct: Structure):
33
+ self.ref_struct = ref_struct
34
+ self.model_struct = model_struct
35
+
36
+ @staticmethod
37
+ def _calc_residue_centers(
38
+ res_ids: np.ndarray, coords: np.ndarray
39
+ ) -> tuple[np.ndarray, np.ndarray]:
40
+ """
41
+ Compute the geometric center (mean coordinate) for each unique residue ID.
42
+
43
+ Args:
44
+ res_ids (np.ndarray): Array of residue IDs, shape (N_atom,)
45
+ coords (np.ndarray): Cartesian coordinates, shape (N_atom, 3)
46
+
47
+ Returns:
48
+ Tuple[np.ndarray, np.ndarray]: (uniq_res_ids, centers) where
49
+ uniq_res_ids shape (N_res,), centers shape (N_res, 3) in the same order.
50
+ """
51
+ res_ids = np.asarray(res_ids)
52
+ coords = np.asarray(coords)
53
+ if (
54
+ res_ids.ndim != 1
55
+ or coords.ndim != 2
56
+ or coords.shape[1] != 3
57
+ or len(res_ids) != len(coords)
58
+ ):
59
+ raise ValueError("Shape mismatch: res_ids (N,), coords (N, 3) required.")
60
+
61
+ uniq_ids, inv = np.unique(res_ids, return_inverse=True)
62
+ centers = np.zeros((len(uniq_ids), 3), dtype=float)
63
+ counts = np.bincount(inv).astype(float)
64
+ for i in range(3):
65
+ centers[:, i] = np.bincount(inv, weights=coords[:, i])
66
+ centers /= counts[:, None]
67
+ return uniq_ids, centers
68
+
69
+ @staticmethod
70
+ def _get_branch_residue_permutations(
71
+ struct: Structure, chain_id: str
72
+ ) -> np.ndarray | None:
73
+ """
74
+ Detect branch-like connectivity within a chain using non-adjacent residue bonds and,
75
+ if the induced residue-level graph is a single tree, return residue permutations
76
+ corresponding to its graph automorphisms.
77
+
78
+ The procedure:
79
+ 1) Filter atoms by ``chain_id`` and collect inter-residue bonds from the chain's
80
+ atom-level ``BondList``.
81
+ 2) If any inter-residue bond connects residues whose numeric IDs differ by more than 1
82
+ (``|res_id_i - res_id_j| > 1``), mark the chain as having a branch-like connection.
83
+ 3) Lift inter-residue bonds to a residue-level undirected graph G (nodes = ``res_id``,
84
+ edges = covalent connections between residues).
85
+ 4) If G is disconnected, contains cycles, or does not contain residue 1 (the root), return
86
+ ``None`` (only tree-shaped branches rooted at residue 1 are supported).
87
+ 5) Annotate nodes with ``res_name`` and ``atom_names`` and enumerate automorphisms via subgraph
88
+ isomorphism of G onto itself (constrained by equal ``res_name`` and ``atom_names``). Return their induced permutations.
89
+
90
+ Args:
91
+ struct (Structure): A structure object exposing ``atom_array`` with fields
92
+ ``res_id``, ``res_name``, ``uni_chain_id``, and ``bonds``; and where
93
+ ``atom_array.bonds[mask].as_array()`` yields an ``(n_bond, 2)`` integer array
94
+ of atom index pairs for the selected chain.
95
+ chain_id (str): The target chain identifier matched against ``uni_chain_id``.
96
+
97
+ Returns:
98
+ np.ndarray | None: ``None`` if no branch-like non-adjacent residue bond is detected,
99
+ or if the residue graph is not a single tree. Otherwise an integer array of shape
100
+ ``(K, N)`` where each row encodes one automorphism as a permutation of the ``N``
101
+ residue nodes (ordered by ascending source node id). ``K`` is the number of
102
+ automorphisms found (capped internally at 1000).
103
+ """
104
+ mask = struct.uni_chain_id == chain_id
105
+ arr = struct.atom_array
106
+
107
+ if not np.any(mask):
108
+ return
109
+
110
+ bond_arr = arr.bonds[mask].as_array()
111
+
112
+ res_id_i = arr.res_id[mask][bond_arr[:, 0]]
113
+ res_id_j = arr.res_id[mask][bond_arr[:, 1]]
114
+
115
+ res_id_pairs = set(tuple(zip(res_id_i, res_id_j)))
116
+ has_branch = False
117
+ nodes_adj = set()
118
+ for i, j in res_id_pairs:
119
+ if i == j:
120
+ continue
121
+ nodes_adj.add((i, j))
122
+
123
+ if abs(i - j) > 1:
124
+ has_branch = True
125
+
126
+ if has_branch:
127
+ G = nx.Graph()
128
+ G.add_edges_from(nodes_adj)
129
+ if (
130
+ nx.number_connected_components(G) > 1
131
+ or len(nx.cycle_basis(G)) > 0
132
+ or (1 not in G.nodes)
133
+ ):
134
+ return
135
+
136
+ attrs = {}
137
+ for node in G.nodes:
138
+ node_res_name = arr.res_name[mask][arr.res_id[mask] == node][0]
139
+ node_atom_names = "_".join(
140
+ arr.atom_name[mask][arr.res_id[mask] == node]
141
+ )
142
+
143
+ if node == 1:
144
+ # Do not permute the root residue
145
+ node_res_name += "_root"
146
+
147
+ attrs[node] = {"res_name": node_res_name, "atom_names": node_atom_names}
148
+ nx.set_node_attributes(G, attrs)
149
+ matches = get_res_graph_matches(G, G, max_matches=1000)
150
+
151
+ perm = []
152
+ for match in matches:
153
+ sorted_result = sorted(match.items(), key=lambda x: x[0])
154
+ match_values = [i[1] for i in sorted_result]
155
+ if match_values[0] != 1:
156
+ continue
157
+ perm.append(match_values)
158
+
159
+ if len(perm) > 1:
160
+ perm = np.array(perm)
161
+ return perm
162
+
163
+ def _get_optimal_perm_ids_for_chain(self, chain_id: str) -> np.ndarray | None:
164
+ """
165
+ Compute the residue-ID permutation for a branch-like chain that best aligns
166
+ the model to the reference, measured by centroid RMSD.
167
+
168
+ Steps:
169
+ 1) Detects residue-level graph automorphisms for the chain (if the chain
170
+ exhibits non-adjacent inter-residue bonds indicating a branch-like tree).
171
+ 2) Treats residue 1 as fixed (root) to define the rigid alignment between
172
+ the model and the reference using root-atom coordinates.
173
+ 3) For each candidate permutation of residue IDs, applies the rigid
174
+ transform to model residue centroids and computes RMSD to the reference
175
+ residue centroids.
176
+ 4) Returns the residue-ID permutation that minimizes this RMSD.
177
+
178
+ Args:
179
+ chain_id (str): Target chain identifier to evaluate.
180
+
181
+ Returns:
182
+ np.ndarray | None: If the chain has a valid branch-like tree and at least
183
+ one non-trivial automorphism, returns an integer array of shape (N,)
184
+ containing the residue IDs in the selected order (1-based, matching the
185
+ original residue numbering). Returns ``None`` if no branch-like structure
186
+ is detected or no valid permutations are found.
187
+ """
188
+ perm = self._get_branch_residue_permutations(self.model_struct, chain_id)
189
+ if perm is None:
190
+ return
191
+
192
+ chain_mask = self.model_struct.uni_chain_id == chain_id
193
+
194
+ # Use residue 1 as the root
195
+ root_coord_mask = chain_mask & (self.model_struct.atom_array.res_id == 1)
196
+ model_root = self.model_struct.atom_array.coord[root_coord_mask]
197
+ ref_root = self.ref_struct.atom_array.coord[root_coord_mask]
198
+ if (len(model_root) == 0) or (len(ref_root) == 0):
199
+ return
200
+
201
+ assert model_root.shape == ref_root.shape
202
+
203
+ rot, trans = align_src_to_tar(model_root, ref_root)
204
+
205
+ _ref_ids, ref_centers = self._calc_residue_centers(
206
+ self.ref_struct.atom_array.res_id[chain_mask],
207
+ self.ref_struct.atom_array.coord[chain_mask],
208
+ )
209
+ model_ids, model_centers = self._calc_residue_centers(
210
+ self.model_struct.atom_array.res_id[chain_mask],
211
+ self.model_struct.atom_array.coord[chain_mask],
212
+ )
213
+ model_pos = {rid: i for i, rid in enumerate(model_ids)}
214
+
215
+ best_perm = None
216
+ best_rmsd = np.inf
217
+ for ids in perm:
218
+ ordered = np.array([model_pos[i] for i in ids], dtype=int)
219
+ model_mat = model_centers[ordered]
220
+
221
+ transformed = apply_transform(model_mat, rot, trans)
222
+ v = rmsd(transformed, ref_centers)
223
+ if v < best_rmsd:
224
+ best_rmsd = v
225
+ best_perm = ids
226
+
227
+ return best_perm
228
+
229
+ def run(self):
230
+ """
231
+ Reorder model atoms within non-polymer chains according to the
232
+ RMSD-optimal residue-ID permutation per chain.
233
+
234
+ For each non-polymer entity and its chains:
235
+ - Detect branch-like residue graphs and enumerate automorphisms.
236
+ - Select the permutation of residue IDs that minimizes centroid RMSD
237
+ to the reference (via rigid alignment anchored at residue 1).
238
+ - Stably reorder atom indices of that chain so atoms follow the selected
239
+ residue-ID order (preserving within-residue atom order).
240
+
241
+ Returns:
242
+ np.ndarray: A 1-D integer array whose length equals the number of atoms in the model structure,
243
+ representing the remapped atom indices.
244
+ """
245
+ model_index = np.arange(len(self.model_struct.atom_array))
246
+ model_entity_id_to_chain_ids = self.model_struct.get_entity_id_to_chain_ids()
247
+ for entity_id, chain_ids in model_entity_id_to_chain_ids.items():
248
+ if entity_id in self.model_struct.entity_poly_type:
249
+ # Skip polymer
250
+ continue
251
+
252
+ for chain_id in chain_ids:
253
+ optimal_perm_ids = self._get_optimal_perm_ids_for_chain(chain_id)
254
+ if optimal_perm_ids is None:
255
+ continue
256
+ chain_mask = self.model_struct.uni_chain_id == chain_id
257
+ model_chain_index = model_index[chain_mask]
258
+ sorted_atom_index = np.concatenate(
259
+ [
260
+ model_chain_index[
261
+ self.model_struct.atom_array.res_id[model_chain_index] == i
262
+ ]
263
+ for i in optimal_perm_ids
264
+ ]
265
+ )
266
+ model_index[chain_mask] = sorted_atom_index
267
+ return model_index
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pxmeter
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
5
5
  Author: Bytedance Inc.
6
6
  Author-email: ai4s-bio@bytedance.com
@@ -32,4 +32,5 @@ pxmeter/metrics/rmsd.py
32
32
  pxmeter/metrics/rmsd_metrics.py
33
33
  pxmeter/permutation/__init__.py
34
34
  pxmeter/permutation/atom.py
35
- pxmeter/permutation/chain.py
35
+ pxmeter/permutation/chain.py
36
+ pxmeter/permutation/residue.py
@@ -20,7 +20,7 @@ with open("requirements.txt") as f:
20
20
  setup(
21
21
  name="pxmeter",
22
22
  python_requires=">=3.11",
23
- version="0.1.4",
23
+ version="0.1.6",
24
24
  description="PXMeter is a comprehensive toolkit for evaluating the quality of \
25
25
  structures generated by biomolecular structure prediction models.",
26
26
  author="Bytedance Inc.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes