pxmeter 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pxmeter-0.1.5/pxmeter.egg-info → pxmeter-0.1.6}/PKG-INFO +1 -1
- {pxmeter-0.1.5 → pxmeter-0.1.6}/README.md +0 -1
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/parser.py +7 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/struct.py +35 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/utils.py +57 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/mapping.py +42 -15
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/permutation/atom.py +2 -2
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/permutation/chain.py +5 -5
- pxmeter-0.1.6/pxmeter/permutation/residue.py +267 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6/pxmeter.egg-info}/PKG-INFO +1 -1
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/SOURCES.txt +2 -1
- {pxmeter-0.1.5 → pxmeter-0.1.6}/setup.py +1 -1
- {pxmeter-0.1.5 → pxmeter-0.1.6}/LICENSE +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/MANIFEST.in +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/__init__.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/calc_metric.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/cli.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/configs/__init__.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/configs/data_config.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/configs/run_config.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/constants.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/__init__.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/ccd.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/data/writer.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/eval.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/metrics/__init__.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/metrics/clashes.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/metrics/lddt_metrics.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/metrics/rmsd.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/metrics/rmsd_metrics.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/permutation/__init__.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter/utils.py +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/dependency_links.txt +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/entry_points.txt +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/requires.txt +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/pxmeter.egg-info/top_level.txt +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/requirements.txt +0 -0
- {pxmeter-0.1.5 → pxmeter-0.1.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pxmeter
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
|
|
5
5
|
Author: Bytedance Inc.
|
|
6
6
|
Author-email: ai4s-bio@bytedance.com
|
|
@@ -124,7 +124,6 @@ If you use PXMeter in your research, please cite the following:
|
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
## 🚧 Limitations
|
|
127
|
-
- PXMeter supports chain/atom permutations but not residue-level permutations. As a result, the accuracy of evaluation for branched chains, such as glycans, cannot be fully guaranteed.
|
|
128
127
|
- It is recommended to use CIF files from the RCSB PDB as references, as they ensure content accuracy. All development and testing were conducted exclusively on CIF files from this source.
|
|
129
128
|
|
|
130
129
|
|
|
@@ -360,6 +360,13 @@ class MMCIFParser:
|
|
|
360
360
|
# First obtain all altlocs, then filter them
|
|
361
361
|
tmp_altloc = "all"
|
|
362
362
|
|
|
363
|
+
if tmp_altloc == "all":
|
|
364
|
+
logging.warning(
|
|
365
|
+
"Bond computation is not supported with `altloc='all'`."
|
|
366
|
+
"include_bonds will be set to False."
|
|
367
|
+
)
|
|
368
|
+
include_bonds = False
|
|
369
|
+
|
|
363
370
|
if assembly_id is None:
|
|
364
371
|
atom_array = pdbx.get_structure(
|
|
365
372
|
pdbx_file=self.cif,
|
|
@@ -96,6 +96,41 @@ class Structure:
|
|
|
96
96
|
cif_block=cif_parser.cif.block,
|
|
97
97
|
)
|
|
98
98
|
|
|
99
|
+
@classmethod
def from_atom_array(
    cls,
    atom_array: AtomArray,
    entity_poly_seq: dict[str, str],
    entity_poly_type: dict[str, str],
    entry_id: str = "",
    exptl_methods: tuple[str, ...] = tuple(),
    cif_block: dict | None = None,
) -> "Structure":
    """
    Create a Structure object from an already-built AtomArray.

    Unlike a file-based constructor, nothing is parsed here: the caller
    supplies the atom array and the entity metadata, and only the unique
    chain / atom identifiers are derived from ``atom_array``.

    Args:
        atom_array (AtomArray): Atoms of the structure. Used as-is and also
            passed to ``get_unique_chain_id`` and ``get_unique_atom_id``.
        entity_poly_seq (dict[str, str]): Forwarded unchanged to the
            constructor (presumably entity ID -> polymer sequence; confirm
            against the ``Structure`` constructor).
        entity_poly_type (dict[str, str]): Forwarded unchanged to the
            constructor (presumably entity ID -> polymer type; confirm
            against the ``Structure`` constructor).
        entry_id (str): Entry identifier. Defaults to "".
        exptl_methods (tuple[str, ...]): Experimental methods. Defaults to
            an empty tuple.
        cif_block (dict, optional): CIF block associated with the structure,
            if any. Defaults to None.

    Returns:
        Structure: Structure object.
    """
    return cls(
        atom_array=atom_array,
        entity_poly_seq=entity_poly_seq,
        entity_poly_type=entity_poly_type,
        uni_chain_id=get_unique_chain_id(atom_array),
        uni_atom_id=get_unique_atom_id(atom_array),
        entry_id=entry_id,
        exptl_methods=exptl_methods,
        cif_block=cif_block,
    )
|
|
133
|
+
|
|
99
134
|
def _get_hydrogens_mask(self) -> np.ndarray:
|
|
100
135
|
"""
|
|
101
136
|
Get mask of hydrogens.
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from collections import Counter
|
|
16
|
+
from datetime import datetime
|
|
16
17
|
|
|
17
18
|
import biotite.sequence as seq
|
|
18
19
|
import biotite.sequence.align as align
|
|
@@ -266,3 +267,59 @@ def get_mol_graph_matches(
|
|
|
266
267
|
if num >= max_matches:
|
|
267
268
|
break
|
|
268
269
|
return matches
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def get_res_graph_matches(
    res_graph1: nx.Graph, res_graph2: nx.Graph, max_matches: int = 1000
) -> list[dict]:
    """
    Find subgraph isomorphisms between two residue-level graphs.

    This function enumerates mappings where a subgraph of `res_graph1` is isomorphic to
    `res_graph2`. Two nodes are considered equivalent only if BOTH their ``"res_name"``
    and their ``"atom_names"`` node attributes are equal; edge attributes are ignored.
    Enumeration stops once `max_matches` mappings have been collected.

    Args:
        res_graph1 (nx.Graph): The source (typically larger) residue graph.
            Node attributes required: ``"res_name"`` and ``"atom_names"``.
        res_graph2 (nx.Graph): The target (typically smaller) residue graph to match against.
            Node attributes required: ``"res_name"`` and ``"atom_names"``.
        max_matches (int, optional): Maximum number of mappings to return. Values
            below 1 yield an empty list. Defaults to ``1000``.

    Returns:
        list[dict]: A list of node-mapping dicts. Each dict maps node IDs from `res_graph1`
            (keys) to node IDs in `res_graph2` (values) representing one subgraph isomorphism.
    """
    # Local import keeps this block self-contained without touching file-level imports.
    from itertools import islice

    def _same_residue(attrs1: dict, attrs2: dict) -> bool:
        # Residues are interchangeable only when the name and the atom list agree.
        return (
            attrs1["res_name"] == attrs2["res_name"]
            and attrs1["atom_names"] == attrs2["atom_names"]
        )

    isomatcher = nx.algorithms.isomorphism.GraphMatcher(
        res_graph1,
        res_graph2,
        node_match=_same_residue,
    )

    # islice stops the (potentially exponential) enumeration lazily at the cap;
    # clamping at 0 avoids islice's ValueError for negative caps.
    return list(islice(isomatcher.subgraph_isomorphisms_iter(), max(max_matches, 0)))
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def is_valid_date_format(date_string: str) -> bool:
    """
    Check if the date string parses as a date in the format yyyy-mm-dd.

    The check is delegated to ``datetime.strptime`` with the pattern
    ``"%Y-%m-%d"``, so the string must also denote a real calendar date
    (e.g. "2024-02-30" is rejected). Note that ``strptime`` treats leading
    zeros of month and day as optional.

    Args:
        date_string (str): The date string to check.

    Returns:
        bool: True if the date string can be parsed with ``"%Y-%m-%d"``,
            False otherwise (including for non-string input such as None).
    """
    try:
        datetime.strptime(date_string, "%Y-%m-%d")
    except (ValueError, TypeError):
        # ValueError: pattern mismatch or impossible date.
        # TypeError: input is not a string (e.g. None) -- a predicate should
        # answer False rather than crash.
        return False
    return True
|
|
@@ -37,6 +37,7 @@ from pxmeter.data.utils import (
|
|
|
37
37
|
)
|
|
38
38
|
from pxmeter.permutation.atom import AtomPermutation
|
|
39
39
|
from pxmeter.permutation.chain import ChainPermutation
|
|
40
|
+
from pxmeter.permutation.residue import ResiduePermutation
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
class MappingCIF:
|
|
@@ -968,25 +969,20 @@ class MappingResult:
|
|
|
968
969
|
|
|
969
970
|
ref_struct: Structure
|
|
970
971
|
model_struct: Structure
|
|
971
|
-
|
|
972
|
-
|
|
972
|
+
mapped_ref_struct: Structure
|
|
973
|
+
mapped_model_struct: Structure
|
|
973
974
|
chain_mapping: dict[str, str]
|
|
974
975
|
chain_mapping_anchors: dict[str, str]
|
|
975
976
|
model_to_ref_entity_id: dict[str, str]
|
|
976
977
|
|
|
977
978
|
def get_mapped_structures(self) -> tuple[Structure, Structure]:
|
|
978
979
|
"""
|
|
979
|
-
|
|
980
|
+
Returns the mapped reference and model structures.
|
|
980
981
|
|
|
981
982
|
Returns:
|
|
982
|
-
tuple: A tuple containing
|
|
983
|
-
- sele_ref_struct: The selected substructure from the reference structure.
|
|
984
|
-
- sele_model_struct: The selected substructure from the model structure.
|
|
983
|
+
tuple[Structure, Structure]: A tuple containing the mapped reference and model structures.
|
|
985
984
|
"""
|
|
986
|
-
|
|
987
|
-
sele_ref_struct = self.ref_struct.select_substructure(self.ref_indices)
|
|
988
|
-
sele_model_struct = self.model_struct.select_substructure(self.model_indices)
|
|
989
|
-
return sele_ref_struct, sele_model_struct
|
|
985
|
+
return self.mapped_ref_struct, self.mapped_model_struct
|
|
990
986
|
|
|
991
987
|
@classmethod
|
|
992
988
|
def from_cifs(
|
|
@@ -997,6 +993,7 @@ class MappingResult:
|
|
|
997
993
|
ref_altloc: str = "first",
|
|
998
994
|
ref_model: int = 1,
|
|
999
995
|
model_chain_id_to_lig_mol: dict[str, Chem.Mol] | None = None,
|
|
996
|
+
chain_mapping: dict[str, str] | None = None,
|
|
1000
997
|
mapping_config: ConfigDict = RUN_CONFIG.mapping,
|
|
1001
998
|
) -> "MappingResult":
|
|
1002
999
|
"""
|
|
@@ -1010,6 +1007,8 @@ class MappingResult:
|
|
|
1010
1007
|
ref_model (int): Model number for the reference structure. Defaults to 1.
|
|
1011
1008
|
model_chain_id_to_lig_mol (dict[str, Chem.Mol], optional): Mapping of model chain IDs
|
|
1012
1009
|
to ligand molecules. Defaults to None.
|
|
1010
|
+
chain_mapping (dict[str, str], optional): Mapping of model chain IDs to reference chain IDs.
|
|
1011
|
+
Defaults to None.
|
|
1013
1012
|
mapping_config (ConfigDict, optional): Configuration for the mapping process.
|
|
1014
1013
|
Defaults to RUN_CONFIG.mapping.
|
|
1015
1014
|
|
|
@@ -1035,23 +1034,51 @@ class MappingResult:
|
|
|
1035
1034
|
model_to_ref_entity_id,
|
|
1036
1035
|
enumerate_all_anchors=mapping_config.enumerate_all_anchors,
|
|
1037
1036
|
)
|
|
1038
|
-
|
|
1037
|
+
|
|
1038
|
+
if not chain_mapping:
|
|
1039
|
+
(
|
|
1040
|
+
chain_mapping,
|
|
1041
|
+
chain_mapping_anchors,
|
|
1042
|
+
) = chain_perm.get_heurisitic_chain_mapping()
|
|
1043
|
+
else:
|
|
1044
|
+
chain_mapping_anchors = {}
|
|
1045
|
+
|
|
1039
1046
|
(
|
|
1040
1047
|
chain_perm_ref_indices,
|
|
1041
1048
|
chain_perm_model_indices,
|
|
1042
1049
|
) = chain_perm.get_permuted_indices(chain_mapping)
|
|
1043
1050
|
|
|
1051
|
+
chain_permed_ref_struct = map_cif.ref_struct.select_substructure(
|
|
1052
|
+
chain_perm_ref_indices
|
|
1053
|
+
)
|
|
1054
|
+
chain_permed_model_struct = map_cif.model_struct.select_substructure(
|
|
1055
|
+
chain_perm_model_indices
|
|
1056
|
+
)
|
|
1057
|
+
|
|
1058
|
+
residue_perm = ResiduePermutation(
|
|
1059
|
+
chain_permed_ref_struct,
|
|
1060
|
+
chain_permed_model_struct,
|
|
1061
|
+
)
|
|
1062
|
+
residue_permuted_indices = residue_perm.run()
|
|
1063
|
+
chain_permed_model_struct.reset_atom_array_annot(
|
|
1064
|
+
"coord",
|
|
1065
|
+
chain_permed_model_struct.atom_array.coord[residue_permuted_indices],
|
|
1066
|
+
)
|
|
1067
|
+
|
|
1044
1068
|
atom_perm = AtomPermutation(
|
|
1045
|
-
|
|
1046
|
-
|
|
1069
|
+
chain_permed_ref_struct,
|
|
1070
|
+
chain_permed_model_struct,
|
|
1047
1071
|
)
|
|
1048
1072
|
atom_permuted_indices = atom_perm.run()
|
|
1073
|
+
permed_model_struct = chain_permed_model_struct.select_substructure(
|
|
1074
|
+
atom_permuted_indices
|
|
1075
|
+
)
|
|
1049
1076
|
|
|
1050
1077
|
return cls(
|
|
1051
1078
|
ref_struct=map_cif.ref_struct,
|
|
1052
1079
|
model_struct=map_cif.model_struct,
|
|
1053
|
-
|
|
1054
|
-
|
|
1080
|
+
mapped_ref_struct=chain_permed_ref_struct,
|
|
1081
|
+
mapped_model_struct=permed_model_struct,
|
|
1055
1082
|
chain_mapping=chain_mapping,
|
|
1056
1083
|
chain_mapping_anchors=chain_mapping_anchors,
|
|
1057
1084
|
model_to_ref_entity_id=model_to_ref_entity_id,
|
|
@@ -24,8 +24,8 @@ class AtomPermutation:
|
|
|
24
24
|
Generating and applying atom permutations based on a reference structure.
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
ref_struct (Structure): The reference structure used for permutation generation.
|
|
28
|
+
model_struct (Structure): The model structure used for permutation application.
|
|
29
29
|
"""
|
|
30
30
|
|
|
31
31
|
def __init__(self, ref_struct: Structure, model_struct: Structure):
|
|
@@ -29,11 +29,11 @@ class ChainPermutation:
|
|
|
29
29
|
already aligned chains.
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
32
|
+
ref_struct (Structure): Reference structure object
|
|
33
|
+
model_struct (Structure): Model structure object
|
|
34
|
+
model_to_ref_entity_id (dict[str, str]): Mapping of model entity IDs
|
|
35
|
+
to reference entity IDs
|
|
36
|
+
enumerate_all_anchors (bool): Whether to enumerate all anchor chains.
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
def __init__(
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
# Copyright 2025 ByteDance and/or its affiliates.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import networkx as nx
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
from pxmeter.data.struct import Structure
|
|
19
|
+
from pxmeter.data.utils import get_res_graph_matches
|
|
20
|
+
from pxmeter.metrics.rmsd import align_src_to_tar, apply_transform, rmsd
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ResiduePermutation:
|
|
24
|
+
"""
|
|
25
|
+
Generating and applying residue permutations based on a reference structure.
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
ref_struct (Structure): The reference structure used for permutation generation.
|
|
29
|
+
model_struct (Structure): The model structure used for permutation application.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
def __init__(self, ref_struct: Structure, model_struct: Structure):
|
|
33
|
+
self.ref_struct = ref_struct
|
|
34
|
+
self.model_struct = model_struct
|
|
35
|
+
|
|
36
|
+
@staticmethod
|
|
37
|
+
def _calc_residue_centers(
|
|
38
|
+
res_ids: np.ndarray, coords: np.ndarray
|
|
39
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
|
40
|
+
"""
|
|
41
|
+
Compute the geometric center (mean coordinate) for each unique residue ID.
|
|
42
|
+
|
|
43
|
+
Args:
|
|
44
|
+
res_ids (np.ndarray): Array of residue IDs, shape (N_atom,)
|
|
45
|
+
coords (np.ndarray): Cartesian coordinates, shape (N_atom, 3)
|
|
46
|
+
|
|
47
|
+
Returns:
|
|
48
|
+
Tuple[np.ndarray, np.ndarray]: (uniq_res_ids, centers) where
|
|
49
|
+
uniq_res_ids shape (N_res,), centers shape (N_res, 3) in the same order.
|
|
50
|
+
"""
|
|
51
|
+
res_ids = np.asarray(res_ids)
|
|
52
|
+
coords = np.asarray(coords)
|
|
53
|
+
if (
|
|
54
|
+
res_ids.ndim != 1
|
|
55
|
+
or coords.ndim != 2
|
|
56
|
+
or coords.shape[1] != 3
|
|
57
|
+
or len(res_ids) != len(coords)
|
|
58
|
+
):
|
|
59
|
+
raise ValueError("Shape mismatch: res_ids (N,), coords (N, 3) required.")
|
|
60
|
+
|
|
61
|
+
uniq_ids, inv = np.unique(res_ids, return_inverse=True)
|
|
62
|
+
centers = np.zeros((len(uniq_ids), 3), dtype=float)
|
|
63
|
+
counts = np.bincount(inv).astype(float)
|
|
64
|
+
for i in range(3):
|
|
65
|
+
centers[:, i] = np.bincount(inv, weights=coords[:, i])
|
|
66
|
+
centers /= counts[:, None]
|
|
67
|
+
return uniq_ids, centers
|
|
68
|
+
|
|
69
|
+
@staticmethod
|
|
70
|
+
def _get_branch_residue_permutations(
|
|
71
|
+
struct: Structure, chain_id: str
|
|
72
|
+
) -> np.ndarray | None:
|
|
73
|
+
"""
|
|
74
|
+
Detect branch-like connectivity within a chain using non-adjacent residue bonds and,
|
|
75
|
+
if the induced residue-level graph is a single tree, return residue permutations
|
|
76
|
+
corresponding to its graph automorphisms.
|
|
77
|
+
|
|
78
|
+
The procedure:
|
|
79
|
+
1) Filter atoms by ``chain_id`` and collect inter-residue bonds from the chain's
|
|
80
|
+
atom-level ``BondList``.
|
|
81
|
+
2) If any inter-residue bond connects residues whose numeric IDs differ by more than 1
|
|
82
|
+
(``|res_id_i - res_id_j| > 1``), mark the chain as having a branch-like connection.
|
|
83
|
+
3) Lift inter-residue bonds to a residue-level undirected graph G (nodes = ``res_id``,
|
|
84
|
+
edges = covalent connections between residues).
|
|
85
|
+
4) If G is disconnected or contains cycles, return ``None`` (only tree-shaped branches
|
|
86
|
+
are supported).
|
|
87
|
+
5) Annotate nodes with ``res_name`` and enumerate automorphisms via subgraph isomorphism
|
|
88
|
+
of G onto itself (constrained by equal ``res_name``). Return their induced permutations.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
struct (Structure): A structure object exposing ``atom_array`` with fields
|
|
92
|
+
``res_id``, ``res_name``, ``uni_chain_id``, and ``bonds``; and where
|
|
93
|
+
``atom_array.bonds[mask].as_array()`` yields an ``(n_bond, 2)`` integer array
|
|
94
|
+
of atom index pairs for the selected chain.
|
|
95
|
+
chain_id (str): The target chain identifier matched against ``uni_chain_id``.
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
np.ndarray | None: ``None`` if no branch-like non-adjacent residue bond is detected,
|
|
99
|
+
or if the residue graph is not a single tree. Otherwise an integer array of shape
|
|
100
|
+
``(K, N)`` where each row encodes one automorphism as a permutation of the ``N``
|
|
101
|
+
residue nodes (ordered by ascending source node id). ``K`` is the number of
|
|
102
|
+
automorphisms found (capped internally at 1000).
|
|
103
|
+
"""
|
|
104
|
+
mask = struct.uni_chain_id == chain_id
|
|
105
|
+
arr = struct.atom_array
|
|
106
|
+
|
|
107
|
+
if not np.any(mask):
|
|
108
|
+
return
|
|
109
|
+
|
|
110
|
+
bond_arr = arr.bonds[mask].as_array()
|
|
111
|
+
|
|
112
|
+
res_id_i = arr.res_id[mask][bond_arr[:, 0]]
|
|
113
|
+
res_id_j = arr.res_id[mask][bond_arr[:, 1]]
|
|
114
|
+
|
|
115
|
+
res_id_pairs = set(tuple(zip(res_id_i, res_id_j)))
|
|
116
|
+
has_branch = False
|
|
117
|
+
nodes_adj = set()
|
|
118
|
+
for i, j in res_id_pairs:
|
|
119
|
+
if i == j:
|
|
120
|
+
continue
|
|
121
|
+
nodes_adj.add((i, j))
|
|
122
|
+
|
|
123
|
+
if abs(i - j) > 1:
|
|
124
|
+
has_branch = True
|
|
125
|
+
|
|
126
|
+
if has_branch:
|
|
127
|
+
G = nx.Graph()
|
|
128
|
+
G.add_edges_from(nodes_adj)
|
|
129
|
+
if (
|
|
130
|
+
nx.number_connected_components(G) > 1
|
|
131
|
+
or len(nx.cycle_basis(G)) > 0
|
|
132
|
+
or (1 not in G.nodes)
|
|
133
|
+
):
|
|
134
|
+
return
|
|
135
|
+
|
|
136
|
+
attrs = {}
|
|
137
|
+
for node in G.nodes:
|
|
138
|
+
node_res_name = arr.res_name[mask][arr.res_id[mask] == node][0]
|
|
139
|
+
node_atom_names = "_".join(
|
|
140
|
+
arr.atom_name[mask][arr.res_id[mask] == node]
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
if node == 1:
|
|
144
|
+
# Do not permute the root residue
|
|
145
|
+
node_res_name += "_root"
|
|
146
|
+
|
|
147
|
+
attrs[node] = {"res_name": node_res_name, "atom_names": node_atom_names}
|
|
148
|
+
nx.set_node_attributes(G, attrs)
|
|
149
|
+
matches = get_res_graph_matches(G, G, max_matches=1000)
|
|
150
|
+
|
|
151
|
+
perm = []
|
|
152
|
+
for match in matches:
|
|
153
|
+
sorted_result = sorted(match.items(), key=lambda x: x[0])
|
|
154
|
+
match_values = [i[1] for i in sorted_result]
|
|
155
|
+
if match_values[0] != 1:
|
|
156
|
+
continue
|
|
157
|
+
perm.append(match_values)
|
|
158
|
+
|
|
159
|
+
if len(perm) > 1:
|
|
160
|
+
perm = np.array(perm)
|
|
161
|
+
return perm
|
|
162
|
+
|
|
163
|
+
def _get_optimal_perm_ids_for_chain(self, chain_id: str) -> np.ndarray | None:
|
|
164
|
+
"""
|
|
165
|
+
Compute the residue-ID permutation for a branch-like chain that best aligns
|
|
166
|
+
the model to the reference, measured by centroid RMSD.
|
|
167
|
+
|
|
168
|
+
Steps:
|
|
169
|
+
1) Detects residue-level graph automorphisms for the chain (if the chain
|
|
170
|
+
exhibits non-adjacent inter-residue bonds indicating a branch-like tree).
|
|
171
|
+
2) Treats residue 1 as fixed (root) to define the rigid alignment between
|
|
172
|
+
the model and the reference using root-atom coordinates.
|
|
173
|
+
3) For each candidate permutation of residue IDs, applies the rigid
|
|
174
|
+
transform to model residue centroids and computes RMSD to the reference
|
|
175
|
+
residue centroids.
|
|
176
|
+
4) Returns the residue-ID permutation that minimizes this RMSD.
|
|
177
|
+
|
|
178
|
+
Args:
|
|
179
|
+
chain_id (str): Target chain identifier to evaluate.
|
|
180
|
+
|
|
181
|
+
Returns:
|
|
182
|
+
np.ndarray | None: If the chain has a valid branch-like tree and at least
|
|
183
|
+
one non-trivial automorphism, returns an integer array of shape (N,)
|
|
184
|
+
containing the residue IDs in the selected order (1-based, matching the
|
|
185
|
+
original residue numbering). Returns ``None`` if no branch-like structure
|
|
186
|
+
is detected or no valid permutations are found.
|
|
187
|
+
"""
|
|
188
|
+
perm = self._get_branch_residue_permutations(self.model_struct, chain_id)
|
|
189
|
+
if perm is None:
|
|
190
|
+
return
|
|
191
|
+
|
|
192
|
+
chain_mask = self.model_struct.uni_chain_id == chain_id
|
|
193
|
+
|
|
194
|
+
# Use the residue 1 as the root
|
|
195
|
+
root_coord_mask = chain_mask & (self.model_struct.atom_array.res_id == 1)
|
|
196
|
+
model_root = self.model_struct.atom_array.coord[root_coord_mask]
|
|
197
|
+
ref_root = self.ref_struct.atom_array.coord[root_coord_mask]
|
|
198
|
+
if (len(model_root) == 0) or (len(ref_root) == 0):
|
|
199
|
+
return
|
|
200
|
+
|
|
201
|
+
assert model_root.shape == ref_root.shape
|
|
202
|
+
|
|
203
|
+
rot, trans = align_src_to_tar(model_root, ref_root)
|
|
204
|
+
|
|
205
|
+
_ref_ids, ref_centers = self._calc_residue_centers(
|
|
206
|
+
self.ref_struct.atom_array.res_id[chain_mask],
|
|
207
|
+
self.ref_struct.atom_array.coord[chain_mask],
|
|
208
|
+
)
|
|
209
|
+
model_ids, model_centers = self._calc_residue_centers(
|
|
210
|
+
self.model_struct.atom_array.res_id[chain_mask],
|
|
211
|
+
self.model_struct.atom_array.coord[chain_mask],
|
|
212
|
+
)
|
|
213
|
+
model_pos = {rid: i for i, rid in enumerate(model_ids)}
|
|
214
|
+
|
|
215
|
+
best_perm = None
|
|
216
|
+
best_rmsd = np.inf
|
|
217
|
+
for ids in perm:
|
|
218
|
+
ordered = np.array([model_pos[i] for i in ids], dtype=int)
|
|
219
|
+
model_mat = model_centers[ordered]
|
|
220
|
+
|
|
221
|
+
transformed = apply_transform(model_mat, rot, trans)
|
|
222
|
+
v = rmsd(transformed, ref_centers)
|
|
223
|
+
if v < best_rmsd:
|
|
224
|
+
best_rmsd = v
|
|
225
|
+
best_perm = ids
|
|
226
|
+
|
|
227
|
+
return best_perm
|
|
228
|
+
|
|
229
|
+
def run(self):
|
|
230
|
+
"""
|
|
231
|
+
Reorder model atoms within non-polymer chains according to the
|
|
232
|
+
RMSD-optimal residue-ID permutation per chain.
|
|
233
|
+
|
|
234
|
+
For each non-polymer entity and its chains:
|
|
235
|
+
- Detect branch-like residue graphs and enumerate automorphisms.
|
|
236
|
+
- Select the permutation of residue IDs that minimizes centroid RMSD
|
|
237
|
+
to the reference (via rigid alignment anchored at residue 1).
|
|
238
|
+
- Stably reorder atom indices of that chain so atoms follow the selected
|
|
239
|
+
residue-ID order (preserving within-residue atom order).
|
|
240
|
+
|
|
241
|
+
Returns:
|
|
242
|
+
np.ndarray: A 1-D integer array of length,
|
|
243
|
+
representing the remapped atom indices.
|
|
244
|
+
"""
|
|
245
|
+
model_index = np.arange(len(self.model_struct.atom_array))
|
|
246
|
+
model_entity_id_to_chain_ids = self.model_struct.get_entity_id_to_chain_ids()
|
|
247
|
+
for entity_id, chain_ids in model_entity_id_to_chain_ids.items():
|
|
248
|
+
if entity_id in self.model_struct.entity_poly_type:
|
|
249
|
+
# Skip polymer
|
|
250
|
+
continue
|
|
251
|
+
|
|
252
|
+
for chain_id in chain_ids:
|
|
253
|
+
optimal_perm_ids = self._get_optimal_perm_ids_for_chain(chain_id)
|
|
254
|
+
if optimal_perm_ids is None:
|
|
255
|
+
continue
|
|
256
|
+
chain_mask = self.model_struct.uni_chain_id == chain_id
|
|
257
|
+
model_chain_index = model_index[chain_mask]
|
|
258
|
+
sorted_atom_index = np.concatenate(
|
|
259
|
+
[
|
|
260
|
+
model_chain_index[
|
|
261
|
+
self.model_struct.atom_array.res_id[model_chain_index] == i
|
|
262
|
+
]
|
|
263
|
+
for i in optimal_perm_ids
|
|
264
|
+
]
|
|
265
|
+
)
|
|
266
|
+
model_index[chain_mask] = sorted_atom_index
|
|
267
|
+
return model_index
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pxmeter
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
|
|
5
5
|
Author: Bytedance Inc.
|
|
6
6
|
Author-email: ai4s-bio@bytedance.com
|
|
@@ -20,7 +20,7 @@ with open("requirements.txt") as f:
|
|
|
20
20
|
setup(
|
|
21
21
|
name="pxmeter",
|
|
22
22
|
python_requires=">=3.11",
|
|
23
|
-
version="0.1.5",
|
|
23
|
+
version="0.1.6",
|
|
24
24
|
description="PXMeter is a comprehensive toolkit for evaluating the quality of \
|
|
25
25
|
structures generated by biomolecular structure prediction models.",
|
|
26
26
|
author="Bytedance Inc.",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|