pxmeter 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {pxmeter-0.1.3/pxmeter.egg-info → pxmeter-0.1.5}/PKG-INFO +1 -1
  2. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/configs/data_config.py +10 -15
  3. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/struct.py +32 -0
  4. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/permutation/chain.py +105 -9
  5. {pxmeter-0.1.3 → pxmeter-0.1.5/pxmeter.egg-info}/PKG-INFO +1 -1
  6. {pxmeter-0.1.3 → pxmeter-0.1.5}/setup.py +1 -1
  7. {pxmeter-0.1.3 → pxmeter-0.1.5}/LICENSE +0 -0
  8. {pxmeter-0.1.3 → pxmeter-0.1.5}/MANIFEST.in +0 -0
  9. {pxmeter-0.1.3 → pxmeter-0.1.5}/README.md +0 -0
  10. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/__init__.py +0 -0
  11. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/calc_metric.py +0 -0
  12. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/cli.py +0 -0
  13. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/configs/__init__.py +0 -0
  14. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/configs/run_config.py +0 -0
  15. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/constants.py +0 -0
  16. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/__init__.py +0 -0
  17. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/ccd.py +0 -0
  18. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/parser.py +0 -0
  19. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/utils.py +0 -0
  20. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/data/writer.py +0 -0
  21. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/eval.py +0 -0
  22. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/mapping.py +0 -0
  23. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/metrics/__init__.py +0 -0
  24. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/metrics/clashes.py +0 -0
  25. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/metrics/lddt_metrics.py +0 -0
  26. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/metrics/rmsd.py +0 -0
  27. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/metrics/rmsd_metrics.py +0 -0
  28. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/permutation/__init__.py +0 -0
  29. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/permutation/atom.py +0 -0
  30. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter/utils.py +0 -0
  31. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter.egg-info/SOURCES.txt +0 -0
  32. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter.egg-info/dependency_links.txt +0 -0
  33. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter.egg-info/entry_points.txt +0 -0
  34. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter.egg-info/requires.txt +0 -0
  35. {pxmeter-0.1.3 → pxmeter-0.1.5}/pxmeter.egg-info/top_level.txt +0 -0
  36. {pxmeter-0.1.3 → pxmeter-0.1.5}/requirements.txt +0 -0
  37. {pxmeter-0.1.3 → pxmeter-0.1.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pxmeter
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
5
5
  Author: Bytedance Inc.
6
6
  Author-email: ai4s-bio@bytedance.com
@@ -12,13 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import gzip
15
16
  import json
16
17
  import logging
17
18
  import os
18
- import subprocess as sp
19
19
  from pathlib import Path
20
20
 
21
21
  import gemmi
22
+ import requests
22
23
 
23
24
  logging.basicConfig(level=logging.INFO)
24
25
 
@@ -30,27 +31,21 @@ def download_ccd_cif(output_path: Path):
30
31
  Args:
31
32
  output_path (Path): The output path for saving the downloaded CCD CIF file.
32
33
  """
33
- output_path.parent.mkdir(parents=True, exist_ok=True)
34
+ output_path.mkdir(parents=True, exist_ok=True)
34
35
 
35
36
  logging.info("Downloading CCD CIF file from rcsb.org ...")
36
37
 
37
- output_cif_gz = output_path / "components.cif.gz"
38
- if output_cif_gz.exists():
39
- logging.info("Remove old zipped CCD CIF file: %s", output_cif_gz)
40
- output_cif_gz.unlink()
41
-
42
- output_cif = output_cif_gz.with_suffix("")
38
+ output_cif = output_path / "components.cif"
43
39
  if output_cif.exists():
44
40
  logging.info("Remove old CCD CIF file: %s", output_cif)
45
41
  output_cif.unlink()
46
42
 
47
- sp.run(
48
- f"wget https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz -P {output_path}",
49
- shell=True,
50
- check=True,
51
- )
52
-
53
- sp.run(f"gunzip -d {output_cif_gz}", shell=True, check=True)
43
+ url = "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
44
+ with requests.get(url, stream=True, timeout=60) as r:
45
+ r.raise_for_status()
46
+ with gzip.GzipFile(fileobj=r.raw) as f_in, output_cif.open("wb") as f_out:
47
+ for chunk in iter(lambda: f_in.read(8192), b""):
48
+ f_out.write(chunk)
54
49
 
55
50
  logging.info("Download CCD CIF file successfully: %s", output_cif)
56
51
 
@@ -432,6 +432,38 @@ class Structure:
432
432
  chain_id_to_entity_id[chain_id] = entity_id
433
433
  return chain_id_to_entity_id
434
434
 
435
+ def get_ligand_polymer_bonds(self) -> np.ndarray:
436
+ """
437
+ Get bonds between the bonded ligand and its parent chain.
438
+
439
+ Returns:
440
+ np.ndarray: bond records between the bonded ligand and its parent chain.
441
+ e.g. np.array([[atom1, atom2, bond_order]...])
442
+ """
443
+ atom_array = self.atom_array
444
+ bond_array = atom_array.bonds.as_array()
445
+
446
+ polymer_mask = np.isin(
447
+ atom_array.label_entity_id, list(self.entity_poly_type.keys())
448
+ )
449
+
450
+ lig_mask = ~polymer_mask
451
+
452
+ idx_i = bond_array[:, 0]
453
+ idx_j = bond_array[:, 1]
454
+
455
+ lig_polymer_bond_indices = np.where(
456
+ (lig_mask[idx_i] & polymer_mask[idx_j])
457
+ | (lig_mask[idx_j] & polymer_mask[idx_i])
458
+ )[0]
459
+ if lig_polymer_bond_indices.size == 0:
460
+ # no ligand-polymer bonds
461
+ lig_polymer_bonds = np.empty((0, 3)).astype(int)
462
+ else:
463
+ # np.array([[atom1, atom2, bond_order], ...])
464
+ lig_polymer_bonds = bond_array[lig_polymer_bond_indices]
465
+ return lig_polymer_bonds
466
+
435
467
  def clean_structure(
436
468
  self,
437
469
  mse_to_met=True,
@@ -59,6 +59,71 @@ class ChainPermutation:
59
59
  self.ref_chain_id_to_entity_id = self.ref_struct.get_chain_id_to_entity_id()
60
60
  self.model_chain_id_to_entity_id = self.model_struct.get_chain_id_to_entity_id()
61
61
 
62
+ self.ref_and_model_mapping_ban_set = self._get_ban_set_by_lig_bonded_position()
63
+
64
+ @staticmethod
65
+ def find_bonded_position_for_lig_chains(
66
+ struct: Structure,
67
+ ) -> dict[str, tuple[str, int]]:
68
+ """
69
+ Find the bonded entity ID and residue ID for ligand chains.
70
+
71
+ Args:
72
+ struct (Structure): Structure object.
73
+
74
+ Returns:
75
+ dict[str, tuple[str, int]]: Mapping of ligand chain ID to bonded
76
+ entity ID and residue ID.
77
+ """
78
+ ligand_polymer_bonds = struct.get_ligand_polymer_bonds()
79
+
80
+ chain_id_to_bonded_position = {}
81
+ for bond in ligand_polymer_bonds:
82
+ atom1, atom2, _ = bond
83
+ if struct.atom_array.label_entity_id[atom1] not in struct.entity_poly_type:
84
+ # atom1 is ligand
85
+ lig_chain_id = struct.uni_chain_id[atom1]
86
+ entity_id = struct.atom_array.label_entity_id[atom2]
87
+ res_id = struct.atom_array.res_id[atom2]
88
+ else:
89
+ lig_chain_id = struct.uni_chain_id[atom2]
90
+ entity_id = struct.atom_array.label_entity_id[atom1]
91
+ res_id = struct.atom_array.res_id[atom1]
92
+ chain_id_to_bonded_position[lig_chain_id] = (entity_id, res_id)
93
+ return chain_id_to_bonded_position
94
+
95
+ def _get_ban_set_by_lig_bonded_position(
96
+ self,
97
+ ) -> set[tuple[str, str]]:
98
+ ref_chain_id_to_bond_position = (
99
+ ChainPermutation.find_bonded_position_for_lig_chains(self.ref_struct)
100
+ )
101
+ model_chain_id_to_bond_position = (
102
+ ChainPermutation.find_bonded_position_for_lig_chains(self.model_struct)
103
+ )
104
+
105
+ ban_set = set()
106
+ for ref_chain_id in np.unique(self.ref_struct.uni_chain_id):
107
+ ref_bonded_entity, ref_bonded_res_id = ref_chain_id_to_bond_position.get(
108
+ ref_chain_id, ["-1", -1]
109
+ )
110
+ mapped_model_bonded_entity = self.ref_to_model_entity_id.get(
111
+ ref_bonded_entity, "-1"
112
+ )
113
+
114
+ for model_chain_id in np.unique(self.model_struct.uni_chain_id):
115
+ (
116
+ model_bonded_entity,
117
+ model_bonded_res_id,
118
+ ) = model_chain_id_to_bond_position.get(model_chain_id, ["-1", -1])
119
+
120
+ if mapped_model_bonded_entity != "-1" and model_bonded_entity != "-1":
121
+ if (mapped_model_bonded_entity != model_bonded_entity) or (
122
+ ref_bonded_res_id != model_bonded_res_id
123
+ ):
124
+ ban_set.add((ref_chain_id, model_chain_id))
125
+ return ban_set
126
+
62
127
  def find_model_anchor_chains(self) -> str:
63
128
  """
64
129
  Ref: AlphaFold3 SI Chapter 4.2. -> AlphaFold Multimer Chapter 7.3.1
@@ -180,15 +245,22 @@ class ChainPermutation:
180
245
  row_indices = []
181
246
  col_indices = []
182
247
 
248
+ dist_matrix_copy = dist_matrix.copy()
183
249
  for _ in range(num_cols):
184
- min_pos = np.unravel_index(np.argmin(dist_matrix), dist_matrix.shape)
250
+ min_pos = np.unravel_index(
251
+ np.argmin(dist_matrix_copy), dist_matrix_copy.shape
252
+ )
253
+
254
+ if dist_matrix_copy[min_pos[0], min_pos[1]] == np.inf:
255
+ # No more valid pairs
256
+ break
185
257
 
186
258
  row_indices.append(min_pos[0])
187
259
  col_indices.append(min_pos[1])
188
260
 
189
261
  # Set the found row and column to np.inf to ignore it
190
- dist_matrix[min_pos[0], :] = np.inf
191
- dist_matrix[:, min_pos[1]] = np.inf
262
+ dist_matrix_copy[min_pos[0], :] = np.inf
263
+ dist_matrix_copy[:, min_pos[1]] = np.inf
192
264
  return row_indices, col_indices
193
265
 
194
266
  @staticmethod
@@ -199,6 +271,7 @@ class ChainPermutation:
199
271
  struct2: Structure,
200
272
  coords1: np.ndarray,
201
273
  coords2: np.ndarray,
274
+ banned_chain_pairs: set[tuple[str, str]],
202
275
  ) -> dict[str, str]:
203
276
  """
204
277
  Chain mapping between two structures within the same entity using
@@ -212,12 +285,13 @@ class ChainPermutation:
212
285
  - Selecting the pair with minimal centroid distance
213
286
 
214
287
  Args:
215
- chain_ids1 (list[str]): Chain IDs from first structure (may become struct2 after swap)
216
- chain_ids2 (list[str]): Chain IDs from second structure (may become struct1 after swap)
288
+ chain_ids1 (list[str]): Chain IDs from first structure
289
+ chain_ids2 (list[str]): Chain IDs from second structure
217
290
  struct1 (Structure): First structure containing chain metadata
218
291
  struct2 (Structure): Second structure containing chain metadata
219
292
  coords1 (np.ndarray): Atom coordinates for struct1 (shape: [N, 3])
220
- coords2 (np.ndarray): Atom coordinates for struct2 (shape: [M, 3])
293
+ coords2 (np.ndarray): Atom coordinates for struct2 (shape: [N, 3])
294
+ banned_chain_pairs (set[tuple[str, str]]): Pairs of chain IDs to be banned.
221
295
 
222
296
  Returns:
223
297
  dict[str, str]: Mapping of chain IDs from struct1 to struct2.
@@ -227,6 +301,8 @@ class ChainPermutation:
227
301
  chain_ids1, chain_ids2 = chain_ids2, chain_ids1
228
302
  struct1, struct2 = struct2, struct1
229
303
  coords1, coords2 = coords2, coords1
304
+ banned_chain_pairs = {(cid2, cid1) for cid1, cid2 in banned_chain_pairs}
305
+
230
306
  swapped = True
231
307
  else:
232
308
  swapped = False
@@ -241,6 +317,10 @@ class ChainPermutation:
241
317
  atoms1 = struct1.uni_atom_id[mask1]
242
318
 
243
319
  for cid2 in chain_ids2:
320
+ if (cid1, cid2) in banned_chain_pairs:
321
+ # "inf" distance if the pair is banned
322
+ continue
323
+
244
324
  mask2 = struct2.uni_chain_id == cid2
245
325
  atoms2 = struct2.uni_atom_id[mask2]
246
326
 
@@ -258,6 +338,9 @@ class ChainPermutation:
258
338
  matched_chains = {}
259
339
  row_indices, col_indices = ChainPermutation._find_min_indices(dist_mat)
260
340
  for row, col in zip(row_indices, col_indices):
341
+ if np.isinf(dist_mat[row, col]):
342
+ # "inf" distance if the pair is banned
343
+ continue
261
344
  matched_chains[chain_ids1[row]] = chain_ids2[col]
262
345
 
263
346
  return (
@@ -309,6 +392,7 @@ class ChainPermutation:
309
392
  struct2=self.model_struct,
310
393
  coords1=aligned_ref_coord,
311
394
  coords2=self.model_struct.atom_array.coord,
395
+ banned_chain_pairs=self.ref_and_model_mapping_ban_set,
312
396
  )
313
397
  matched_chains.update(matched_chains_in_curr_entity)
314
398
  return matched_chains
@@ -443,6 +527,12 @@ class ChainPermutation:
443
527
  anchors = {}
444
528
 
445
529
  for ref_anchor_chain_id in ref_anchor_candidates:
530
+ if (
531
+ ref_anchor_chain_id,
532
+ model_anchor_chain_id,
533
+ ) in self.ref_and_model_mapping_ban_set:
534
+ continue
535
+
446
536
  # Find atoms in ref chain to match atoms in model chain
447
537
  ref_chain_mask = self.ref_struct.uni_chain_id == ref_anchor_chain_id
448
538
  ref_anchor_coord = self.ref_struct.atom_array.coord[ref_chain_mask]
@@ -555,9 +645,6 @@ class ChainPermutation:
555
645
  - Anchors used for alignment.
556
646
  """
557
647
  model_anchor_chain_ids = self.find_model_anchor_chains()
558
- if not self.enumerate_all_anchors:
559
- # Only use the first anchor chain
560
- model_anchor_chain_ids = [model_anchor_chain_ids[0]]
561
648
 
562
649
  ref_to_model_optimal_mapping = None
563
650
  best_rmsd = float("inf")
@@ -579,8 +666,17 @@ class ChainPermutation:
579
666
  ) = self.find_ref_to_model_optimal_chain_mapping(
580
667
  model_anchor_chain_id, ref_anchor_candidates
581
668
  )
669
+
670
+ if mapping_i is None:
671
+ continue
672
+
582
673
  if best_rmsd_i < best_rmsd:
583
674
  best_rmsd = best_rmsd_i
584
675
  ref_to_model_optimal_mapping = mapping_i
585
676
  best_anchors = anchors
677
+
678
+ if not self.enumerate_all_anchors:
679
+ # Only use the first valid anchor chain
680
+ break
681
+
586
682
  return ref_to_model_optimal_mapping, best_anchors
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pxmeter
3
- Version: 0.1.3
3
+ Version: 0.1.5
4
4
  Summary: PXMeter is a comprehensive toolkit for evaluating the quality of structures generated by biomolecular structure prediction models.
5
5
  Author: Bytedance Inc.
6
6
  Author-email: ai4s-bio@bytedance.com
@@ -20,7 +20,7 @@ with open("requirements.txt") as f:
20
20
  setup(
21
21
  name="pxmeter",
22
22
  python_requires=">=3.11",
23
- version="0.1.3",
23
+ version="0.1.5",
24
24
  description="PXMeter is a comprehensive toolkit for evaluating the quality of \
25
25
  structures generated by biomolecular structure prediction models.",
26
26
  author="Bytedance Inc.",
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes