gemmi-protools 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of gemmi-protools might be problematic. Click here for more details.
- gemmi_protools/io/reader.py +35 -79
- gemmi_protools/tools/pdb_annot.py +23 -0
- {gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/METADATA +2 -2
- {gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/RECORD +7 -7
- {gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/WHEEL +0 -0
- {gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/top_level.txt +0 -0
gemmi_protools/io/reader.py
CHANGED
|
@@ -1,19 +1,28 @@
|
|
|
1
1
|
import gzip
|
|
2
2
|
import io
|
|
3
|
-
import itertools
|
|
4
3
|
import pathlib
|
|
5
4
|
import random
|
|
6
5
|
import string
|
|
7
6
|
from collections import defaultdict
|
|
8
|
-
from copy import deepcopy
|
|
9
7
|
from typing import Dict, Optional, List
|
|
10
8
|
|
|
11
9
|
import gemmi
|
|
12
10
|
import numpy as np
|
|
13
11
|
import pandas as pd
|
|
14
|
-
from joblib import Parallel, delayed
|
|
15
12
|
from scipy.spatial import cKDTree
|
|
16
13
|
|
|
14
|
+
ATOM = [("chain_name", "U5"),
|
|
15
|
+
("residue_num", "i4"),
|
|
16
|
+
("residue_icode", "U3"),
|
|
17
|
+
("residue_name", "U5"),
|
|
18
|
+
("atom_name", "U5"),
|
|
19
|
+
("element", "U3"),
|
|
20
|
+
("charge", "i1"),
|
|
21
|
+
("b_factor", "f4"),
|
|
22
|
+
("occupancy", "f4"),
|
|
23
|
+
("coordinate", ("f4", (3,)))
|
|
24
|
+
]
|
|
25
|
+
|
|
17
26
|
|
|
18
27
|
def is_pdb(path: str) -> bool:
|
|
19
28
|
"""
|
|
@@ -318,8 +327,6 @@ class StructureParser(object):
|
|
|
318
327
|
del self.STRUCT[idx]
|
|
319
328
|
|
|
320
329
|
self.MODEL = self.STRUCT[0]
|
|
321
|
-
self.STRUCT.remove_alternative_conformations()
|
|
322
|
-
self.STRUCT.remove_hydrogens()
|
|
323
330
|
self.STRUCT.remove_empty_chains()
|
|
324
331
|
self._update_full_sequences()
|
|
325
332
|
|
|
@@ -774,17 +781,23 @@ class StructureParser(object):
|
|
|
774
781
|
new_struct.INFO["source"] = src
|
|
775
782
|
return new_struct
|
|
776
783
|
|
|
777
|
-
def clean_structure(self, remove_ligand=True):
|
|
784
|
+
def clean_structure(self, remove_ligand=False, remove_hydrogen=True):
|
|
778
785
|
"""
|
|
779
786
|
Remove water by default
|
|
780
787
|
|
|
781
|
-
:param remove_ligand:
|
|
788
|
+
:param remove_ligand: bool, default False
|
|
789
|
+
:param remove_hydrogen: bool, default True
|
|
782
790
|
:return:
|
|
783
791
|
"""
|
|
792
|
+
self.STRUCT.remove_alternative_conformations()
|
|
793
|
+
|
|
794
|
+
if remove_hydrogen:
|
|
795
|
+
self.STRUCT.remove_hydrogens()
|
|
796
|
+
|
|
784
797
|
if remove_ligand:
|
|
785
|
-
self.STRUCT.remove_waters()
|
|
786
|
-
else:
|
|
787
798
|
self.STRUCT.remove_ligands_and_waters()
|
|
799
|
+
else:
|
|
800
|
+
self.STRUCT.remove_waters()
|
|
788
801
|
|
|
789
802
|
self.STRUCT.remove_empty_chains()
|
|
790
803
|
self.update_entity()
|
|
@@ -799,11 +812,12 @@ class StructureParser(object):
|
|
|
799
812
|
atom.name = 'SE'
|
|
800
813
|
atom.element = gemmi.Element('Se')
|
|
801
814
|
|
|
802
|
-
def get_atoms(self, arg: str = "*"):
|
|
815
|
+
def get_atoms(self, arg: str = "*", exclude_hydrogen=False):
|
|
803
816
|
"""
|
|
804
817
|
|
|
805
818
|
:param arg: str, "*", "/1/*//N,CA,C,O", "/1/*"
|
|
806
819
|
see gemmi.Selection
|
|
820
|
+
:param exclude_hydrogen: bool, default False
|
|
807
821
|
:return:
|
|
808
822
|
np.ndarray
|
|
809
823
|
"""
|
|
@@ -814,6 +828,9 @@ class StructureParser(object):
|
|
|
814
828
|
for chain in sel.chains(model):
|
|
815
829
|
for residue in sel.residues(chain):
|
|
816
830
|
for atom in sel.atoms(residue):
|
|
831
|
+
if exclude_hydrogen and atom.is_hydrogen():
|
|
832
|
+
continue
|
|
833
|
+
|
|
817
834
|
val = (chain.name,
|
|
818
835
|
residue.seqid.num,
|
|
819
836
|
residue.seqid.icode,
|
|
@@ -827,23 +844,12 @@ class StructureParser(object):
|
|
|
827
844
|
)
|
|
828
845
|
res.append(val)
|
|
829
846
|
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
("charge", "i1"),
|
|
837
|
-
("b_factor", "f4"),
|
|
838
|
-
("occupancy", "f4"),
|
|
839
|
-
("coordinate", ("f4", (3,)))
|
|
840
|
-
]
|
|
841
|
-
return np.array(res, dtype=dtype)
|
|
842
|
-
|
|
843
|
-
def polymer_interface_residues(self,
|
|
844
|
-
chains_x: List[str],
|
|
845
|
-
chains_y: List[str],
|
|
846
|
-
threshold: float = 4.5):
|
|
847
|
+
return np.array(res, dtype=ATOM)
|
|
848
|
+
|
|
849
|
+
def compute_interface(self,
|
|
850
|
+
chains_x: List[str],
|
|
851
|
+
chains_y: List[str],
|
|
852
|
+
threshold: float = 5.0):
|
|
847
853
|
"""
|
|
848
854
|
Identify PPI among protein, DNA, RNA using heavy atom distances.
|
|
849
855
|
:param chains_x:
|
|
@@ -860,23 +866,8 @@ class StructureParser(object):
|
|
|
860
866
|
% (ch, " ".join(list(self.polymer_types.keys())))
|
|
861
867
|
)
|
|
862
868
|
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
protein_atoms = ['N', 'ND1', 'ND2', 'NE', 'NE1', 'NE2', 'NH1', 'NH2', 'NZ',
|
|
866
|
-
'O', 'OD1', 'OD2', 'OE1', 'OE2', 'OG', 'OG1', 'OH',
|
|
867
|
-
'SD', 'SG']
|
|
868
|
-
xna_atoms = ['N1', 'N2', 'N3', 'N4', 'N6', 'N7', 'N9',
|
|
869
|
-
'O2', "O2'", "O3'", 'O4', "O4'", "O5'", 'O6',
|
|
870
|
-
'OP1', 'OP2', 'OP3', 'P']
|
|
871
|
-
tag = "/1/%s//%s" % (",".join(chains), ",".join(protein_atoms + xna_atoms))
|
|
872
|
-
z = struct.get_atoms(tag)
|
|
873
|
-
return z
|
|
874
|
-
|
|
875
|
-
query_struct = deepcopy(self)
|
|
876
|
-
query_struct.clean_structure(remove_ligand=True)
|
|
877
|
-
|
|
878
|
-
atom_x = ppi_atoms(query_struct, chains_x)
|
|
879
|
-
atom_y = ppi_atoms(query_struct, chains_y)
|
|
869
|
+
atom_x = self.get_atoms("/1/%s" % ",".join(chains_x), exclude_hydrogen=True)
|
|
870
|
+
atom_y = self.get_atoms("/1/%s" % ",".join(chains_y), exclude_hydrogen=True)
|
|
880
871
|
|
|
881
872
|
kd_tree_x = cKDTree(atom_x["coordinate"])
|
|
882
873
|
kd_tree_y = cKDTree(atom_y["coordinate"])
|
|
@@ -886,38 +877,3 @@ class StructureParser(object):
|
|
|
886
877
|
y_res = np.unique(atom_y[pairs.col][["chain_name", "residue_num", "residue_icode", "residue_name"]])
|
|
887
878
|
|
|
888
879
|
return x_res, y_res
|
|
889
|
-
|
|
890
|
-
def polymer_interface_residues_all(self, ppi_threshold: float = 4.5, n_cpus: int = 4):
|
|
891
|
-
"""
|
|
892
|
-
Identify PPI among protein, DNA, RNA using heavy atom distances between all chain pairs.
|
|
893
|
-
|
|
894
|
-
:param ppi_threshold:
|
|
895
|
-
:param n_cpus:
|
|
896
|
-
:return:
|
|
897
|
-
"""
|
|
898
|
-
chains = list(self.polymer_types.keys())
|
|
899
|
-
ch_pairs = list(itertools.combinations(chains, r=2))
|
|
900
|
-
ch_pairs.sort()
|
|
901
|
-
|
|
902
|
-
def _run(ch_1, ch_2):
|
|
903
|
-
key = "%s/%s" % (ch_1, ch_2)
|
|
904
|
-
res_x, res_y = self.polymer_interface_residues(chains_x=[ch_1], chains_y=[ch_2], threshold=ppi_threshold)
|
|
905
|
-
|
|
906
|
-
if len(res_x) > 0:
|
|
907
|
-
vx = ["%s/%d/%s/%s" % (a, b, c.strip(), d) for a, b, c, d in res_x.tolist()]
|
|
908
|
-
vy = ["%s/%d/%s/%s" % (a, b, c.strip(), d) for a, b, c, d in res_y.tolist()]
|
|
909
|
-
return {key: [vx, vy]}
|
|
910
|
-
else:
|
|
911
|
-
return dict()
|
|
912
|
-
|
|
913
|
-
cpu2use = max(min(n_cpus, len(ch_pairs)), 1)
|
|
914
|
-
|
|
915
|
-
outputs = dict()
|
|
916
|
-
if cpu2use == 1 or len(ch_pairs) < 50:
|
|
917
|
-
for ch_1, ch_2 in ch_pairs:
|
|
918
|
-
outputs.update(_run(ch_1, ch_2))
|
|
919
|
-
else:
|
|
920
|
-
results = Parallel(n_jobs=cpu2use)(delayed(_run)(c1, c2) for c1, c2 in ch_pairs)
|
|
921
|
-
for item in results:
|
|
922
|
-
outputs.update(item)
|
|
923
|
-
return outputs
|
|
gemmi_protools/tools/pdb_annot.py
CHANGED
|
@@ -10,6 +10,7 @@ import uuid
|
|
|
10
10
|
from collections import defaultdict
|
|
11
11
|
from importlib.resources import files
|
|
12
12
|
|
|
13
|
+
import numpy as np
|
|
13
14
|
from anarci import run_anarci
|
|
14
15
|
from anarci.germlines import all_germlines
|
|
15
16
|
|
|
@@ -37,10 +38,21 @@ def get_fv_region(in_sequence: str):
|
|
|
37
38
|
)
|
|
38
39
|
|
|
39
40
|
mapper = dict()
|
|
41
|
+
num_mapper = dict()
|
|
40
42
|
for k, v in imgt_scheme.items():
|
|
41
43
|
for i in range(v[0], v[1] + 1):
|
|
42
44
|
mapper[i] = k
|
|
43
45
|
|
|
46
|
+
if k == "cdr1":
|
|
47
|
+
ki = 1
|
|
48
|
+
elif k == "cdr2":
|
|
49
|
+
ki = 2
|
|
50
|
+
elif k == "cdr3":
|
|
51
|
+
ki = 3
|
|
52
|
+
else:
|
|
53
|
+
ki = 0
|
|
54
|
+
num_mapper[i] = ki
|
|
55
|
+
|
|
44
56
|
inputs = [("input", in_sequence)]
|
|
45
57
|
_, numbered, alignment_details, _ = run_anarci(inputs, scheme="imgt", assign_germline=True)
|
|
46
58
|
if numbered[0] is None:
|
|
@@ -49,6 +61,16 @@ def get_fv_region(in_sequence: str):
|
|
|
49
61
|
outputs = []
|
|
50
62
|
for cur_numbered, cur_details in zip(numbered[0], alignment_details[0]):
|
|
51
63
|
aligned_sites, start, end = cur_numbered
|
|
64
|
+
# add mask
|
|
65
|
+
# 9 for not Fv region
|
|
66
|
+
# 0 for non-CDR region, 1, 2, 3 for CDR region for the current Fv
|
|
67
|
+
mask = np.full(len(in_sequence), fill_value=9, dtype=np.int8)
|
|
68
|
+
mask[start: end + 1] = 0
|
|
69
|
+
i = 0
|
|
70
|
+
for (site_num, _), site_aa in aligned_sites:
|
|
71
|
+
if site_aa != "-":
|
|
72
|
+
mask[i + start] = num_mapper[site_num]
|
|
73
|
+
i += 1
|
|
52
74
|
|
|
53
75
|
# region_seq
|
|
54
76
|
regions = defaultdict(list)
|
|
@@ -119,6 +141,7 @@ def get_fv_region(in_sequence: str):
|
|
|
119
141
|
cdr1_aa=cdr1_seq,
|
|
120
142
|
cdr2_aa=cdr2_seq,
|
|
121
143
|
cdr3_aa=cdr3_seq,
|
|
144
|
+
mask="".join([str(i) for i in mask.tolist()])
|
|
122
145
|
)
|
|
123
146
|
)
|
|
124
147
|
return outputs
|
|
{gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gemmi_protools
|
|
3
|
-
Version: 1.0.3
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: An Enhanced tool to process PDB structures based on Gemmi
|
|
5
5
|
Author: Luo Jiejian
|
|
6
6
|
Author-email: Luo Jiejian <luojiejian12@mails.ucas.ac.cn>
|
|
@@ -27,7 +27,7 @@ Dynamic: license-file
|
|
|
27
27
|
# Install
|
|
28
28
|
```commandline
|
|
29
29
|
|
|
30
|
-
conda install python=3.12.9 anarci hmmer dockq trimesh rtree -c bioconda -c conda-forge
|
|
30
|
+
conda install python=3.12.9 anarci hmmer dockq trimesh rtree pdbfixer -c bioconda -c conda-forge
|
|
31
31
|
pip install gemmi_protools
|
|
32
32
|
```
|
|
33
33
|
|
|
{gemmi_protools-1.0.3.dist-info → gemmi_protools-1.0.4.dist-info}/RECORD
CHANGED
|
@@ -6,14 +6,14 @@ gemmi_protools/data/MHC/MHC_combined.hmm.h3m,sha256=CvNMCsobQiX-wL7iB4CreNcbpnEl
|
|
|
6
6
|
gemmi_protools/data/MHC/MHC_combined.hmm.h3p,sha256=-mK278pRedG3-KL-DtuVAQy7La9DgXg5FcP89D6X3Ck,78325
|
|
7
7
|
gemmi_protools/io/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
8
8
|
gemmi_protools/io/convert.py,sha256=A1i1vPgxG1LqMSUvWtegLl9LipgUQbfmKeGJ_f00UYo,3781
|
|
9
|
-
gemmi_protools/io/reader.py,sha256
|
|
9
|
+
gemmi_protools/io/reader.py,sha256=drJ8WO_N8VR3KAmvEJBsHHAgZj-hzfnM3fyIV1uc0gg,32047
|
|
10
10
|
gemmi_protools/tools/__init__.py,sha256=F6e1xNT_7lZAWQgNIneH06o2qtWYrHNr_xPUPTwwx5E,29
|
|
11
11
|
gemmi_protools/tools/align.py,sha256=oKHvpeDa62zEjLkPmuyBM6avYDl3HFeJVHeRX62I2f4,7085
|
|
12
12
|
gemmi_protools/tools/dockq.py,sha256=baCuO5-GZCwrlS59T5UIXogpM44OIFIfXqksqRBAb0A,4428
|
|
13
13
|
gemmi_protools/tools/mesh.py,sha256=73MuJYwS_ACJI15OsrooAAhB1Ti4fM8CJSBqFOBR7LU,6537
|
|
14
|
-
gemmi_protools/tools/pdb_annot.py,sha256=
|
|
15
|
-
gemmi_protools-1.0.3.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
16
|
-
gemmi_protools-1.0.
|
|
17
|
-
gemmi_protools-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
-
gemmi_protools-1.0.3.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
19
|
-
gemmi_protools-1.0.3.dist-info/RECORD,,
|
|
14
|
+
gemmi_protools/tools/pdb_annot.py,sha256=fjEB7xmY-SmctW_lICtf2GZ63Odu9nADlyJuOen4W8o,8906
|
|
15
|
+
gemmi_protools-1.0.4.dist-info/licenses/LICENSE,sha256=JuQvKcgj6n11y5y6nXr9rABv3gJSswc4eTCd5WZBtSY,1062
|
|
16
|
+
gemmi_protools-1.0.4.dist-info/METADATA,sha256=HU5R2A-uwFbMcqDTcVeNPr0U3L0XrmeYVA8DihfWfiI,1043
|
|
17
|
+
gemmi_protools-1.0.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
gemmi_protools-1.0.4.dist-info/top_level.txt,sha256=P12mYJi5O5EKIn5u-RFaWxuix431CgLacSRD7rBid_U,15
|
|
19
|
+
gemmi_protools-1.0.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|