EntDetect 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. EntDetect/Jwalk/GridTools.py +567 -0
  2. EntDetect/Jwalk/PDBTools.py +532 -0
  3. EntDetect/Jwalk/SASDTools.py +543 -0
  4. EntDetect/Jwalk/SurfaceTools.py +150 -0
  5. EntDetect/Jwalk/__init__.py +19 -0
  6. EntDetect/Jwalk/naccess.config.txt +255 -0
  7. EntDetect/__init__.py +10 -0
  8. EntDetect/_logging.py +71 -0
  9. EntDetect/change_resolution.py +2361 -0
  10. EntDetect/clustering.py +2626 -0
  11. EntDetect/compare_sim2exp.py +1927 -0
  12. EntDetect/entanglement_features.py +478 -0
  13. EntDetect/gaussian_entanglement.py +2067 -0
  14. EntDetect/order_params.py +1048 -0
  15. EntDetect/resources/__init__.py +11 -0
  16. EntDetect/resources/__pycache__/__init__.cpython-311.pyc +0 -0
  17. EntDetect/resources/calc_K.pl +712 -0
  18. EntDetect/resources/calc_Q.pl +962 -0
  19. EntDetect/resources/pulchra +0 -0
  20. EntDetect/resources/shared_files/__init__.py +2 -0
  21. EntDetect/resources/shared_files/bt_contact_potential.dat +22 -0
  22. EntDetect/resources/shared_files/karanicolas_dihe_parm.dat +1600 -0
  23. EntDetect/resources/shared_files/kgs_contact_potential.dat +22 -0
  24. EntDetect/resources/shared_files/mj_contact_potential.dat +22 -0
  25. EntDetect/resources/stride +0 -0
  26. EntDetect/statistics.py +1344 -0
  27. EntDetect/utilities.py +201 -0
  28. entdetect-1.2.0.dist-info/METADATA +26 -0
  29. entdetect-1.2.0.dist-info/RECORD +45 -0
  30. entdetect-1.2.0.dist-info/WHEEL +5 -0
  31. entdetect-1.2.0.dist-info/entry_points.txt +11 -0
  32. entdetect-1.2.0.dist-info/licenses/LICENSE +674 -0
  33. entdetect-1.2.0.dist-info/top_level.txt +2 -0
  34. scripts/__init__.py +5 -0
  35. scripts/convert_cor_psf_to_pdb.py +103 -0
  36. scripts/run_Foldingpathway.py +162 -0
  37. scripts/run_MSM.py +152 -0
  38. scripts/run_OP_on_simulation_traj.py +194 -0
  39. scripts/run_change_resolution.py +63 -0
  40. scripts/run_compare_sim2exp.py +215 -0
  41. scripts/run_montecarlo.py +158 -0
  42. scripts/run_nativeNCLE.py +179 -0
  43. scripts/run_nonnative_entanglement_clustering.py +110 -0
  44. scripts/run_population_modeling.py +117 -0
  45. scripts/run_workflow4_nativeNCLE_batch.py +412 -0
EntDetect/utilities.py ADDED
@@ -0,0 +1,201 @@
1
+ import logging
2
+ import pandas as pd
3
+ import numpy as np
4
+ from Bio.PDB import PDBParser, PDBIO
5
+ import pathlib
6
+ import os
7
+ from EntDetect._logging import setup_logger
8
+
9
class PDBcleaner:
    """
    Clean a PDB structure before entanglement analysis.

    Two cleaning passes are available:

    1. ``remove_duplicates`` - drops residues that carry an insertion code,
       a non-blank hetero flag, or any atom with an alternate location.
    2. ``remove_incomplete`` - applies the same filters and additionally
       drops residues whose heavy-atom count does not match the expected
       count for their residue type.

    Both passes edit ``self.structure`` in place, so calling one after the
    other (without passing a new ``pdb``) filters the already-filtered
    structure.
    """

    # Number of heavy (non-hydrogen) atoms in each standard amino acid,
    # backbone N/CA/C/O included. The C-terminal OXT oxygen is NOT part of
    # these counts, which is why it is excluded when counting atoms below.
    EXPECTED_HEAVY_ATOMS = {
        "ALA": 5,
        "ARG": 11,
        "ASN": 8,
        "ASP": 8,
        "CYS": 6,
        "GLU": 9,
        "GLN": 9,
        "GLY": 4,
        "HIS": 10,
        "ILE": 8,
        "LEU": 8,
        "LYS": 9,
        "MET": 8,
        "PHE": 11,
        "PRO": 7,
        "SER": 6,
        "THR": 7,
        "TRP": 14,
        "TYR": 12,
        "VAL": 7,
    }

    # Sub-directory of ``outdir`` that receives the cleaned PDB files.
    # The trailing slash is kept so returned paths are unchanged.
    CLEAN_SUBDIR = 'cleanPDB_tmp/'

    #############################################################
    def __init__(self, pdb:str, outdir:str='./', log_level:int=logging.INFO, logdir:str=None) -> None:
        """
        Load the PDB file and initiate the PDBcleaner class.

        Parameters
        ----------
        pdb : str
            Path to the PDB file to parse.
        outdir : str
            Directory under which cleaned PDB files are written. It is
            created (including missing parents) if it does not exist.
        log_level : int
            Level passed through to ``setup_logger``.
        logdir : str or None
            Directory passed to ``setup_logger``; falls back to ``outdir``
            when None.
        """
        self.outdir = outdir
        self.logger = setup_logger('PDBcleaner', outdir=logdir if logdir is not None else outdir, log_level=log_level)

        self._load_structure(pdb)

        ## make the output directory if it doesnt already exist
        ## (makedirs handles nested paths and avoids the exists/mkdir race)
        os.makedirs(outdir, exist_ok=True)
    #############################################################

    #############################################################
    def remove_duplicates(self, pdb:str='None'):
        """
        Remove duplicate residues that are present in the PDB.

        A residue is kept only if it has a blank insertion code, a blank
        hetero flag, and no atom with an alternate location.

        Parameters
        ----------
        pdb : str
            Optional path to a PDB file that replaces the currently loaded
            structure. The string ``'None'`` (the default) or ``None`` means
            "use the structure that is already loaded".

        Returns
        -------
        str
            Path of the written ``<stem>_removed_duplicates.pdb`` file.
        """
        if self._pdb_was_given(pdb):
            self._load_structure(pdb)

        return self._filter_and_save(self._passes_basic_filters, 'removed_duplicates')
    #############################################################

    #############################################################
    def remove_incomplete(self, pdb:str='None'):
        """
        Remove incomplete residues that are present in the PDB.

        A residue is kept only if it passes the same filters as
        ``remove_duplicates`` AND its heavy-atom count equals the entry for
        its residue name in ``EXPECTED_HEAVY_ATOMS``. Residue names missing
        from that table are treated as incomplete and removed.

        Parameters
        ----------
        pdb : str
            Optional path to a PDB file that replaces the currently loaded
            structure. The string ``'None'`` (the default) or ``None`` means
            "use the structure that is already loaded".

        Returns
        -------
        str
            Path of the written ``<stem>_removed_incomplete.pdb`` file.
        """
        if self._pdb_was_given(pdb):
            self._load_structure(pdb)

        def keep(residue) -> bool:
            # Evaluate both checks unconditionally so the debug log always
            # contains the completeness and the altloc information.
            complete = self._is_complete(residue)
            basic = self._passes_basic_filters(residue)
            return basic and complete

        return self._filter_and_save(keep, 'removed_incomplete')
    #############################################################

    #############################################################
    @staticmethod
    def _pdb_was_given(pdb) -> bool:
        """Return True when the caller supplied a real PDB path."""
        return pdb is not None and pdb != 'None'

    @staticmethod
    def _is_counted_heavy_atom(atom_name:str, element:str) -> bool:
        """
        Decide whether an atom counts towards ``EXPECTED_HEAVY_ATOMS``.

        Hydrogens (and deuterium) are not heavy atoms, and the C-terminal
        ``OXT`` oxygen is not included in the per-residue expected counts,
        so counting it would flag every complete C-terminal residue as
        incomplete.
        """
        if element in ('H', 'D'):
            return False
        return atom_name.strip().upper() != 'OXT'

    def _load_structure(self, pdb:str) -> None:
        """Parse ``pdb`` and store the structure and the file stem on self."""
        self.logger.debug(f'Reading PDB file: {pdb}')
        structure = PDBParser().get_structure('protein', pdb)
        self.logger.debug(f'structure: {structure}')
        self.structure = structure
        self.pdb_filename = pathlib.Path(pdb).stem
        self.logger.debug(f'pdb_filename: {self.pdb_filename}')

    def _has_altloc(self, residue) -> bool:
        """Return True if any atom of ``residue`` has a non-blank altloc."""
        residue_alts = False
        for atom in residue:
            altloc = atom.get_altloc()
            # lazy %-formatting: this is the innermost loop, so avoid
            # building the message when debug logging is disabled
            self.logger.debug('    Atom: %s %s', atom, altloc)
            if altloc != ' ':
                residue_alts = True
        self.logger.debug(f'    residue_alts: {residue_alts}')
        return residue_alts

    def _passes_basic_filters(self, residue) -> bool:
        """
        Return True for residues with a blank insertion code, a blank
        hetero flag and no alternate-location atoms.
        """
        residue_alts = self._has_altloc(residue)
        hetflag, resseq, icode = residue.id
        return icode == ' ' and not residue_alts and hetflag == ' '

    def _is_complete(self, residue) -> bool:
        """
        Return True when ``residue`` has exactly the expected number of
        heavy atoms for its residue name (terminal OXT not counted).
        """
        resname = residue.get_resname()
        expected = self.EXPECTED_HEAVY_ATOMS.get(resname)
        if expected is None:
            # Unknown residue names cannot be validated and are dropped.
            self.logger.debug(f'    Unknown residue type: {resname}, treating it as incomplete')
            return False

        num_heavy_atoms = sum(
            1 for atom in residue
            if self._is_counted_heavy_atom(atom.get_name(), atom.element)
        )
        self.logger.debug(f'    Number of heavy atoms: {num_heavy_atoms}')
        if num_heavy_atoms == expected:
            self.logger.debug(f'    Complete residue: {residue} with {num_heavy_atoms} heavy atoms, expected {expected}')
            return True

        self.logger.debug(f'    Incomplete residue: {residue} with {num_heavy_atoms} heavy atoms, expected {expected}')
        return False

    def _filter_and_save(self, keep, suffix:str) -> str:
        """
        Detach every residue for which ``keep(residue)`` is false, write the
        filtered structure to ``<outdir>/cleanPDB_tmp/<stem>_<suffix>.pdb``
        and return that path.
        """
        ## define the output pdb and directories
        clean_outdir = os.path.join(self.outdir, self.CLEAN_SUBDIR)
        if not os.path.isdir(clean_outdir):
            os.makedirs(clean_outdir, exist_ok=True)
            self.logger.debug(f'Made directory: {clean_outdir}')

        output_pdb = os.path.join(clean_outdir, f'{self.pdb_filename}_{suffix}.pdb')

        # Walk every residue of every chain and drop the rejected ones
        for model in self.structure:
            self.logger.debug(f'Model: {model}')

            for chain in model:
                self.logger.debug(f'  Chain: {chain}')

                # iterate over a copy: the chain is modified inside the loop
                for residue in list(chain):
                    self.logger.debug(f'    Residue: {residue} {residue.get_id()} {residue.get_resname()}')
                    if not keep(residue):
                        chain.detach_child(residue.id)

        # Save filtered structure
        io = PDBIO()
        io.set_structure(self.structure)
        io.save(output_pdb)
        self.logger.info(f'SAVED: {output_pdb}')

        return output_pdb
    #############################################################
199
+
200
+ #####################################################################
201
+ #####################################################################
@@ -0,0 +1,26 @@
1
+ Metadata-Version: 2.4
2
+ Name: EntDetect
3
+ Version: 1.2.0
4
+ Summary: Entanglement Detection in Protein Structures
5
+ License-File: LICENSE
6
+ Requires-Dist: biopython
7
+ Requires-Dist: numpy
8
+ Requires-Dist: scipy
9
+ Requires-Dist: pandas
10
+ Requires-Dist: MDAnalysis
11
+ Requires-Dist: mdtraj
12
+ Requires-Dist: parmed
13
+ Requires-Dist: numba
14
+ Requires-Dist: topoly
15
+ Requires-Dist: geom_median
16
+ Requires-Dist: matplotlib
17
+ Requires-Dist: seaborn
18
+ Requires-Dist: scikit-learn
19
+ Requires-Dist: networkx
20
+ Requires-Dist: pyyaml
21
+ Requires-Dist: tqdm
22
+ Requires-Dist: requests
23
+ Requires-Dist: statsmodels
24
+ Dynamic: license-file
25
+ Dynamic: requires-dist
26
+ Dynamic: summary
@@ -0,0 +1,45 @@
1
+ EntDetect/__init__.py,sha256=3SOShWtCJih2HZ0k1U-GRu6LuQAzr_LCSZEiLFnAw0c,307
2
+ EntDetect/_logging.py,sha256=9yn8bpAHO-6gBBQZVjP7fvWxryTVn3iqfwzWPzMymA8,2195
3
+ EntDetect/change_resolution.py,sha256=GTB2WccnlOf1oaIatlnC9usyRbIix-qrDX2hP-V2jfU,117853
4
+ EntDetect/clustering.py,sha256=fBMpiJVMfy_lo23jWjlzQWtFLJWOOUJ7lMwJm_5eXiA,135711
5
+ EntDetect/compare_sim2exp.py,sha256=TjqrELfU_ypY78oCS-r0ttzWhCH5egteguXiZ2B1WFs,99754
6
+ EntDetect/entanglement_features.py,sha256=yjvfiNT7UiptL-ULDByEpHqFQJ9D_8JM7hxiS-oUH34,22493
7
+ EntDetect/gaussian_entanglement.py,sha256=rfYCrY1P99DmpVE0paxdsrdg6qPduCBvcCbXpcngq0k,103081
8
+ EntDetect/order_params.py,sha256=68oq6vE21qCfbezpeF7OHBDoZEAxKmoIU3dm4WxeqR0,51960
9
+ EntDetect/statistics.py,sha256=FQZsVowGuTBJ0WBdS47P0kYE6YOVTn4nu_cHgK9HIjM,65481
10
+ EntDetect/utilities.py,sha256=wF9lH43bJk1E7eY3wZK8UluQxErlWTxOONCKXlCU08k,8535
11
+ EntDetect/Jwalk/GridTools.py,sha256=e0oVNk_q0Oj-gDNickqAN7mRZehR3XeOZkVySn7-MHU,18660
12
+ EntDetect/Jwalk/PDBTools.py,sha256=Oam_vWoQ3q1iH7UR3pU4ccOJViUhv58UEaGnCWMZiC0,16322
13
+ EntDetect/Jwalk/SASDTools.py,sha256=werscmfo6eR-1eKJtJWkPahAQztvB1ueMUP_jNlsdv0,22709
14
+ EntDetect/Jwalk/SurfaceTools.py,sha256=DSgO6NaP6OCH5thxjqICJFkDVVSt_vh4BgPn4eKpyz4,6290
15
+ EntDetect/Jwalk/__init__.py,sha256=jMzpd8yB8quFVqLKt8q8tffCfpYwRudyOVhaPCjKuek,873
16
+ EntDetect/Jwalk/naccess.config.txt,sha256=ZLXV2ULm5kjPGq7ABO4YYx7Jc1eu6mAlk76JpsP2Vz4,2780
17
+ EntDetect/resources/__init__.py,sha256=lrlePz5pb7PFp3-EpTdwbNyznTxN3sP4-WiApVYDIX8,438
18
+ EntDetect/resources/calc_K.pl,sha256=6si6voV3VCoN32CKah9yVJAwFOgIqy-z1U6x2nILs1o,15713
19
+ EntDetect/resources/calc_Q.pl,sha256=OMsy86ZPRl1lmWUpLBhlHqfDvkUgoNYov4xureCywfQ,23338
20
+ EntDetect/resources/pulchra,sha256=eO7nNusOVRJRhCOodhiySQCpgVLM_228etI4obajF5M,3598552
21
+ EntDetect/resources/stride,sha256=gViu_Lgc71njrF_WJ5nPlhumh8DB3u9BADVJLZyOWlU,160608
22
+ EntDetect/resources/__pycache__/__init__.cpython-311.pyc,sha256=C3M2sgGloRNhyO-HGCVoteJ0FAXLYxZiqO9_sDxD6mc,449
23
+ EntDetect/resources/shared_files/__init__.py,sha256=-C1lxAgxJOoF_daSJCGf2Qid6u9__hUJoe_C-rGkC3g,102
24
+ EntDetect/resources/shared_files/bt_contact_potential.dat,sha256=dABkMtqnj--wAwPMejMFmCcjP5u_z0N1uh8z72OgPSQ,1384
25
+ EntDetect/resources/shared_files/karanicolas_dihe_parm.dat,sha256=tA-xnBq5bYDB5VGoTQGCStBpwlmVuZfiwJPn489_6i4,57600
26
+ EntDetect/resources/shared_files/kgs_contact_potential.dat,sha256=FwsIIseSmgUH07P3IAMXMignhHq-jtPHl8Nh8lSTz8c,1168
27
+ EntDetect/resources/shared_files/mj_contact_potential.dat,sha256=KNBwAwZctCZdzmhZkirVHHDggIgKa_CEtHx-pMI_Hog,1303
28
+ entdetect-1.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
29
+ scripts/__init__.py,sha256=_rbb5_TMoSLkMNpZ4PbaD1GDTiiewYSGy_aKfsOH6PI,132
30
+ scripts/convert_cor_psf_to_pdb.py,sha256=ySxnvJYOspd3pC_t3iljo5AiXBXjaOC_Gii5XYx5QG8,2963
31
+ scripts/run_Foldingpathway.py,sha256=5trkrU5cgDZVjJ4fy2FKZ-in2Y6KsSqV4HbxxOGKVyI,8451
32
+ scripts/run_MSM.py,sha256=ST4kCQy7rf88h9w7_TRaGJKRYVqECxKtu44F2Un3H7g,7591
33
+ scripts/run_OP_on_simulation_traj.py,sha256=4ODQFEyloybfIO_mGIPnhe_OBrFGw23EYD6agX_QsIU,9923
34
+ scripts/run_change_resolution.py,sha256=FvfjUNsEqeLrP0VjasOgmPnhfiZugDFmz9sBMlltBPc,2214
35
+ scripts/run_compare_sim2exp.py,sha256=SWlzfed37oN88DZ1k_s6KnctwG89ZOrr1v5GPbRHd_E,10300
36
+ scripts/run_montecarlo.py,sha256=Tz1dFvhUSdons8R-REsL9RUDBjGTvZ398OX5aeij5lI,5690
37
+ scripts/run_nativeNCLE.py,sha256=KFKKQ1nTUhBB1DbANfUh433cCW_HG_0BALgX9zx1dBA,9493
38
+ scripts/run_nonnative_entanglement_clustering.py,sha256=md-IRSK-3z8jnqx7iOmnmPm7uQO4a-omGHDXxe5Tr3A,5825
39
+ scripts/run_population_modeling.py,sha256=yZm0vMEwIK4qQUJGoK36Tt821LYf55b1CPDzOmgv0jk,4222
40
+ scripts/run_workflow4_nativeNCLE_batch.py,sha256=RBsRcIuCg4qT9imqhYJ4Ow4ajx2y1wXpreA78QM5rbE,16556
41
+ entdetect-1.2.0.dist-info/METADATA,sha256=KVTSuIVKsDNvbGUo3vdb9CW3y6K_GhlCOGxwuZffVdA,614
42
+ entdetect-1.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
43
+ entdetect-1.2.0.dist-info/entry_points.txt,sha256=tn3EH7khcG4MH6AlYPkCjfIDB4AJwwlSuDvlk7i3tdk,588
44
+ entdetect-1.2.0.dist-info/top_level.txt,sha256=hucW5IQiJAKoqBMvgmFHSRbxr5Eox7caYQB2sdtLlVU,18
45
+ entdetect-1.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,11 @@
1
+ [console_scripts]
2
+ convert_cor_psf_to_pdb = scripts.convert_cor_psf_to_pdb:main
3
+ run_Foldingpathway = scripts.run_Foldingpathway:main
4
+ run_MSM = scripts.run_MSM:main
5
+ run_OP_on_simulation_traj = scripts.run_OP_on_simulation_traj:main
6
+ run_change_resolution = scripts.run_change_resolution:main
7
+ run_compare_sim2exp = scripts.run_compare_sim2exp:main
8
+ run_montecarlo = scripts.run_montecarlo:main
9
+ run_nativeNCLE = scripts.run_nativeNCLE:main
10
+ run_nonnative_entanglement_clustering = scripts.run_nonnative_entanglement_clustering:main
11
+ run_population_modeling = scripts.run_population_modeling:main