modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelcraft/__init__.py +16 -31
- modelcraft/__main__.py +0 -1
- modelcraft/arguments.py +35 -7
- modelcraft/combine.py +22 -41
- modelcraft/contents.py +188 -164
- modelcraft/environ.py +0 -7
- modelcraft/geometry.py +39 -27
- modelcraft/job.py +6 -5
- modelcraft/jobs/acedrg.py +2 -0
- modelcraft/jobs/buccaneer.py +22 -4
- modelcraft/jobs/comit.py +2 -0
- modelcraft/jobs/ctruncate.py +3 -1
- modelcraft/jobs/emda.py +2 -0
- modelcraft/jobs/findwaters.py +2 -0
- modelcraft/jobs/freerflag.py +2 -0
- modelcraft/jobs/libg.py +2 -0
- modelcraft/jobs/molrep.py +2 -0
- modelcraft/jobs/nautilus.py +28 -14
- modelcraft/jobs/nucleofind.py +88 -0
- modelcraft/jobs/parrot.py +13 -2
- modelcraft/jobs/phasematch.py +2 -1
- modelcraft/jobs/refmac.py +3 -1
- modelcraft/jobs/servalcat.py +38 -4
- modelcraft/jobs/sheetbend.py +2 -0
- modelcraft/modelcraftem.py +49 -6
- modelcraft/modelcraftxray.py +90 -42
- modelcraft/monlib.py +55 -52
- modelcraft/pdbe.py +54 -0
- modelcraft/pipeline.py +1 -1
- modelcraft/prune.py +69 -0
- modelcraft/reflections.py +11 -1
- modelcraft/scripts/contents.py +5 -215
- modelcraft/scripts/copies.py +26 -17
- modelcraft/scripts/modelcraft.py +1 -0
- modelcraft/scripts/sidechains.py +141 -0
- modelcraft/scripts/validate.py +81 -0
- modelcraft/sequence.py +106 -0
- modelcraft/solvent.py +42 -113
- modelcraft/structure.py +64 -41
- modelcraft/tests/ccp4/__init__.py +7 -11
- modelcraft/tests/ccp4/test_acedrg.py +2 -0
- modelcraft/tests/ccp4/test_arguments.py +3 -0
- modelcraft/tests/ccp4/test_buccaneer.py +3 -2
- modelcraft/tests/ccp4/test_cell.py +4 -1
- modelcraft/tests/ccp4/test_comit.py +2 -0
- modelcraft/tests/ccp4/test_contents.py +99 -17
- modelcraft/tests/ccp4/test_copies.py +1 -0
- modelcraft/tests/ccp4/test_ctruncate.py +2 -0
- modelcraft/tests/ccp4/test_findwaters.py +2 -0
- modelcraft/tests/ccp4/test_freerflag.py +2 -0
- modelcraft/tests/ccp4/test_libg.py +1 -0
- modelcraft/tests/ccp4/test_molrep.py +3 -0
- modelcraft/tests/ccp4/test_monlib.py +75 -45
- modelcraft/tests/ccp4/test_nautilus.py +5 -3
- modelcraft/tests/ccp4/test_nucleofind.py +62 -0
- modelcraft/tests/ccp4/test_parrot.py +3 -1
- modelcraft/tests/ccp4/test_phasematch.py +2 -0
- modelcraft/tests/ccp4/test_prune.py +17 -0
- modelcraft/tests/ccp4/test_reflections.py +110 -1
- modelcraft/tests/ccp4/test_refmac.py +3 -0
- modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
- modelcraft/tests/ccp4/test_servalcat.py +52 -0
- modelcraft/tests/ccp4/test_sheetbend.py +4 -3
- modelcraft/tests/ccp4/test_sidechains.py +25 -0
- modelcraft/tests/ccp4/test_solvent.py +12 -26
- modelcraft/tests/ccp4/test_structure.py +1 -0
- modelcraft/tests/ccp4/test_validation.py +19 -0
- modelcraft/tests/ccp4/test_xray.py +12 -6
- modelcraft/tests/ccpem/test_em.py +3 -0
- modelcraft/tests/ccpem/test_emda.py +2 -0
- modelcraft/tests/ccpem/test_refmac.py +1 -0
- modelcraft/tests/ccpem/test_servalcat.py +4 -3
- modelcraft/utils.py +16 -4
- modelcraft/validation.py +101 -0
- modelcraft-6.0.0.dist-info/METADATA +76 -0
- modelcraft-6.0.0.dist-info/RECORD +85 -0
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
- modelcraft/coot/prune.py +0 -1085
- modelcraft/coot/sidechains.py +0 -68
- modelcraft/jobs/acorn.py +0 -114
- modelcraft/jobs/coot.py +0 -104
- modelcraft/tests/ccp4/test_coot.py +0 -29
- modelcraft/tests/ccp4/test_geometry.py +0 -20
- modelcraft/tests/unittests/__init__.py +0 -0
- modelcraft/tests/unittests/test_reflections.py +0 -101
- modelcraft-5.0.2.dist-info/LICENSE +0 -504
- modelcraft-5.0.2.dist-info/METADATA +0 -48
- modelcraft-5.0.2.dist-info/RECORD +0 -82
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/modelcraftxray.py
CHANGED
|
@@ -1,19 +1,28 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import shutil
|
|
2
3
|
import time
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from tempfile import TemporaryDirectory
|
|
6
|
+
|
|
3
7
|
import gemmi
|
|
8
|
+
|
|
4
9
|
from . import __version__
|
|
10
|
+
from .cell import max_distortion, remove_scale, update_cell
|
|
11
|
+
from .combine import combine_results
|
|
5
12
|
from .jobs.buccaneer import Buccaneer
|
|
6
|
-
from .jobs.coot import FixSideChains, Prune
|
|
7
13
|
from .jobs.ctruncate import CTruncate
|
|
8
14
|
from .jobs.findwaters import FindWaters
|
|
9
15
|
from .jobs.nautilus import Nautilus
|
|
16
|
+
from .jobs.nucleofind import NucleoFindBuild, NucleoFindPredict
|
|
10
17
|
from .jobs.parrot import Parrot
|
|
11
18
|
from .jobs.refmac import Refmac
|
|
12
19
|
from .jobs.sheetbend import Sheetbend
|
|
13
|
-
from .
|
|
14
|
-
from .combine import combine_results
|
|
20
|
+
from .monlib import MonLib
|
|
15
21
|
from .pipeline import Pipeline
|
|
22
|
+
from .prune import prune
|
|
16
23
|
from .reflections import DataItem, write_mtz
|
|
24
|
+
from .scripts.sidechains import any_missing_side_chains
|
|
25
|
+
from .scripts.sidechains import main as fix_side_chains
|
|
17
26
|
from .structure import ModelStats, remove_residues, write_mmcif
|
|
18
27
|
|
|
19
28
|
|
|
@@ -38,6 +47,10 @@ class ModelCraftXray(Pipeline):
|
|
|
38
47
|
self.last_refmac = None
|
|
39
48
|
self.output_refmac = None
|
|
40
49
|
self.cycles_without_improvement = 0
|
|
50
|
+
resnames = self.args.contents.monomer_codes()
|
|
51
|
+
if self.args.model:
|
|
52
|
+
resnames |= set(self.args.model[0].get_all_residue_names())
|
|
53
|
+
self.monlib = MonLib(resnames, self.args.restraints, include_standard=True)
|
|
41
54
|
|
|
42
55
|
@property
|
|
43
56
|
def resolution(self):
|
|
@@ -52,23 +65,23 @@ class ModelCraftXray(Pipeline):
|
|
|
52
65
|
if self.args.model is not None:
|
|
53
66
|
self._refine_input_model()
|
|
54
67
|
for self.cycle in range(1, self.args.cycles + 1):
|
|
55
|
-
print("\n## Cycle
|
|
68
|
+
print(f"\n## Cycle {self.cycle}\n", flush=True)
|
|
56
69
|
self.run_cycle()
|
|
57
70
|
self.process_cycle_output(self.last_refmac)
|
|
58
71
|
if self.cycles_without_improvement == self.args.auto_stop_cycles > 0:
|
|
59
72
|
break
|
|
60
73
|
if (
|
|
61
74
|
not self.args.basic
|
|
62
|
-
and self.
|
|
63
|
-
and self.
|
|
75
|
+
and not self.args.disable_side_chain_fixing
|
|
76
|
+
and any_missing_side_chains(self.output_refmac.structure)
|
|
64
77
|
):
|
|
65
|
-
print("\n##
|
|
78
|
+
print("\n## Adding missing side chains\n", flush=True)
|
|
66
79
|
self.cycle += 1
|
|
67
80
|
self.update_current_from_refmac_result(self.output_refmac)
|
|
68
81
|
self.fixsidechains()
|
|
69
82
|
self.process_cycle_output(self.last_refmac)
|
|
70
83
|
print("\n## Best Model:", flush=True)
|
|
71
|
-
_print_refmac_result(self.output_refmac)
|
|
84
|
+
self._print_refmac_result(self.output_refmac)
|
|
72
85
|
self._remove_current_files()
|
|
73
86
|
self.terminate(reason="Normal")
|
|
74
87
|
|
|
@@ -101,13 +114,13 @@ class ModelCraftXray(Pipeline):
|
|
|
101
114
|
self.args.model = self.current_structure
|
|
102
115
|
if self.args.phases is not None:
|
|
103
116
|
self.current_phases = self.args.phases
|
|
104
|
-
_print_refmac_result(self.last_refmac)
|
|
117
|
+
self._print_refmac_result(self.last_refmac)
|
|
105
118
|
|
|
106
119
|
def run_cycle(self):
|
|
107
120
|
if self.args.basic:
|
|
108
121
|
if self.cycle == 1:
|
|
109
122
|
self.parrot()
|
|
110
|
-
self.
|
|
123
|
+
self.run_model_building()
|
|
111
124
|
else:
|
|
112
125
|
if self.cycle > 1 and self.resolution < 2.3:
|
|
113
126
|
self.prune()
|
|
@@ -116,19 +129,25 @@ class ModelCraftXray(Pipeline):
|
|
|
116
129
|
if self.cycle > 1 or self.args.phases is None:
|
|
117
130
|
self.findwaters(dummy=True)
|
|
118
131
|
remove_residues(structure=self.current_structure, names={"HOH", "DUM"})
|
|
119
|
-
self.
|
|
132
|
+
self.run_model_building()
|
|
120
133
|
self.prune(chains_only=True)
|
|
121
134
|
self.findwaters()
|
|
122
135
|
|
|
123
|
-
def
|
|
136
|
+
def run_model_building(self):
    """Run the model-building programs and adopt the best refined result.

    Buccaneer is always run first. NucleoFind is only attempted when the
    contents include RNA or DNA, Buccaneer produced a result for it to
    extend, and the ``nucleofind`` executable is on PATH. Otherwise (or if
    NucleoFind raises ``FileNotFoundError``) Nautilus is run and the best of
    the Buccaneer, Nautilus and combined results (lowest R-free) becomes the
    current model.

    The pipeline is terminated with "No residues built" when neither
    Buccaneer nor Nautilus returns a result.
    """
    buccaneer = self.buccaneer()
    contents = self.args.contents
    has_nucleic = bool(contents.rnas or contents.dnas)
    # self.nucleofind() reads attributes of the Buccaneer result and returns
    # None without touching the current model when there is no nucleic acid
    # to build. Entering that path for protein-only contents (or with no
    # Buccaneer result) would silently discard the Buccaneer model or raise
    # AttributeError, so guard it here.
    if has_nucleic and buccaneer is not None and shutil.which("nucleofind"):
        try:
            return self.nucleofind(buccaneer)
        except FileNotFoundError:
            # Best effort: fall back to Nautilus below.
            pass
    nautilus = self.nautilus()
    if buccaneer is None and nautilus is None:
        self.terminate(reason="No residues built")
    if buccaneer is None or nautilus is None:
        # Only one program produced a model, so there is nothing to combine.
        self.update_current_from_refmac_result(buccaneer or nautilus)
    else:
        combined_structure = combine_results(buccaneer, nautilus, self.monlib)
        combined = self.run_refmac(combined_structure, cycles=5)
        best = min((buccaneer, nautilus, combined), key=lambda result: result.rfree)
        self.update_current_from_refmac_result(best)
|
|
134
153
|
|
|
@@ -149,11 +168,32 @@ class ModelCraftXray(Pipeline):
|
|
|
149
168
|
cycles=3 if self.cycle == 1 else 2,
|
|
150
169
|
threads=self.args.threads,
|
|
151
170
|
).run(self)
|
|
152
|
-
if
|
|
171
|
+
if (
|
|
172
|
+
result.structure is None
|
|
173
|
+
or ModelStats(result.structure, self.monlib).residues == 0
|
|
174
|
+
):
|
|
153
175
|
return None
|
|
154
176
|
write_mmcif(self.path("current.cif"), result.structure)
|
|
155
177
|
return self.run_refmac(result.structure, cycles=10)
|
|
156
178
|
|
|
179
|
+
def nucleofind(self, refmac):
    """Build nucleic acid with NucleoFind on top of a refined model.

    ``refmac`` is the refinement result whose ``fphi_best`` map and
    ``structure`` are passed to the NucleoFind prediction and build jobs.
    Nothing is done when the contents hold no RNA or DNA, or when the build
    job yields no structure or a structure with zero residues. Otherwise
    the built model is written to ``current.cif`` and refined for 10 cycles
    with ``auto_accept=True``. The method always returns None.
    """
    contents = self.args.contents
    if not contents.rnas and not contents.dnas:
        return None
    density = refmac.fphi_best
    prediction = NucleoFindPredict(fphi=density).run(self)
    build_job = NucleoFindBuild(
        contents=contents,
        fphi=density,
        structure=refmac.structure,
        prediction=prediction,
    )
    result = build_job.run(self)
    built = result.structure
    if built is None:
        return None
    if ModelStats(built, self.monlib).residues == 0:
        return None
    write_mmcif(self.path("current.cif"), built)
    self.refmac(built, cycles=10, auto_accept=True)
|
|
196
|
+
|
|
157
197
|
def nautilus(self):
|
|
158
198
|
if not (self.args.contents.rnas or self.args.contents.dnas):
|
|
159
199
|
return None
|
|
@@ -165,7 +205,10 @@ class ModelCraftXray(Pipeline):
|
|
|
165
205
|
freer=self.args.freer,
|
|
166
206
|
structure=self.current_structure,
|
|
167
207
|
).run(self)
|
|
168
|
-
if
|
|
208
|
+
if (
|
|
209
|
+
result.structure is None
|
|
210
|
+
or ModelStats(result.structure, self.monlib).residues == 0
|
|
211
|
+
):
|
|
169
212
|
return None
|
|
170
213
|
write_mmcif(self.path("current.cif"), result.structure)
|
|
171
214
|
return self.run_refmac(result.structure, cycles=10)
|
|
@@ -181,7 +224,7 @@ class ModelCraftXray(Pipeline):
|
|
|
181
224
|
write_mmcif(self.path("current.cif"), self.current_structure)
|
|
182
225
|
|
|
183
226
|
def run_refmac(self, structure: gemmi.Structure, cycles: int):
|
|
184
|
-
if ModelStats(structure).residues == 0:
|
|
227
|
+
if ModelStats(structure, self.monlib).residues == 0:
|
|
185
228
|
self.terminate(reason="No residues to refine")
|
|
186
229
|
use_phases = self.args.unbiased and (
|
|
187
230
|
self.output_refmac is None or self.output_refmac.rwork > 0.35
|
|
@@ -216,6 +259,7 @@ class ModelCraftXray(Pipeline):
|
|
|
216
259
|
phases=self.current_phases,
|
|
217
260
|
fphi=self.current_fphi_best,
|
|
218
261
|
structure=self.current_structure,
|
|
262
|
+
monlib=self.monlib,
|
|
219
263
|
).run(self)
|
|
220
264
|
self.current_phases = result.abcd
|
|
221
265
|
self.current_fphi_best = result.fphi
|
|
@@ -224,25 +268,30 @@ class ModelCraftXray(Pipeline):
|
|
|
224
268
|
def prune(self, chains_only=False):
    """Prune the current model and refine the pruned structure.

    Does nothing when pruning is disabled or the contents hold no protein.
    With ``chains_only=True`` the module-level ``prune`` function is called
    with ``residues=False``. If it returns a structure, that structure is
    written to ``current.cif`` and refined for 5 cycles with
    ``auto_accept=True``.
    """
    pruning_enabled = not self.args.disable_pruning
    if not (pruning_enabled and self.args.contents.proteins):
        return
    # Inside the method body ``prune`` resolves to the module-level function
    # imported from .prune, not to this method.
    pruned = prune(
        structure=self.current_structure,
        fphi_best=self.current_fphi_best,
        fphi_diff=self.current_fphi_diff,
        fphi_calc=self.current_fphi_calc,
        residues=not chains_only,
        monlib=self.monlib,
    )
    if not pruned:
        return
    write_mmcif(self.path("current.cif"), pruned)
    self.refmac(pruned, cycles=5, auto_accept=True)
|
|
235
282
|
|
|
236
283
|
def fixsidechains(self):
    """Run the side-chain fixing script on the current model.

    The current structure and best map are written to a temporary
    directory and passed to the side-chain script as
    ``[xyzin, hklin, xyzout]``. If the script produced an output file, it
    is read back, written to ``current.cif`` and refined for 5 cycles with
    ``auto_accept=False``.
    """
    with TemporaryDirectory() as tempdir:
        workdir = Path(tempdir)
        xyzin, hklin, xyzout = (
            str(workdir / name)
            for name in ("input.cif", "input.mtz", "output.cif")
        )
        write_mmcif(xyzin, self.current_structure)
        write_mtz(hklin, [self.current_fphi_best], ["FWT,PHWT"])
        fix_side_chains([xyzin, hklin, xyzout])
        # The script may finish without writing a model; only continue
        # when an output file exists.
        if not os.path.exists(xyzout):
            return
        structure = gemmi.read_structure(xyzout)
        write_mmcif(self.path("current.cif"), structure)
        self.refmac(structure, cycles=5, auto_accept=False)
|
|
246
295
|
|
|
247
296
|
def findwaters(self, dummy=False):
|
|
248
297
|
if dummy and self.args.disable_dummy_atoms:
|
|
@@ -258,8 +307,8 @@ class ModelCraftXray(Pipeline):
|
|
|
258
307
|
self.refmac(result.structure, cycles=10, auto_accept=False)
|
|
259
308
|
|
|
260
309
|
def process_cycle_output(self, result):
|
|
261
|
-
_print_refmac_result(result)
|
|
262
|
-
model_stats = ModelStats(result.structure)
|
|
310
|
+
self._print_refmac_result(result)
|
|
311
|
+
model_stats = ModelStats(result.structure, self.monlib)
|
|
263
312
|
stats = {
|
|
264
313
|
"cycle": self.cycle,
|
|
265
314
|
"residues": model_stats.residues,
|
|
@@ -308,13 +357,12 @@ class ModelCraftXray(Pipeline):
|
|
|
308
357
|
except FileNotFoundError:
|
|
309
358
|
pass
|
|
310
359
|
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
print(f"R-free: {result.rfree:6.4f}", flush=True)
|
|
360
|
+
def _print_refmac_result(self, result):
    """Print model composition and R-factors for a refinement result.

    Writes a blank line followed by the residue, protein, nucleic acid and
    water counts from ``ModelStats`` and the R-work / R-free of ``result``.
    Standard output is flushed after the final line.
    """
    counts = ModelStats(result.structure, self.monlib)
    rows = (
        ("Residues", counts.residues, "6d"),
        ("Protein", counts.protein, "6d"),
        ("Nucleic", counts.nucleic, "6d"),
        ("Waters", counts.waters, "6d"),
        ("R-work", result.rwork, "6.4f"),
        ("R-free", result.rfree, "6.4f"),
    )
    print("")
    final = len(rows) - 1
    for index, (label, value, spec) in enumerate(rows):
        # Only the last line forces a flush.
        print(f"{label}: {value:{spec}}", flush=index == final)
|
modelcraft/monlib.py
CHANGED
|
@@ -1,55 +1,58 @@
|
|
|
1
|
-
import functools
|
|
2
1
|
import os
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
@functools.lru_cache(maxsize=None)
|
|
7
|
-
def _path(code: str) -> str:
|
|
8
|
-
directory = os.path.join(os.environ["CLIBD_MON"], code[0].lower())
|
|
9
|
-
single = os.path.join(directory, f"{code.upper()}.cif")
|
|
10
|
-
double = os.path.join(directory, f"{code.upper()}_{code.upper()}.cif")
|
|
11
|
-
return double if os.path.exists(double) else single
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@functools.lru_cache(maxsize=None)
|
|
15
|
-
def atom_ids(code: str) -> set:
|
|
16
|
-
return {atom.id for atom in chemcomp(code).atoms}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@functools.lru_cache(maxsize=None)
|
|
20
|
-
def chemcomp(code: str) -> gemmi.ChemComp:
|
|
21
|
-
doc = gemmi.cif.read(_path(code))
|
|
22
|
-
return gemmi.make_chemcomp_from_block(doc[-1])
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
@functools.lru_cache(maxsize=None)
|
|
26
|
-
def in_library(code: str) -> bool:
|
|
27
|
-
return os.path.exists(_path(code))
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
@functools.lru_cache(maxsize=None)
|
|
31
|
-
def group(code: str) -> gemmi.ChemComp.Group:
|
|
32
|
-
if in_library(code):
|
|
33
|
-
doc = gemmi.cif.read(_path(code))
|
|
34
|
-
monlib = gemmi.MonLib()
|
|
35
|
-
monlib.read_monomer_doc(doc)
|
|
36
|
-
return monlib.monomers[code].group
|
|
37
|
-
return None
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@functools.lru_cache(maxsize=None)
|
|
41
|
-
def is_protein(code: str) -> bool:
|
|
42
|
-
return group(code) in {
|
|
43
|
-
gemmi.ChemComp.Group.Peptide,
|
|
44
|
-
gemmi.ChemComp.Group.PPeptide,
|
|
45
|
-
gemmi.ChemComp.Group.MPeptide,
|
|
46
|
-
}
|
|
2
|
+
import sys
|
|
47
3
|
|
|
4
|
+
import gemmi
|
|
48
5
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
6
|
+
from .sequence import DNA_CODES, PROTEIN_CODES, RNA_CODES
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MonLib(gemmi.MonLib):
    """A ``gemmi.MonLib`` pre-loaded with the monomers a job needs.

    Adds dictionary-style access (``code in monlib``, ``monlib[code]``) and
    small helpers for classifying monomers and estimating their size.
    """

    def __init__(self, resnames, libin: str = "", include_standard: bool = False):
        """Load monomer definitions for ``resnames``.

        ``libin``, when given, is read first with ``read_monomer_cif``.
        With ``include_standard`` the codes from ``PROTEIN_CODES``,
        ``RNA_CODES`` and ``DNA_CODES`` plus ``MSE`` and ``HOH`` are added
        to ``resnames``. The monomers are then read from the directory named
        by the ``CLIBD_MON`` environment variable.

        Raises ``ValueError`` when ``read_monomer_lib`` reports failure.
        """
        super().__init__()
        if libin:
            self.read_monomer_cif(libin)
        wanted = resnames
        if include_standard:
            wanted = {
                *resnames,
                *PROTEIN_CODES.values(),
                *RNA_CODES.values(),
                *DNA_CODES.values(),
                "MSE",
                "HOH",
            }
        loaded = self.read_monomer_lib(
            os.environ["CLIBD_MON"], list(wanted), logging=sys.stderr
        )
        if not loaded:
            raise ValueError("Please create definitions for missing monomers.")

    def __contains__(self, code: str):
        """Return whether a definition for ``code`` has been loaded."""
        return code in self.monomers

    def __getitem__(self, code: str):
        """Return the loaded definition for ``code`` or raise ``KeyError``."""
        if code in self:
            return self.monomers[code]
        raise KeyError(f"Monomer {code} not in this monomer library instance")

    def atom_ids(self, code: str):
        """Return the set of atom ids in the definition of ``code``."""
        return {atom.id for atom in self[code].atoms}

    def group(self, code: str):
        """Return the group of ``code``, or ``Group.Null`` if not loaded."""
        if code not in self:
            return gemmi.ChemComp.Group.Null
        return self[code].group

    def is_nucleic(self, code: str) -> bool:
        """Return whether ``code`` belongs to a DNA, RNA or DNA/RNA group."""
        groups = gemmi.ChemComp.Group
        return self.group(code) in {groups.Dna, groups.Rna, groups.DnaRna}

    def is_protein(self, code: str) -> bool:
        """Return whether ``code`` belongs to one of the peptide groups."""
        groups = gemmi.ChemComp.Group
        return self.group(code) in {
            groups.Peptide,
            groups.PPeptide,
            groups.MPeptide,
        }

    def volume(self, code: str):
        """Return 18 for every non-hydrogen atom in ``code``, summed."""
        heavy_atoms = sum(1 for atom in self[code].atoms if not atom.is_hydrogen())
        return 18 * heavy_atoms

    def weight(self, code: str):
        """Return the summed element weights of all atoms in ``code``."""
        return sum(atom.el.weight for atom in self[code].atoms)
|
modelcraft/pdbe.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import multiprocessing
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
_MULTIPROCESSING_LOCK = multiprocessing.Lock()
|
|
7
|
+
_SERVER = "https://www.ebi.ac.uk/pdbe"
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _response_json(url, data=None):
    """Fetch ``url`` and return the decoded JSON body.

    A GET request is sent when ``data`` is None, otherwise a POST with
    ``data`` as the form payload. Both use a 30 second timeout. The request
    is made while holding the module-level multiprocessing lock. HTTP error
    statuses are raised via ``raise_for_status``.
    """
    with _MULTIPROCESSING_LOCK:
        print("Requesting:", url)
        response = (
            requests.get(url, timeout=30)
            if data is None
            else requests.post(url, data=data, timeout=30)
        )
        response.raise_for_status()
        return response.json()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def molecule_dicts(entry_id: str) -> list:
    """Return the PDBe molecule dictionaries for an entry.

    The entry id is first replaced by its superseding entry, if any. When
    at least one molecule is a carbohydrate polymer, every molecule gets a
    ``carb_codes`` key holding the carbohydrate codes looked up for its
    ``entity_id`` (None when there are none).
    """
    entry_id = _superceeding_entry(entry_id)
    reply = _response_json(_SERVER + "/api/pdb/entry/molecules/" + entry_id)
    mols = reply[entry_id]
    has_carbohydrate = any(
        mol["molecule_type"] == "carbohydrate polymer" for mol in mols
    )
    if not has_carbohydrate:
        return mols
    codes = _carb_codes(entry_id)
    for mol in mols:
        mol["carb_codes"] = codes.get(mol["entity_id"])
    return mols
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _superceeding_entry(entry_id: str) -> str:
    """Return the id of the entry that supersedes ``entry_id``.

    The id is lower-cased and looked up in the PDBe status API. The last
    item of the ``superceded_by`` list is returned, or the lower-cased id
    itself when that list is missing or empty.
    """
    entry_id = entry_id.lower()
    status = _response_json(_SERVER + "/api/pdb/entry/status/" + entry_id)
    replacements = status[entry_id][0].get("superceded_by", [])
    if len(replacements) > 0:
        return replacements[-1]
    return entry_id
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _carb_codes(entry: str) -> dict:
    """Return carbohydrate codes and copy numbers per entity for an entry.

    Queries the PDBe search service for the ``carb_compound_id_entity``
    field and parses each value of the form ``CODE(copies)_entity``. The
    result maps the integer entity id to a ``{code: copies}`` dictionary.
    """
    field = "carb_compound_id_entity"
    payload = {"q": "pdb_id:" + entry, "fl": field, "wt": "json"}
    reply = _response_json(_SERVER + "/search/pdb/select?", data=payload)
    pattern = re.compile(r"(.+)\((\d+)\)_(\d+)")
    codes = {}
    for doc in reply["response"]["docs"]:
        for line in doc[field]:
            code, copies, entity = pattern.match(line).groups()
            per_entity = codes.setdefault(int(entity), {})
            per_entity[code] = int(copies)
    return codes
|
modelcraft/pipeline.py
CHANGED
|
@@ -55,7 +55,7 @@ class Pipeline:
|
|
|
55
55
|
def write_report(self):
    """Write the pipeline report as JSON, if a report file name is set.

    Updates ``self.seconds["total"]`` with the time elapsed since
    ``self.start_time`` and dumps ``self.report`` with a 4-space indent to
    the path given by ``self.path(self.json_name)``.
    """
    if not self.json_name:
        return
    self.seconds["total"] = time.time() - self.start_time
    report_path = self.path(self.json_name)
    with open(report_path, "w", encoding="utf-8") as handle:
        json.dump(self.report, handle, indent=4)
|
|
60
60
|
|
|
61
61
|
def terminate(self, reason: str):
|
modelcraft/prune.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import gemmi
|
|
2
|
+
|
|
3
|
+
from .monlib import MonLib
|
|
4
|
+
from .reflections import DataItem
|
|
5
|
+
from .structure import remove_isolated_fragments
|
|
6
|
+
from .validation import validate
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def prune(
    structure: gemmi.Structure,
    fphi_best: DataItem,
    fphi_diff: DataItem,
    fphi_calc: DataItem,
    residues: bool = True,
    chain_threshold: float = -2,
    residue_threshold: float = -5,
    monlib: MonLib = None,
) -> gemmi.Structure:
    """Delete poorly scoring chains and residues from a copy of a structure.

    The structure is cloned and validated against the three maps. Chains are
    visited from the lowest mean score upwards; a chain is deleted when its
    mean score is below ``chain_threshold``, it has at most 20 residues and
    the running total of deleted residues stays within 20% of the validated
    residues. Unless ``residues`` is False, residues scoring below
    ``residue_threshold`` are then deleted (worst first, at most 20% of the
    remaining residues) and isolated single residues are removed.

    Args:
        structure: Model to prune; it is not modified.
        fphi_best: Map coefficients passed to ``validate``.
        fphi_diff: Map coefficients passed to ``validate``.
        fphi_calc: Map coefficients passed to ``validate``.
        residues: Whether to prune individual residues after chains.
        chain_threshold: Mean score below which a chain may be deleted.
        residue_threshold: Score below which a residue may be deleted.
        monlib: Monomer library; built from the structure's residue names
            when omitted.

    Returns:
        The pruned copy, or None when nothing was deleted.
    """
    print("Performing validation for pruning", flush=True)
    structure = structure.clone()
    monlib = monlib or MonLib(structure[0].get_all_residue_names())
    metrics = validate(structure, fphi_best, fphi_diff, fphi_calc, monlib)

    max_deleted = int(len(metrics) * 0.2)
    num_deleted = 0
    grouped = metrics.groupby("Chain")
    means = grouped.mean(numeric_only=True)
    # Chain sizes do not change while looping, so compute them once.
    sizes = grouped.size()
    for chain_name in means.sort_values("Score").index:
        score = means.loc[chain_name, "Score"]
        count = sizes.loc[chain_name]
        print(
            f"Chain {chain_name} has a score of {score} over {count} residues",
            flush=True,
        )
        if (
            score < chain_threshold
            and count <= 20
            and num_deleted + count <= max_deleted
        ):
            print("Deleting chain", chain_name, flush=True)
            del structure[0][chain_name]
            # Accumulate the residues removed so the 20% cap is enforced and
            # chain-only pruning reports that something was deleted.
            num_deleted += count
            metrics = metrics[metrics["Chain"] != chain_name]

    if not residues:
        return structure if num_deleted > 0 else None

    max_deleted = int(len(metrics) * 0.2)
    # Sort into a new frame: sorting a filtered slice in place triggers
    # pandas' SettingWithCopyWarning.
    worst = metrics[metrics["Score"] < residue_threshold]
    worst = worst.sort_values("Score").head(max_deleted)
    if len(worst) == 0:
        return structure if num_deleted > 0 else None

    print(
        f"Deleting {len(worst)} residues with scores < {residue_threshold}",
        flush=True,
    )
    to_delete = set(zip(worst["Chain"], worst["SeqId"]))
    for chain in structure[0]:
        # Walk backwards so deleting by index does not shift later residues.
        for i, residue in reversed(list(enumerate(chain))):
            if (chain.name, str(residue.seqid)) in to_delete:
                del chain[i]

    print("Removing isolated residues (if any)", flush=True)
    for chain in structure[0]:
        remove_isolated_fragments(chain, monlib, max_length=1)

    return structure
|
modelcraft/reflections.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
from typing import Iterator, Iterable, List, Optional, Union
|
|
2
1
|
import itertools
|
|
3
2
|
import re
|
|
3
|
+
from functools import partial
|
|
4
|
+
from typing import Iterable, Iterator, List, Optional, Union
|
|
5
|
+
|
|
4
6
|
import gemmi
|
|
5
7
|
import numpy
|
|
6
8
|
import pandas
|
|
@@ -104,6 +106,14 @@ class DataItem(gemmi.Mtz):
|
|
|
104
106
|
data = numpy.array(self, copy=copy)
|
|
105
107
|
return pandas.DataFrame(data=data, columns=self.column_labels())
|
|
106
108
|
|
|
109
|
+
def map(self, spacing: float = 1.0, size=None) -> gemmi.FloatGrid:
    """Transform the F and PHI columns of this item into a map.

    When ``size`` is given it is passed as the exact grid size. Otherwise
    the sample rate is the high resolution limit divided by ``spacing``.

    Raises ``ValueError`` if the item's column types are not ``"FP"``.
    """
    if self.types != "FP":
        raise ValueError("DataItem must contain F and PHI columns")
    f_label, phi_label = self.label(0), self.label(1)
    if size is not None:
        return self.transform_f_phi_to_map(f_label, phi_label, exact_size=size)
    rate = self.resolution_high() / spacing
    return self.transform_f_phi_to_map(f_label, phi_label, sample_rate=rate)
|
|
116
|
+
|
|
107
117
|
@classmethod
|
|
108
118
|
def search(cls, mtz: gemmi.Mtz, types: str, sequential: bool = True):
|
|
109
119
|
types = list(types)
|