modelcraft 5.0.2__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelcraft/__init__.py +16 -31
- modelcraft/__main__.py +0 -1
- modelcraft/arguments.py +35 -7
- modelcraft/combine.py +22 -41
- modelcraft/contents.py +188 -164
- modelcraft/environ.py +0 -7
- modelcraft/geometry.py +39 -27
- modelcraft/job.py +6 -5
- modelcraft/jobs/acedrg.py +2 -0
- modelcraft/jobs/buccaneer.py +22 -4
- modelcraft/jobs/comit.py +2 -0
- modelcraft/jobs/ctruncate.py +3 -1
- modelcraft/jobs/emda.py +2 -0
- modelcraft/jobs/findwaters.py +2 -0
- modelcraft/jobs/freerflag.py +2 -0
- modelcraft/jobs/libg.py +2 -0
- modelcraft/jobs/molrep.py +2 -0
- modelcraft/jobs/nautilus.py +28 -14
- modelcraft/jobs/nucleofind.py +88 -0
- modelcraft/jobs/parrot.py +13 -2
- modelcraft/jobs/phasematch.py +2 -1
- modelcraft/jobs/refmac.py +3 -1
- modelcraft/jobs/servalcat.py +38 -4
- modelcraft/jobs/sheetbend.py +2 -0
- modelcraft/modelcraftem.py +49 -6
- modelcraft/modelcraftxray.py +90 -42
- modelcraft/monlib.py +55 -52
- modelcraft/pdbe.py +54 -0
- modelcraft/pipeline.py +1 -1
- modelcraft/prune.py +69 -0
- modelcraft/reflections.py +11 -1
- modelcraft/scripts/contents.py +5 -215
- modelcraft/scripts/copies.py +26 -17
- modelcraft/scripts/modelcraft.py +1 -0
- modelcraft/scripts/sidechains.py +141 -0
- modelcraft/scripts/validate.py +81 -0
- modelcraft/sequence.py +106 -0
- modelcraft/solvent.py +42 -113
- modelcraft/structure.py +64 -41
- modelcraft/tests/ccp4/__init__.py +7 -11
- modelcraft/tests/ccp4/test_acedrg.py +2 -0
- modelcraft/tests/ccp4/test_arguments.py +3 -0
- modelcraft/tests/ccp4/test_buccaneer.py +3 -2
- modelcraft/tests/ccp4/test_cell.py +4 -1
- modelcraft/tests/ccp4/test_comit.py +2 -0
- modelcraft/tests/ccp4/test_contents.py +99 -17
- modelcraft/tests/ccp4/test_copies.py +1 -0
- modelcraft/tests/ccp4/test_ctruncate.py +2 -0
- modelcraft/tests/ccp4/test_findwaters.py +2 -0
- modelcraft/tests/ccp4/test_freerflag.py +2 -0
- modelcraft/tests/ccp4/test_libg.py +1 -0
- modelcraft/tests/ccp4/test_molrep.py +3 -0
- modelcraft/tests/ccp4/test_monlib.py +75 -45
- modelcraft/tests/ccp4/test_nautilus.py +5 -3
- modelcraft/tests/ccp4/test_nucleofind.py +62 -0
- modelcraft/tests/ccp4/test_parrot.py +3 -1
- modelcraft/tests/ccp4/test_phasematch.py +2 -0
- modelcraft/tests/ccp4/test_prune.py +17 -0
- modelcraft/tests/ccp4/test_reflections.py +110 -1
- modelcraft/tests/ccp4/test_refmac.py +3 -0
- modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
- modelcraft/tests/ccp4/test_servalcat.py +52 -0
- modelcraft/tests/ccp4/test_sheetbend.py +4 -3
- modelcraft/tests/ccp4/test_sidechains.py +25 -0
- modelcraft/tests/ccp4/test_solvent.py +12 -26
- modelcraft/tests/ccp4/test_structure.py +1 -0
- modelcraft/tests/ccp4/test_validation.py +19 -0
- modelcraft/tests/ccp4/test_xray.py +12 -6
- modelcraft/tests/ccpem/test_em.py +3 -0
- modelcraft/tests/ccpem/test_emda.py +2 -0
- modelcraft/tests/ccpem/test_refmac.py +1 -0
- modelcraft/tests/ccpem/test_servalcat.py +4 -3
- modelcraft/utils.py +16 -4
- modelcraft/validation.py +101 -0
- modelcraft-6.0.0.dist-info/METADATA +76 -0
- modelcraft-6.0.0.dist-info/RECORD +85 -0
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
- modelcraft/coot/prune.py +0 -1085
- modelcraft/coot/sidechains.py +0 -68
- modelcraft/jobs/acorn.py +0 -114
- modelcraft/jobs/coot.py +0 -104
- modelcraft/tests/ccp4/test_coot.py +0 -29
- modelcraft/tests/ccp4/test_geometry.py +0 -20
- modelcraft/tests/unittests/__init__.py +0 -0
- modelcraft/tests/unittests/test_reflections.py +0 -101
- modelcraft-5.0.2.dist-info/LICENSE +0 -504
- modelcraft-5.0.2.dist-info/METADATA +0 -48
- modelcraft-5.0.2.dist-info/RECORD +0 -82
- {modelcraft-5.0.2.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/contents.py
CHANGED
|
@@ -1,63 +1,43 @@
|
|
|
1
|
-
import
|
|
1
|
+
import abc
|
|
2
|
+
import functools
|
|
2
3
|
import json
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
DNA_CODES = {
|
|
42
|
-
"A": "DA",
|
|
43
|
-
"C": "DC",
|
|
44
|
-
"G": "DG",
|
|
45
|
-
"I": "DI",
|
|
46
|
-
"T": "DT",
|
|
47
|
-
"U": "DU",
|
|
48
|
-
"X": "DN",
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
PIR_CODES = {"D1", "DC", "DL", "F1", "N1", "N3", "P1", "RC", "RL", "XX"}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class PolymerType(enum.Enum):
|
|
55
|
-
PROTEIN = "PROTEIN"
|
|
56
|
-
RNA = "RNA"
|
|
57
|
-
DNA = "DNA"
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
class Polymer:
|
|
4
|
+
import math
|
|
5
|
+
|
|
6
|
+
from . import pdbe
|
|
7
|
+
from .monlib import MonLib
|
|
8
|
+
from .sequence import PolymerType, sequences_in_file
|
|
9
|
+
|
|
10
|
+
BUFFERS = {"12P", "144", "15P", "16D", "1BO", "1PS", "2OS", "3CO", "3NI", "ACA", "ACN"}
|
|
11
|
+
BUFFERS |= {"ACT", "ACY", "AG", "AGC", "AL", "AZI", "B3P", "B7G", "BA", "BCN", "BE7"}
|
|
12
|
+
BUFFERS |= {"BEQ", "BGC", "BMA", "BNG", "BOG", "BR", "BRO", "BTB", "BTC", "BU1", "BU2"}
|
|
13
|
+
BUFFERS |= {"BU3", "C10", "C15", "C8E", "CA", "CAC", "CBM", "CBX", "CCN", "CD", "CE1"}
|
|
14
|
+
BUFFERS |= {"CIT", "CL", "CLO", "CM", "CM5", "CN", "CO", "CPS", "CRY", "CS", "CU"}
|
|
15
|
+
BUFFERS |= {"CU1", "CXE", "CYN", "CYS", "DDQ", "DHD", "DIA", "DIO", "DMF", "DMS", "DMU"}
|
|
16
|
+
BUFFERS |= {"DMX", "DOX", "DPR", "DR6", "DXG", "EDO", "EEE", "EGL", "EOH", "ETF", "F"}
|
|
17
|
+
BUFFERS |= {"FCL", "FCY", "FE", "FE2", "FLO", "FMT", "FRU", "GBL", "GCD", "GLC", "GLO"}
|
|
18
|
+
BUFFERS |= {"GLY", "GOL", "GPX", "HEZ", "HG", "HTG", "HTO", "ICI", "ICT", "IDO", "IDT"}
|
|
19
|
+
BUFFERS |= {"IOD", "IOH", "IPA", "IPH", "JEF", "K", "LAK", "LAT", "LBT", "LDA", "LI"}
|
|
20
|
+
BUFFERS |= {"LMT", "MA4", "MAN", "MG", "MG8", "MHA", "MN", "MN3", "MOH", "MPD", "MPO"}
|
|
21
|
+
BUFFERS |= {"MRD", "MRY", "MTL", "N8E", "NA", "NCO", "NH4", "NHE", "NI", "NO3", "OTE"}
|
|
22
|
+
BUFFERS |= {"P33", "P4C", "PB", "PDO", "PE4", "PE7", "PE8", "PEU", "PG5", "PG6", "PGE"}
|
|
23
|
+
BUFFERS |= {"PGO", "PGQ", "PGR", "PIG", "PIN", "POL", "RB", "SAL", "SBT", "SCN", "SDS"}
|
|
24
|
+
BUFFERS |= {"SO4", "SOR", "SPD", "SPK", "SPM", "SR", "SUC", "SUL", "SYL", "TAR", "TAU"}
|
|
25
|
+
BUFFERS |= {"TBU", "TEP", "TFP", "TLA", "TMA", "TRE", "TRS", "TRT", "UMQ", "UNX", "URE"}
|
|
26
|
+
BUFFERS |= {"XPE", "Y1", "YT3", "ZN", "ZN2"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@functools.cache
|
|
30
|
+
def is_buffer(code: str) -> bool:
|
|
31
|
+
return code.upper() in BUFFERS
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class Component(abc.ABC):
|
|
35
|
+
@abc.abstractmethod
|
|
36
|
+
def volume(self, monlib: MonLib):
|
|
37
|
+
pass
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class Polymer(Component):
|
|
61
41
|
def __init__(
|
|
62
42
|
self,
|
|
63
43
|
sequence: str,
|
|
@@ -67,29 +47,54 @@ class Polymer:
|
|
|
67
47
|
):
|
|
68
48
|
self.sequence = sequence.upper()
|
|
69
49
|
self.stoichiometry = stoichiometry
|
|
70
|
-
self.type = polymer_type or
|
|
50
|
+
self.type = polymer_type or PolymerType.guess(self.sequence)
|
|
71
51
|
self.modifications = modifications or []
|
|
72
52
|
|
|
73
|
-
def
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
return NotImplemented
|
|
53
|
+
def __str__(self) -> str:
|
|
54
|
+
s = f"{self.type.name} with {len(self.sequence)} residues: "
|
|
55
|
+
if len(self.sequence) > 9:
|
|
56
|
+
s += f"{self.sequence[:3]}...{self.sequence[-3:]}"
|
|
57
|
+
else:
|
|
58
|
+
s += f"{self.sequence:9}"
|
|
59
|
+
return s
|
|
81
60
|
|
|
82
61
|
@classmethod
|
|
83
|
-
def from_json(cls, component: dict) -> "Polymer":
|
|
62
|
+
def from_json(cls, component: dict, polymer_type: PolymerType) -> "Polymer":
|
|
84
63
|
return cls(
|
|
85
64
|
sequence=component["sequence"],
|
|
86
65
|
stoichiometry=component.get("stoichiometry"),
|
|
66
|
+
polymer_type=polymer_type,
|
|
87
67
|
modifications=component.get("modifications"),
|
|
88
68
|
)
|
|
89
69
|
|
|
70
|
+
@classmethod
|
|
71
|
+
def from_pdbe(cls, mol: dict, polymer_type: PolymerType) -> "Polymer":
|
|
72
|
+
mod_indices = {}
|
|
73
|
+
for index, mod in mol["pdb_sequence_indices_with_multiple_residues"].items():
|
|
74
|
+
code1 = mod["one_letter_code"]
|
|
75
|
+
code3 = mod["three_letter_code"]
|
|
76
|
+
if code3 not in ("DA", "DC", "DG", "DT"):
|
|
77
|
+
key = code1, code3
|
|
78
|
+
mod_indices.setdefault(key, []).append(index)
|
|
79
|
+
modifications = []
|
|
80
|
+
for (code1, code3), indices in mod_indices.items():
|
|
81
|
+
total = mol["sequence"].count(code1)
|
|
82
|
+
if code1 == "M" and mol["sequence"][0] == "M":
|
|
83
|
+
total -= 1
|
|
84
|
+
if len(indices) >= total:
|
|
85
|
+
modifications.append(f"{code1}->{code3}")
|
|
86
|
+
else:
|
|
87
|
+
modifications.extend(f"{index}->{code3}" for index in indices)
|
|
88
|
+
return cls(
|
|
89
|
+
sequence=mol["sequence"],
|
|
90
|
+
stoichiometry=mol["number_of_copies"],
|
|
91
|
+
polymer_type=polymer_type,
|
|
92
|
+
modifications=modifications,
|
|
93
|
+
)
|
|
94
|
+
|
|
90
95
|
@classmethod
|
|
91
96
|
def from_sequence_file(cls, path: str, polymer_type: PolymerType = None):
|
|
92
|
-
with open(path) as stream:
|
|
97
|
+
with open(path, encoding="utf-8") as stream:
|
|
93
98
|
contents = stream.read()
|
|
94
99
|
for sequence in sequences_in_file(contents=contents):
|
|
95
100
|
yield cls(sequence=sequence, polymer_type=polymer_type)
|
|
@@ -102,7 +107,7 @@ class Polymer:
|
|
|
102
107
|
}
|
|
103
108
|
|
|
104
109
|
def residue_codes(self, modified: bool = True) -> list:
|
|
105
|
-
codes =
|
|
110
|
+
codes = self.type.parse(self.sequence)
|
|
106
111
|
if modified:
|
|
107
112
|
for mod in self.modifications:
|
|
108
113
|
source, code = mod.split("->")
|
|
@@ -118,16 +123,27 @@ class Polymer:
|
|
|
118
123
|
def is_selenomet(self) -> bool:
|
|
119
124
|
return "M->MSE" in self.modifications
|
|
120
125
|
|
|
126
|
+
def weight(self, monlib: MonLib) -> float:
|
|
127
|
+
codes = self.residue_codes(modified=False)
|
|
128
|
+
weight = sum(monlib.weight(code) for code in codes)
|
|
129
|
+
weight -= monlib.weight("HOH") * (len(codes) - 1)
|
|
130
|
+
return weight
|
|
131
|
+
|
|
132
|
+
def volume(self, monlib: MonLib) -> float:
|
|
133
|
+
density = 1.35 if self.type == PolymerType.PROTEIN else 2.0
|
|
134
|
+
return self.weight(monlib) / (density * 0.602214)
|
|
135
|
+
|
|
121
136
|
|
|
122
|
-
class Carb:
|
|
137
|
+
class Carb(Component):
|
|
123
138
|
def __init__(self, codes: dict, stoichiometry: int = None):
|
|
124
139
|
self.codes = codes
|
|
125
140
|
self.stoichiometry = stoichiometry
|
|
126
141
|
|
|
127
|
-
def
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
142
|
+
def __str__(self) -> str:
|
|
143
|
+
s = "Carb:"
|
|
144
|
+
for code, count in self.codes.items():
|
|
145
|
+
s += f" {count}x{code}"
|
|
146
|
+
return s
|
|
131
147
|
|
|
132
148
|
@classmethod
|
|
133
149
|
def from_json(cls, component: dict) -> "Carb":
|
|
@@ -136,19 +152,30 @@ class Carb:
|
|
|
136
152
|
stoichiometry=component.get("stoichiometry"),
|
|
137
153
|
)
|
|
138
154
|
|
|
155
|
+
@classmethod
|
|
156
|
+
def from_pdbe(cls, mol: dict) -> "Carb":
|
|
157
|
+
codes = mol["carb_codes"]
|
|
158
|
+
length = sum(codes.values())
|
|
159
|
+
stoichiometry = mol["number_of_copies"] // length
|
|
160
|
+
return cls(codes=codes, stoichiometry=stoichiometry)
|
|
161
|
+
|
|
139
162
|
def to_json(self) -> dict:
|
|
140
163
|
return {"codes": self.codes, "stoichiometry": self.stoichiometry}
|
|
141
164
|
|
|
165
|
+
def volume(self, monlib: MonLib) -> float:
|
|
166
|
+
monomers = sum(self.codes.values())
|
|
167
|
+
volume = sum(monlib.volume(code) for code in self.codes)
|
|
168
|
+
volume -= monomers * monlib.volume("HOH")
|
|
169
|
+
return volume
|
|
170
|
+
|
|
142
171
|
|
|
143
|
-
class Ligand:
|
|
172
|
+
class Ligand(Component):
|
|
144
173
|
def __init__(self, code: str, stoichiometry: int = None):
|
|
145
174
|
self.code = code
|
|
146
175
|
self.stoichiometry = stoichiometry
|
|
147
176
|
|
|
148
|
-
def
|
|
149
|
-
|
|
150
|
-
return self.code == other.code
|
|
151
|
-
return NotImplemented
|
|
177
|
+
def __str__(self) -> str:
|
|
178
|
+
return f"Ligand: {self.code}"
|
|
152
179
|
|
|
153
180
|
@classmethod
|
|
154
181
|
def from_json(cls, component: dict) -> "Ligand":
|
|
@@ -157,9 +184,16 @@ class Ligand:
|
|
|
157
184
|
stoichiometry=component.get("stoichiometry"),
|
|
158
185
|
)
|
|
159
186
|
|
|
187
|
+
@classmethod
|
|
188
|
+
def from_pdbe(cls, mol: dict) -> "Ligand":
|
|
189
|
+
return cls(code=mol["chem_comp_ids"][0], stoichiometry=mol["number_of_copies"])
|
|
190
|
+
|
|
160
191
|
def to_json(self) -> dict:
|
|
161
192
|
return {"code": self.code, "stoichiometry": self.stoichiometry}
|
|
162
193
|
|
|
194
|
+
def volume(self, monlib: MonLib) -> float:
|
|
195
|
+
return monlib.volume(self.code)
|
|
196
|
+
|
|
163
197
|
|
|
164
198
|
class AsuContents:
|
|
165
199
|
def __init__(
|
|
@@ -171,7 +205,6 @@ class AsuContents:
|
|
|
171
205
|
carbs: list = None,
|
|
172
206
|
ligands: list = None,
|
|
173
207
|
buffers: list = None,
|
|
174
|
-
smiles: dict = None,
|
|
175
208
|
):
|
|
176
209
|
self.copies = copies
|
|
177
210
|
self.proteins = proteins or []
|
|
@@ -180,7 +213,6 @@ class AsuContents:
|
|
|
180
213
|
self.carbs = carbs or []
|
|
181
214
|
self.ligands = ligands or []
|
|
182
215
|
self.buffers = buffers or []
|
|
183
|
-
self.smiles = smiles or {}
|
|
184
216
|
|
|
185
217
|
@classmethod
|
|
186
218
|
def from_file(cls, path: str) -> "AsuContents":
|
|
@@ -190,31 +222,26 @@ class AsuContents:
|
|
|
190
222
|
|
|
191
223
|
@classmethod
|
|
192
224
|
def from_json_file(cls, path: str) -> "AsuContents":
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
contents.
|
|
212
|
-
|
|
213
|
-
ligand = Ligand.from_json(obj)
|
|
214
|
-
contents.ligands.append(ligand)
|
|
215
|
-
contents.buffers = contents_json.get("buffers") or []
|
|
216
|
-
contents.smiles = contents_json.get("smiles") or []
|
|
217
|
-
return contents
|
|
225
|
+
with open(path, encoding="utf-8") as stream:
|
|
226
|
+
contents = json.load(stream)
|
|
227
|
+
return cls(
|
|
228
|
+
copies=contents.get("copies"),
|
|
229
|
+
proteins=[
|
|
230
|
+
Polymer.from_json(obj, PolymerType.PROTEIN)
|
|
231
|
+
for obj in contents.get("proteins", [])
|
|
232
|
+
],
|
|
233
|
+
rnas=[
|
|
234
|
+
Polymer.from_json(obj, PolymerType.RNA)
|
|
235
|
+
for obj in contents.get("rnas", [])
|
|
236
|
+
],
|
|
237
|
+
dnas=[
|
|
238
|
+
Polymer.from_json(obj, PolymerType.DNA)
|
|
239
|
+
for obj in contents.get("dnas", [])
|
|
240
|
+
],
|
|
241
|
+
carbs=[Carb.from_json(obj) for obj in contents.get("carbs", [])],
|
|
242
|
+
ligands=[Ligand.from_json(obj) for obj in contents.get("ligands", [])],
|
|
243
|
+
buffers=contents.get("buffers", []),
|
|
244
|
+
)
|
|
218
245
|
|
|
219
246
|
@classmethod
|
|
220
247
|
def from_sequence_file(
|
|
@@ -233,20 +260,67 @@ class AsuContents:
|
|
|
233
260
|
if polymer.type == PolymerType.DNA:
|
|
234
261
|
self.dnas.append(polymer)
|
|
235
262
|
|
|
263
|
+
@classmethod
|
|
264
|
+
def from_pdbe(cls, entry_id: str) -> "AsuContents":
|
|
265
|
+
contents = cls(copies=1)
|
|
266
|
+
for mol in pdbe.molecule_dicts(entry_id):
|
|
267
|
+
molecule_type = mol["molecule_type"].lower()
|
|
268
|
+
if "polypeptide" in molecule_type:
|
|
269
|
+
protein = Polymer.from_pdbe(mol, PolymerType.PROTEIN)
|
|
270
|
+
contents.proteins.append(protein)
|
|
271
|
+
elif "polyribonucleotide" in molecule_type:
|
|
272
|
+
rna = Polymer.from_pdbe(mol, PolymerType.RNA)
|
|
273
|
+
contents.rnas.append(rna)
|
|
274
|
+
elif "polydeoxyribonucleotide" in molecule_type:
|
|
275
|
+
dna = Polymer.from_pdbe(mol, PolymerType.DNA)
|
|
276
|
+
contents.dnas.append(dna)
|
|
277
|
+
elif "carbohydrate" in molecule_type:
|
|
278
|
+
carb = Carb.from_pdbe(mol)
|
|
279
|
+
contents.carbs.append(carb)
|
|
280
|
+
elif "bound" in molecule_type:
|
|
281
|
+
ligand = Ligand.from_pdbe(mol)
|
|
282
|
+
if is_buffer(ligand.code):
|
|
283
|
+
contents.buffers.append(ligand.code)
|
|
284
|
+
else:
|
|
285
|
+
contents.ligands.append(ligand)
|
|
286
|
+
contents.divide_stoichiometry()
|
|
287
|
+
return contents
|
|
288
|
+
|
|
289
|
+
def components(self) -> list[Component]:
|
|
290
|
+
return self.proteins + self.rnas + self.dnas + self.carbs + self.ligands
|
|
291
|
+
|
|
292
|
+
def divide_stoichiometry(self):
|
|
293
|
+
counts = []
|
|
294
|
+
for component in self.components():
|
|
295
|
+
if component.stoichiometry is not None:
|
|
296
|
+
counts.append(component.stoichiometry)
|
|
297
|
+
if len(counts) > 0:
|
|
298
|
+
if len(counts) > 1:
|
|
299
|
+
divisor = functools.reduce(math.gcd, counts)
|
|
300
|
+
else:
|
|
301
|
+
divisor = counts[0]
|
|
302
|
+
if divisor > 1:
|
|
303
|
+
self.copies *= divisor
|
|
304
|
+
for component in self.components():
|
|
305
|
+
component.stoichiometry //= divisor
|
|
306
|
+
|
|
236
307
|
def monomer_codes(self) -> set:
|
|
237
308
|
codes = set()
|
|
238
309
|
for polymer in self.proteins + self.rnas + self.dnas:
|
|
239
|
-
codes
|
|
310
|
+
codes |= set(polymer.residue_codes(modified=True))
|
|
240
311
|
for carb in self.carbs:
|
|
241
|
-
codes
|
|
312
|
+
codes |= set(carb.codes.keys())
|
|
242
313
|
for ligand in self.ligands:
|
|
243
314
|
codes.add(ligand.code)
|
|
244
|
-
codes
|
|
315
|
+
codes |= set(self.buffers)
|
|
245
316
|
return codes
|
|
246
317
|
|
|
247
318
|
def is_selenomet(self) -> bool:
|
|
248
319
|
return len(self.proteins) > 0 and all(p.is_selenomet() for p in self.proteins)
|
|
249
320
|
|
|
321
|
+
def volume(self, monlib: MonLib) -> float:
|
|
322
|
+
return sum(c.volume(monlib) * (c.stoichiometry or 1) for c in self.components())
|
|
323
|
+
|
|
250
324
|
def to_json(self) -> list:
|
|
251
325
|
return {
|
|
252
326
|
"copies": self.copies,
|
|
@@ -256,68 +330,18 @@ class AsuContents:
|
|
|
256
330
|
"carbs": [carb.to_json() for carb in self.carbs],
|
|
257
331
|
"ligands": [ligand.to_json() for ligand in self.ligands],
|
|
258
332
|
"buffers": self.buffers,
|
|
259
|
-
"smiles": self.smiles,
|
|
260
333
|
}
|
|
261
334
|
|
|
262
335
|
def write_json_file(self, path: str) -> None:
|
|
263
|
-
with open(path, "w") as stream:
|
|
336
|
+
with open(path, "w", encoding="utf-8") as stream:
|
|
264
337
|
json.dump(self.to_json(), stream, indent=2)
|
|
265
338
|
|
|
266
339
|
def write_sequence_file(
|
|
267
340
|
self, path: str, types: list = None, line_length: int = 60
|
|
268
341
|
) -> None:
|
|
269
|
-
with open(path, "w") as stream:
|
|
342
|
+
with open(path, "w", encoding="utf-8") as stream:
|
|
270
343
|
for polymer in self.proteins + self.rnas + self.dnas:
|
|
271
344
|
if types is None or polymer.type in types:
|
|
272
|
-
stream.write(f">{polymer.type.
|
|
345
|
+
stream.write(f">{polymer.type.name}\n")
|
|
273
346
|
for i in range(0, len(polymer.sequence), line_length):
|
|
274
347
|
stream.write(polymer.sequence[i : i + line_length] + "\n")
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
def code1_to_code3(code1: str, polymer_type: PolymerType) -> str:
|
|
278
|
-
return {
|
|
279
|
-
PolymerType.PROTEIN: PROTEIN_CODES.get(code1) or PROTEIN_CODES["X"],
|
|
280
|
-
PolymerType.RNA: RNA_CODES.get(code1) or RNA_CODES["X"],
|
|
281
|
-
PolymerType.DNA: DNA_CODES.get(code1) or DNA_CODES["X"],
|
|
282
|
-
}[polymer_type]
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
def guess_sequence_type(sequence: str) -> PolymerType:
|
|
286
|
-
codes = set(sequence)
|
|
287
|
-
if "U" in codes:
|
|
288
|
-
return PolymerType.RNA
|
|
289
|
-
if codes & set("DEFHIKLMNPQRSVWY"):
|
|
290
|
-
return PolymerType.PROTEIN
|
|
291
|
-
if codes == {"A"}:
|
|
292
|
-
return PolymerType.PROTEIN
|
|
293
|
-
if codes == {"G"}:
|
|
294
|
-
return PolymerType.PROTEIN
|
|
295
|
-
if "T" in codes:
|
|
296
|
-
return PolymerType.DNA
|
|
297
|
-
return PolymerType.RNA
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
def sequences_in_file(contents: str) -> list:
|
|
301
|
-
sequence = ""
|
|
302
|
-
sequences = []
|
|
303
|
-
skip_line = False
|
|
304
|
-
skip_lines = False
|
|
305
|
-
lines = contents.splitlines(keepends=False)
|
|
306
|
-
for line in lines:
|
|
307
|
-
if skip_line:
|
|
308
|
-
skip_line = False
|
|
309
|
-
continue
|
|
310
|
-
if line[:1] == ">":
|
|
311
|
-
if len(sequence) > 0:
|
|
312
|
-
sequences.append(sequence)
|
|
313
|
-
sequence = ""
|
|
314
|
-
if line[1:3] in PIR_CODES and line[3:4] == ";":
|
|
315
|
-
skip_line = True
|
|
316
|
-
skip_lines = False
|
|
317
|
-
elif line[:1] != ";" and not skip_lines:
|
|
318
|
-
sequence += "".join(c for c in line if c.isalpha())
|
|
319
|
-
if line[-1:] == "*":
|
|
320
|
-
skip_lines = True
|
|
321
|
-
if len(sequence) > 0:
|
|
322
|
-
sequences.append(sequence)
|
|
323
|
-
return sequences
|
modelcraft/environ.py
CHANGED
|
@@ -6,10 +6,3 @@ def setup_environ():
|
|
|
6
6
|
if variable not in os.environ:
|
|
7
7
|
raise EnvironmentError(variable + " environment variable not set")
|
|
8
8
|
os.environ["LD_LIBRARY_PATH"] = os.environ["CLIB"]
|
|
9
|
-
os.environ["COOT_N_THREADS"] = "1"
|
|
10
|
-
os.environ["GOTO_NUM_THREADS"] = "1"
|
|
11
|
-
os.environ["MKL_NUM_THREADS"] = "1"
|
|
12
|
-
os.environ["NUMEXPR_NUM_THREADS"] = "1"
|
|
13
|
-
os.environ["OMP_NUM_THREADS"] = "1"
|
|
14
|
-
os.environ["OPENBLAS_NUM_THREADS"] = "1"
|
|
15
|
-
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
|
modelcraft/geometry.py
CHANGED
|
@@ -1,32 +1,44 @@
|
|
|
1
|
-
import
|
|
2
|
-
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
3
|
import gemmi
|
|
4
|
-
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from .monlib import MonLib
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def per_residue_geometry_rmsz(
|
|
10
|
+
structure: gemmi.Structure, monlib: MonLib, model_index: int = 0
|
|
11
|
+
) -> dict:
|
|
12
|
+
atom_zs = _atom_zs(structure, monlib, model_index)
|
|
13
|
+
rv = {}
|
|
14
|
+
for chain in structure[model_index]:
|
|
15
|
+
for residue in chain:
|
|
16
|
+
zs = np.concatenate([atom_zs.get(atom.serial, []) for atom in residue])
|
|
17
|
+
rmsz = np.sqrt(np.mean(np.square(zs))) if len(zs) > 0 else np.nan
|
|
18
|
+
rv[(chain.name, str(residue.seqid))] = rmsz
|
|
19
|
+
return rv
|
|
5
20
|
|
|
6
21
|
|
|
7
|
-
def
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
for plane in
|
|
22
|
+
def _atom_zs(structure: gemmi.Structure, monlib: MonLib, model_index: int) -> dict:
|
|
23
|
+
structure.assign_serial_numbers()
|
|
24
|
+
topo = gemmi.prepare_topology(structure, monlib, model_index)
|
|
25
|
+
atom_zs = collections.defaultdict(list)
|
|
26
|
+
for bond in topo.bonds:
|
|
27
|
+
z = bond.calculate_z()
|
|
28
|
+
for atom in bond.atoms:
|
|
29
|
+
atom_zs[atom.serial].append(z)
|
|
30
|
+
for angle in topo.angles:
|
|
31
|
+
z = angle.calculate_z()
|
|
32
|
+
for atom in angle.atoms:
|
|
33
|
+
atom_zs[atom.serial].append(z)
|
|
34
|
+
for torsion in topo.torsions:
|
|
35
|
+
if torsion.restr.esd > 0: # Some torsions are only restrained by planes
|
|
36
|
+
z = torsion.calculate_z()
|
|
37
|
+
for atom in torsion.atoms:
|
|
38
|
+
atom_zs[atom.serial].append(z)
|
|
39
|
+
for plane in topo.planes:
|
|
25
40
|
best_plane = gemmi.find_best_plane(plane.atoms)
|
|
26
|
-
max_z = 0
|
|
27
41
|
for atom in plane.atoms:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
sum_of_squares += max_z ** 2
|
|
32
|
-
return math.sqrt(sum_of_squares / num_of_squares)
|
|
42
|
+
z = gemmi.get_distance_from_plane(atom.pos, best_plane) / plane.restr.esd
|
|
43
|
+
atom_zs[atom.serial].append(z)
|
|
44
|
+
return atom_zs
|
modelcraft/job.py
CHANGED
|
@@ -4,8 +4,9 @@ import shutil
|
|
|
4
4
|
import subprocess
|
|
5
5
|
import textwrap
|
|
6
6
|
import time
|
|
7
|
+
|
|
7
8
|
from .pipeline import Pipeline
|
|
8
|
-
from .utils import
|
|
9
|
+
from .utils import puid
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
class Job(abc.ABC):
|
|
@@ -22,13 +23,13 @@ class Job(abc.ABC):
|
|
|
22
23
|
if self._exe_path is None:
|
|
23
24
|
raise ValueError(f"Executable '{self._exe_name}' not found")
|
|
24
25
|
if pipeline is None:
|
|
25
|
-
self._directory = f"job_{self._exe_name}_{
|
|
26
|
+
self._directory = f"job_{self._exe_name}_{puid(length=20)}"
|
|
26
27
|
else:
|
|
27
28
|
self._directory = pipeline.next_job_directory(self._exe_name)
|
|
28
29
|
pipeline.report_job_start(self._exe_name)
|
|
29
30
|
os.makedirs(self._directory, exist_ok=True)
|
|
30
31
|
self._setup()
|
|
31
|
-
with open(self._path("script.sh"), "w") as stream:
|
|
32
|
+
with open(self._path("script.sh"), "w", encoding="utf-8") as stream:
|
|
32
33
|
stream.write(self._script())
|
|
33
34
|
os.chmod(self._path("script.sh"), 0o755)
|
|
34
35
|
start_time = time.time()
|
|
@@ -56,8 +57,8 @@ class Job(abc.ABC):
|
|
|
56
57
|
pass
|
|
57
58
|
|
|
58
59
|
def _run_subprocess(self):
|
|
59
|
-
with open(self._path("stdout.txt"), "w") as out_stream:
|
|
60
|
-
with open(self._path("stderr.txt"), "w") as err_stream:
|
|
60
|
+
with open(self._path("stdout.txt"), "w", encoding="utf-8") as out_stream:
|
|
61
|
+
with open(self._path("stderr.txt"), "w", encoding="utf-8") as err_stream:
|
|
61
62
|
process = subprocess.Popen(
|
|
62
63
|
args=[self._exe_path] + self._args,
|
|
63
64
|
stdin=subprocess.PIPE if self._stdin else None,
|
modelcraft/jobs/acedrg.py
CHANGED
modelcraft/jobs/buccaneer.py
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import dataclasses
|
|
2
2
|
import os
|
|
3
3
|
import xml.etree.ElementTree as ET
|
|
4
|
+
|
|
4
5
|
import gemmi
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
from ..contents import AsuContents
|
|
6
8
|
from ..job import Job
|
|
7
9
|
from ..reflections import DataItem, write_mtz
|
|
8
|
-
from ..
|
|
10
|
+
from ..sequence import PROTEIN_CODES, PolymerType
|
|
11
|
+
from ..structure import read_structure, write_mmcif
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
@dataclasses.dataclass
|
|
@@ -124,12 +127,27 @@ class Buccaneer(Job):
|
|
|
124
127
|
)
|
|
125
128
|
|
|
126
129
|
|
|
127
|
-
def _known_structure_ids(structure: gemmi.Structure)
|
|
130
|
+
def _known_structure_ids(structure: gemmi.Structure):
|
|
128
131
|
"Known structure IDs for ligands (but not modified residues) with a CA atom"
|
|
129
132
|
protein_residue_names = set(PROTEIN_CODES.values()) | {"MSE", "UNK"}
|
|
130
133
|
for chain in structure[0]:
|
|
131
|
-
for residues in
|
|
134
|
+
for residues in _consecutive_residues(chain):
|
|
132
135
|
if not any(res.name in protein_residue_names for res in residues):
|
|
133
136
|
for residue in residues:
|
|
134
137
|
if "CA" in residue:
|
|
135
138
|
yield f"/{chain.name}/{str(residue.seqid)}/*/:1.0"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _consecutive_residues(chain: gemmi.Chain):
|
|
142
|
+
"Iterate through lists of residues with consecutive seqnums (first conformer only)"
|
|
143
|
+
consecutive = []
|
|
144
|
+
last_seqnum = None
|
|
145
|
+
for residue in chain.first_conformer():
|
|
146
|
+
if last_seqnum is None or residue.seqid.num == last_seqnum + 1:
|
|
147
|
+
consecutive.append(residue)
|
|
148
|
+
else:
|
|
149
|
+
yield consecutive
|
|
150
|
+
consecutive = [residue]
|
|
151
|
+
last_seqnum = residue.seqid.num
|
|
152
|
+
if len(consecutive) > 0:
|
|
153
|
+
yield consecutive
|