modelcraft 5.0.3__py3-none-any.whl → 6.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelcraft/__init__.py +16 -31
- modelcraft/__main__.py +0 -1
- modelcraft/arguments.py +35 -7
- modelcraft/combine.py +22 -41
- modelcraft/contents.py +188 -164
- modelcraft/environ.py +0 -7
- modelcraft/geometry.py +39 -27
- modelcraft/job.py +6 -5
- modelcraft/jobs/acedrg.py +2 -0
- modelcraft/jobs/buccaneer.py +22 -4
- modelcraft/jobs/comit.py +2 -0
- modelcraft/jobs/ctruncate.py +3 -1
- modelcraft/jobs/emda.py +2 -0
- modelcraft/jobs/findwaters.py +2 -0
- modelcraft/jobs/freerflag.py +2 -0
- modelcraft/jobs/libg.py +2 -0
- modelcraft/jobs/molrep.py +2 -0
- modelcraft/jobs/nautilus.py +28 -14
- modelcraft/jobs/nucleofind.py +88 -0
- modelcraft/jobs/parrot.py +13 -2
- modelcraft/jobs/phasematch.py +2 -1
- modelcraft/jobs/refmac.py +3 -1
- modelcraft/jobs/servalcat.py +36 -2
- modelcraft/jobs/sheetbend.py +2 -0
- modelcraft/modelcraftem.py +49 -6
- modelcraft/modelcraftxray.py +90 -42
- modelcraft/monlib.py +55 -52
- modelcraft/pdbe.py +54 -0
- modelcraft/pipeline.py +1 -1
- modelcraft/prune.py +69 -0
- modelcraft/reflections.py +11 -1
- modelcraft/scripts/contents.py +5 -215
- modelcraft/scripts/copies.py +26 -17
- modelcraft/scripts/modelcraft.py +1 -0
- modelcraft/scripts/sidechains.py +141 -0
- modelcraft/scripts/validate.py +81 -0
- modelcraft/sequence.py +106 -0
- modelcraft/solvent.py +42 -113
- modelcraft/structure.py +64 -41
- modelcraft/tests/ccp4/__init__.py +7 -11
- modelcraft/tests/ccp4/test_acedrg.py +2 -0
- modelcraft/tests/ccp4/test_arguments.py +3 -0
- modelcraft/tests/ccp4/test_buccaneer.py +3 -2
- modelcraft/tests/ccp4/test_cell.py +4 -1
- modelcraft/tests/ccp4/test_comit.py +2 -0
- modelcraft/tests/ccp4/test_contents.py +99 -17
- modelcraft/tests/ccp4/test_copies.py +1 -0
- modelcraft/tests/ccp4/test_ctruncate.py +2 -0
- modelcraft/tests/ccp4/test_findwaters.py +2 -0
- modelcraft/tests/ccp4/test_freerflag.py +2 -0
- modelcraft/tests/ccp4/test_libg.py +1 -0
- modelcraft/tests/ccp4/test_molrep.py +3 -0
- modelcraft/tests/ccp4/test_monlib.py +75 -45
- modelcraft/tests/ccp4/test_nautilus.py +5 -3
- modelcraft/tests/ccp4/test_nucleofind.py +62 -0
- modelcraft/tests/ccp4/test_parrot.py +3 -1
- modelcraft/tests/ccp4/test_phasematch.py +2 -0
- modelcraft/tests/ccp4/test_prune.py +17 -0
- modelcraft/tests/ccp4/test_reflections.py +110 -1
- modelcraft/tests/ccp4/test_refmac.py +3 -0
- modelcraft/tests/{unittests/test_contents.py → ccp4/test_sequence.py} +5 -12
- modelcraft/tests/ccp4/test_servalcat.py +52 -0
- modelcraft/tests/ccp4/test_sheetbend.py +4 -3
- modelcraft/tests/ccp4/test_sidechains.py +25 -0
- modelcraft/tests/ccp4/test_solvent.py +12 -26
- modelcraft/tests/ccp4/test_structure.py +1 -0
- modelcraft/tests/ccp4/test_validation.py +19 -0
- modelcraft/tests/ccp4/test_xray.py +12 -6
- modelcraft/tests/ccpem/test_em.py +3 -0
- modelcraft/tests/ccpem/test_emda.py +2 -0
- modelcraft/tests/ccpem/test_refmac.py +1 -0
- modelcraft/tests/ccpem/test_servalcat.py +4 -3
- modelcraft/utils.py +16 -4
- modelcraft/validation.py +101 -0
- modelcraft-6.0.0.dist-info/METADATA +76 -0
- modelcraft-6.0.0.dist-info/RECORD +85 -0
- {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/WHEEL +1 -1
- {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/entry_points.txt +2 -0
- modelcraft/coot/prune.py +0 -1085
- modelcraft/coot/sidechains.py +0 -68
- modelcraft/jobs/acorn.py +0 -114
- modelcraft/jobs/coot.py +0 -104
- modelcraft/tests/ccp4/test_coot.py +0 -29
- modelcraft/tests/ccp4/test_geometry.py +0 -20
- modelcraft/tests/unittests/__init__.py +0 -0
- modelcraft/tests/unittests/test_reflections.py +0 -101
- modelcraft-5.0.3.dist-info/METADATA +0 -49
- modelcraft-5.0.3.dist-info/RECORD +0 -82
- modelcraft-5.0.3.dist-info/licenses/LICENSE +0 -504
- {modelcraft-5.0.3.dist-info → modelcraft-6.0.0.dist-info}/top_level.txt +0 -0
modelcraft/scripts/contents.py
CHANGED
|
@@ -1,218 +1,8 @@
|
|
|
1
1
|
import argparse
|
|
2
|
-
import functools
|
|
3
|
-
import math
|
|
4
|
-
import os
|
|
5
|
-
import re
|
|
6
2
|
import sys
|
|
7
|
-
import requests
|
|
8
|
-
from ..contents import AsuContents, Carb, Ligand, Polymer, PolymerType
|
|
9
|
-
from ..environ import setup_environ
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _response_json(url, data=None):
|
|
13
|
-
print("Requesting:", url)
|
|
14
|
-
if data is None:
|
|
15
|
-
response = requests.get(url)
|
|
16
|
-
else:
|
|
17
|
-
response = requests.post(url, data=data)
|
|
18
|
-
if response.status_code != 200:
|
|
19
|
-
raise ConnectionError(response.text)
|
|
20
|
-
return response.json()
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def _add_smiles(contents: AsuContents) -> None:
|
|
24
|
-
codes = contents.monomer_codes()
|
|
25
|
-
codes = {code for code in codes if not _in_library(code)}
|
|
26
|
-
for code in sorted(codes):
|
|
27
|
-
path = os.path.join(os.environ["CLIBD_MON"], code[0].lower(), code + ".cif")
|
|
28
|
-
if not os.path.exists(path):
|
|
29
|
-
contents.smiles[code] = _smiles(code)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
@functools.lru_cache(maxsize=None)
|
|
33
|
-
def _buffers() -> set:
|
|
34
|
-
path = os.path.join(os.environ["CCP4"], "share", "pisa", "agents.dat")
|
|
35
|
-
agents = set()
|
|
36
|
-
with open(path) as stream:
|
|
37
|
-
for line in stream:
|
|
38
|
-
if line[0] != "#" and "," in line:
|
|
39
|
-
code = line.split(",")[0]
|
|
40
|
-
agents.add(code)
|
|
41
|
-
return agents
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _carb_codes(entry: str) -> dict:
|
|
45
|
-
url = "https://www.ebi.ac.uk/pdbe/search/pdb/select?"
|
|
46
|
-
query = "pdb_id:" + entry
|
|
47
|
-
filter_list = "carb_compound_id_entity"
|
|
48
|
-
request_data = {"q": query, "fl": filter_list, "wt": "json"}
|
|
49
|
-
json = _response_json(url, data=request_data)
|
|
50
|
-
docs = json["response"]["docs"]
|
|
51
|
-
codes = {}
|
|
52
|
-
for doc in docs:
|
|
53
|
-
for line in doc["carb_compound_id_entity"]:
|
|
54
|
-
match = re.match(r"(.+)\((\d+)\)_(\d+)", line)
|
|
55
|
-
code, copies, entity = match.groups()
|
|
56
|
-
codes.setdefault(int(entity), {})[code] = int(copies)
|
|
57
|
-
return codes
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def _carb_from_pdbe_molecule_dict(mol: dict) -> Carb:
|
|
61
|
-
codes = mol["carb_codes"]
|
|
62
|
-
length = sum(codes.values())
|
|
63
|
-
stoichiometry = mol["number_of_copies"] // length
|
|
64
|
-
return Carb(codes=codes, stoichiometry=stoichiometry)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def _divide_stoichiometry(contents: AsuContents):
|
|
68
|
-
stoichiometry = []
|
|
69
|
-
for item in (
|
|
70
|
-
contents.proteins
|
|
71
|
-
+ contents.rnas
|
|
72
|
-
+ contents.dnas
|
|
73
|
-
+ contents.carbs
|
|
74
|
-
+ contents.ligands
|
|
75
|
-
):
|
|
76
|
-
if item.stoichiometry is not None:
|
|
77
|
-
stoichiometry.append(item.stoichiometry)
|
|
78
|
-
divisor = stoichiometry[0]
|
|
79
|
-
if len(stoichiometry) > 1:
|
|
80
|
-
divisor = functools.reduce(math.gcd, stoichiometry)
|
|
81
|
-
contents.copies *= divisor
|
|
82
|
-
for item in (
|
|
83
|
-
contents.proteins
|
|
84
|
-
+ contents.rnas
|
|
85
|
-
+ contents.dnas
|
|
86
|
-
+ contents.carbs
|
|
87
|
-
+ contents.ligands
|
|
88
|
-
):
|
|
89
|
-
item.stoichiometry //= divisor
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def _entry_contents(entry: str) -> AsuContents:
|
|
93
|
-
contents = AsuContents()
|
|
94
|
-
contents.copies = 1
|
|
95
|
-
for mol in _pdbe_molecules(entry):
|
|
96
|
-
if "sequence" in mol:
|
|
97
|
-
polymer = _polymer_from_pdbe_molecule_dict(mol)
|
|
98
|
-
contents.add_polymer(polymer)
|
|
99
|
-
if mol["molecule_type"] == "carbohydrate polymer":
|
|
100
|
-
carb = _carb_from_pdbe_molecule_dict(mol)
|
|
101
|
-
contents.carbs.append(carb)
|
|
102
|
-
if mol["molecule_type"] == "bound":
|
|
103
|
-
ligand = _ligand_from_pdbe_molecule_dict(mol)
|
|
104
|
-
if _is_buffer(ligand.code):
|
|
105
|
-
contents.buffers.append(ligand.code)
|
|
106
|
-
elif ligand.code not in ("UNL", "UNX"):
|
|
107
|
-
contents.ligands.append(ligand)
|
|
108
|
-
_divide_stoichiometry(contents)
|
|
109
|
-
_add_smiles(contents)
|
|
110
|
-
return contents
|
|
111
3
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
def _in_library(code: str) -> bool:
|
|
115
|
-
path = os.path.join(os.environ["CLIBD_MON"], code[0].lower(), code + ".cif")
|
|
116
|
-
return os.path.exists(path)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
@functools.lru_cache(maxsize=None)
|
|
120
|
-
def _is_buffer(code: str) -> float:
|
|
121
|
-
return code.upper() in _buffers()
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def _ligand_from_pdbe_molecule_dict(mol: dict) -> Ligand:
|
|
125
|
-
return Ligand(code=mol["chem_comp_ids"][0], stoichiometry=mol["number_of_copies"])
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _modifications_in_pdbe_molecule_dict(mol: dict) -> list:
|
|
129
|
-
indices = {}
|
|
130
|
-
for index, mod in mol["pdb_sequence_indices_with_multiple_residues"].items():
|
|
131
|
-
code1 = mod["one_letter_code"]
|
|
132
|
-
code3 = mod["three_letter_code"]
|
|
133
|
-
if code3 not in ("DA", "DC", "DG", "DT"):
|
|
134
|
-
key = code1, code3
|
|
135
|
-
indices.setdefault(key, []).append(index)
|
|
136
|
-
modifications = []
|
|
137
|
-
for key in indices:
|
|
138
|
-
code1, code3 = key
|
|
139
|
-
total = mol["sequence"].count(code1)
|
|
140
|
-
if code1 == "M" and mol["sequence"][0] == "M":
|
|
141
|
-
total -= 1
|
|
142
|
-
if len(indices[key]) >= total:
|
|
143
|
-
modifications.append(f"{code1}->{code3}")
|
|
144
|
-
else:
|
|
145
|
-
modifications.extend(f"{index}->{code3}" for index in indices[key])
|
|
146
|
-
return modifications
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
def _pdbe_molecules(entry: str) -> list:
|
|
150
|
-
entry = entry.lower()
|
|
151
|
-
url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/status/" + entry
|
|
152
|
-
try:
|
|
153
|
-
json = _response_json(url)
|
|
154
|
-
except ConnectionError:
|
|
155
|
-
sys.exit(f"Cannot determine the status of entry {entry}")
|
|
156
|
-
superceded_by = json[entry][0].get("superceded_by", [])
|
|
157
|
-
if len(superceded_by) > 0:
|
|
158
|
-
entry = superceded_by[-1]
|
|
159
|
-
url = "https://www.ebi.ac.uk/pdbe/api/pdb/entry/molecules/" + entry
|
|
160
|
-
try:
|
|
161
|
-
json = _response_json(url)
|
|
162
|
-
except ConnectionError:
|
|
163
|
-
sys.exit(f"No molecule information found for entry {entry}")
|
|
164
|
-
mols = json[entry]
|
|
165
|
-
if any(mol["molecule_type"] == "carbohydrate polymer" for mol in mols):
|
|
166
|
-
codes = _carb_codes(entry)
|
|
167
|
-
for mol in mols:
|
|
168
|
-
mol["carb_codes"] = codes.get(mol["entity_id"])
|
|
169
|
-
return mols
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
def _polymer_from_pdbe_molecule_dict(mol: dict) -> Polymer:
|
|
173
|
-
polymer_type = {
|
|
174
|
-
"polypeptide(l)": PolymerType.PROTEIN,
|
|
175
|
-
"polyribonucleotide": PolymerType.RNA,
|
|
176
|
-
"polydeoxyribonucleotide": PolymerType.DNA,
|
|
177
|
-
}.get(mol["molecule_type"].lower(), None)
|
|
178
|
-
return Polymer(
|
|
179
|
-
sequence=mol["sequence"],
|
|
180
|
-
stoichiometry=mol["number_of_copies"],
|
|
181
|
-
polymer_type=polymer_type,
|
|
182
|
-
modifications=_modifications_in_pdbe_molecule_dict(mol),
|
|
183
|
-
)
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
@functools.lru_cache(maxsize=None)
|
|
187
|
-
def _smiles(code: str) -> str:
|
|
188
|
-
query = (
|
|
189
|
-
"{\n"
|
|
190
|
-
' chem_comp(comp_id: "%s") {\n'
|
|
191
|
-
" pdbx_chem_comp_descriptor {\n"
|
|
192
|
-
" comp_id\n"
|
|
193
|
-
" descriptor\n"
|
|
194
|
-
" program\n"
|
|
195
|
-
" program_version\n"
|
|
196
|
-
" type\n"
|
|
197
|
-
" }\n"
|
|
198
|
-
" }\n"
|
|
199
|
-
"}" % code
|
|
200
|
-
)
|
|
201
|
-
url = "https://data.rcsb.org/graphql?query=" + requests.utils.quote(query)
|
|
202
|
-
json = _response_json(url)
|
|
203
|
-
descriptors = json["data"]["chem_comp"]["pdbx_chem_comp_descriptor"]
|
|
204
|
-
canonical = None
|
|
205
|
-
smiles = None
|
|
206
|
-
for descriptor in descriptors:
|
|
207
|
-
if descriptor["type"] == "SMILES_CANONICAL":
|
|
208
|
-
if descriptor["program"] == "OpenEye OEToolkits":
|
|
209
|
-
return descriptor["descriptor"]
|
|
210
|
-
canonical = descriptor["descriptor"]
|
|
211
|
-
elif descriptor["type"] == "SMILES":
|
|
212
|
-
smiles = descriptor["descriptor"]
|
|
213
|
-
if canonical is None and smiles is None:
|
|
214
|
-
raise RuntimeError("Could not get SMILES from RCSB for " + code)
|
|
215
|
-
return canonical or smiles
|
|
4
|
+
from ..contents import AsuContents
|
|
5
|
+
from ..environ import setup_environ
|
|
216
6
|
|
|
217
7
|
|
|
218
8
|
def main(argument_list=None):
|
|
@@ -221,10 +11,10 @@ def main(argument_list=None):
|
|
|
221
11
|
argument_list = sys.argv[1:]
|
|
222
12
|
description = "Create a contents JSON file for a PDB entry"
|
|
223
13
|
parser = argparse.ArgumentParser(description=description)
|
|
224
|
-
parser.add_argument("
|
|
225
|
-
parser.add_argument("contents", help="Path
|
|
14
|
+
parser.add_argument("entry_id", help="PDB entry ID")
|
|
15
|
+
parser.add_argument("contents", help="Path to write the contents JSON")
|
|
226
16
|
args = parser.parse_args(argument_list)
|
|
227
|
-
contents =
|
|
17
|
+
contents = AsuContents.from_pdbe(args.entry_id)
|
|
228
18
|
contents.write_json_file(args.contents)
|
|
229
19
|
|
|
230
20
|
|
modelcraft/scripts/copies.py
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
import argparse
|
|
2
2
|
import sys
|
|
3
|
+
|
|
3
4
|
import gemmi
|
|
5
|
+
|
|
4
6
|
from ..contents import AsuContents
|
|
5
7
|
from ..environ import setup_environ
|
|
6
|
-
from ..
|
|
8
|
+
from ..monlib import MonLib
|
|
9
|
+
from ..solvent import copies_options
|
|
7
10
|
|
|
8
11
|
|
|
9
12
|
def main(argument_list=None):
|
|
@@ -13,39 +16,44 @@ def main(argument_list=None):
|
|
|
13
16
|
parser = argparse.ArgumentParser()
|
|
14
17
|
parser.add_argument("contents", help="Path to contents file")
|
|
15
18
|
parser.add_argument("mtz", help="Path to MTZ file")
|
|
19
|
+
parser.add_argument("--libin", help="Path to custom restraint dictionary")
|
|
16
20
|
args = parser.parse_args(argument_list)
|
|
21
|
+
|
|
17
22
|
contents = AsuContents.from_file(args.contents)
|
|
18
23
|
mtz = gemmi.read_mtz_file(args.mtz)
|
|
24
|
+
monlib = MonLib(contents.monomer_codes(), args.libin, include_standard=True)
|
|
19
25
|
|
|
20
26
|
cell = mtz.cell
|
|
21
27
|
asu_volume = cell.volume / len(mtz.spacegroup.operations())
|
|
22
28
|
print("## MTZ\n")
|
|
23
29
|
print(
|
|
24
|
-
"Cell
|
|
25
|
-
|
|
30
|
+
f"Cell {cell.a:.3f} {cell.b:.3f} {cell.c:.3f}"
|
|
31
|
+
f" {cell.alpha:.2f} {cell.beta:.2f} {cell.gamma:.2f}"
|
|
26
32
|
)
|
|
27
|
-
print("Spacegroup
|
|
28
|
-
print("ASU Volume
|
|
29
|
-
print("Resolution
|
|
33
|
+
print(f"Spacegroup {mtz.spacegroup.hm}")
|
|
34
|
+
print(f"ASU Volume {asu_volume:.0f}")
|
|
35
|
+
print(f"Resolution {mtz.resolution_low():.2f} - {mtz.resolution_high():.2f}")
|
|
30
36
|
print("")
|
|
31
37
|
|
|
32
38
|
print("## Components\n")
|
|
33
39
|
print("| Description | Stoichiometry | Volume |")
|
|
34
40
|
print("|----------------------------------------------|---------------|----------|")
|
|
35
|
-
for component in
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
volume = component.volume
|
|
41
|
+
for component in contents.components():
|
|
42
|
+
stoichiometry = component.stoichiometry or 1
|
|
43
|
+
assumed = "(assumed)" if component.stoichiometry is None else ""
|
|
44
|
+
volume = component.volume(monlib)
|
|
40
45
|
print(
|
|
41
|
-
"|
|
|
42
|
-
|
|
46
|
+
f"| {str(component)[:44]:44s} "
|
|
47
|
+
f"| {assumed:9s} {stoichiometry:3d} "
|
|
48
|
+
f"| {volume:8.0f} |"
|
|
43
49
|
)
|
|
44
50
|
print("|----------------------------------------------|---------------|----------|")
|
|
45
|
-
print("|
|
|
51
|
+
print(f"| {'Total':44s} | | {contents.volume(monlib):8.0f} |")
|
|
46
52
|
print("")
|
|
47
53
|
|
|
48
|
-
options =
|
|
54
|
+
options = copies_options(
|
|
55
|
+
contents, cell, mtz.spacegroup, mtz.resolution_high(), monlib
|
|
56
|
+
)
|
|
49
57
|
print("## Copies\n")
|
|
50
58
|
if len(options) == 0:
|
|
51
59
|
print("Contents are too big to fit into the asymmetric unit")
|
|
@@ -54,8 +62,9 @@ def main(argument_list=None):
|
|
|
54
62
|
print("|--------|------------------|-------------|")
|
|
55
63
|
for option in options:
|
|
56
64
|
print(
|
|
57
|
-
"|
|
|
58
|
-
|
|
65
|
+
f"| {option.copies:6d} "
|
|
66
|
+
f"| {option.solvent:16.3f} "
|
|
67
|
+
f"| {option.probability:11.3f} |"
|
|
59
68
|
)
|
|
60
69
|
print("")
|
|
61
70
|
|
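modelcraft/scripts/modelcraft.py
CHANGED
|
(the +1 -0 hunk for this file is missing from this extract)
modelcraft/scripts/sidechains.py
ADDED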
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"Add missing side chains to a protein model"
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
from os import environ
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from shutil import rmtree
|
|
8
|
+
|
|
9
|
+
import coot_headless_api
|
|
10
|
+
import gemmi
|
|
11
|
+
|
|
12
|
+
from ..environ import setup_environ
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_args(argument_list):
    """Parse the command-line arguments of the side-chain building script.

    Args:
        argument_list: Argument strings to parse, or ``None`` to read them
            from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``structure``, ``mtz``,
        ``output``, ``model_index``, ``f_label`` and ``phi_label``.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are invalid.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "structure",
        help="Input structure in PDB, mmCIF, mmJSON format",
    )
    parser.add_argument(
        "mtz",
        help="MTZ file amplitudes and phases (FWT and PHWT by default)",
    )
    parser.add_argument(
        "output",
        help="Path to write the output structure with added side chains",
    )
    parser.add_argument(
        "--model-index",
        type=int,
        default=0,
        metavar="N",
        help="Index of the model to analyse (with 0 being the first model)",
    )
    parser.add_argument(
        "--f_label",
        default="FWT",
        help="Column label for structure factor amplitudes",
    )
    parser.add_argument(
        "--phi_label",
        default="PHWT",
        help="Column label for structure factor phases",
    )
    # Fall back to the process arguments only when nothing was supplied.  An
    # explicitly empty list must be parsed as given (and rejected for lacking
    # the positional arguments) rather than silently replaced by sys.argv.
    if argument_list is None:
        argument_list = sys.argv[1:]
    return parser.parse_args(argument_list)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Names of the side-chain atoms from the gamma position outwards, keyed by
# residue name.  Residue names that are not listed here (for example GLY and
# ALA) have no atoms to check.
SIDE_CHAIN_ATOMS = {
    "ARG": {"CG", "CD", "NE", "CZ", "NH1", "NH2"},
    "ASN": {"CG", "OD1", "ND2"},
    "ASP": {"CG", "OD1", "OD2"},
    "CYS": {"SG"},
    "GLN": {"CG", "CD", "OE1", "NE2"},
    "GLU": {"CG", "CD", "OE1", "OE2"},
    "HIS": {"CG", "ND1", "CD2", "CE1", "NE2"},
    "ILE": {"CG1", "CG2", "CD1"},
    "LEU": {"CG", "CD1", "CD2"},
    "LYS": {"CG", "CD", "CE", "NZ"},
    "MET": {"CG", "SD", "CE"},
    "MSE": {"CG", "SE", "CE"},
    "PHE": {"CG", "CD1", "CD2", "CE1", "CE2", "CZ"},
    "PRO": {"CG", "CD"},
    "SER": {"OG"},
    "THR": {"OG1", "CG2"},
    "TRP": {"CG", "CD1", "CD2", "NE1", "CE2", "CE3", "CZ2", "CZ3", "CH2"},
    "TYR": {"CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"},
    "VAL": {"CG1", "CG2"},
}


def has_full_side_chain(residue: gemmi.Residue) -> bool:
    """Check if a residue has all side chain atoms from gamma onwards.

    Args:
        residue: Residue whose ``name`` selects the expected atom set and
            whose atoms are compared with it by atom name.

    Returns:
        ``True`` when every expected atom name is present in the residue.
        Residue names without an entry in ``SIDE_CHAIN_ATOMS`` expect no
        atoms and therefore always count as complete.
    """
    expected = SIDE_CHAIN_ATOMS.get(residue.name, set())
    built = {atom.name for atom in residue}
    # Non-strict superset: "all expected atoms are present".  A strict
    # comparison would additionally demand at least one extra atom, which
    # wrongly reports a residue as incomplete when it holds exactly the
    # expected atoms (or no atoms at all for an unlisted residue name).
    return built >= expected
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def any_missing_side_chains(structure: gemmi.Structure, model_index: int = 0) -> bool:
    """Check if any residue in one model of a structure lacks side chain atoms.

    Args:
        structure: Structure to inspect.
        model_index: Index of the model to inspect.  Defaults to the first
            model, which is what this function always used before the
            parameter existed.

    Returns:
        ``True`` as soon as one residue fails ``has_full_side_chain``,
        otherwise ``False``.
    """
    return any(
        not has_full_side_chain(residue)
        for chain in structure[model_index]
        for residue in chain
    )


def cif_path(name: str):
    """Locate the monomer library CIF file for a component.

    The file is looked up in the directory ``$CLIBD_MON/<first letter of name,
    lower case>`` as ``<name>.cif`` first and ``<name>_<name>.cif`` second.

    Args:
        name: Component (residue) name.

    Returns:
        The path of the first existing candidate as a string, or ``None``
        when neither file exists.

    Raises:
        KeyError: If the ``CLIBD_MON`` environment variable is not set.
    """
    directory = Path(environ["CLIBD_MON"]) / name[0].lower()
    for candidate in (directory / f"{name}.cif", directory / f"{name}_{name}.cif"):
        if candidate.exists():
            return str(candidate)
    return None


def main(argument_list=None):
    """Add missing side chains to a model and write the result.

    Reads the structure with gemmi to find incomplete residues in the
    selected model, then uses the headless Coot API to refine, fill and
    rotamer-fit each of them against the map read from the MTZ file, and
    finally writes the coordinates to the output path.  Nothing is written
    when no residue in the selected model is incomplete.

    Args:
        argument_list: Command-line arguments, or ``None`` to let
            ``_parse_args`` take them from the process.
    """
    # Remember whether a "coot-backup" directory was already present so that
    # only a directory that appeared during this run is removed at the end.
    backup_path = Path("coot-backup")
    backup_existed_before = backup_path.exists()
    setup_environ()
    args = _parse_args(argument_list)
    structure = gemmi.read_structure(args.structure)
    # Inspect the same model that is rebuilt below; checking the first model
    # regardless of --model-index could skip the work (or do it needlessly).
    if not any_missing_side_chains(structure, args.model_index):
        print("No missing side chains detected, no action taken")
        return
    mc = coot_headless_api.molecules_container_t(True)
    mc.set_use_gemmi(False)
    mc.set_make_backups(False)
    imol = mc.read_coordinates(args.structure)
    # Load restraint dictionaries for residue types Coot reports as
    # non-standard, warning about (and skipping) any that cannot be found.
    non_standard = mc.non_standard_residue_types_in_model(imol)
    for comp_id in non_standard:
        if (path := cif_path(comp_id)) is None:
            print("WARNING: No CIF file found for non-standard residue", comp_id)
            continue
        mc.import_cif_dictionary(path, imol)
    imap = mc.read_mtz(args.mtz, args.f_label, args.phi_label, "", False, False)
    mc.set_imol_refinement_map(imap)
    mc.set_use_torsion_restraints(True)
    mc.set_use_rama_plot_restraints(True)
    for chain in structure[args.model_index]:
        for residue in chain:
            if has_full_side_chain(residue):
                continue
            num = residue.seqid.num
            icode = residue.seqid.icode
            # gemmi reports a blank insertion code as a single space, which is
            # converted to the empty string before being passed to Coot.
            icode = "" if icode == " " else icode
            mc.refine_residues(imol, chain.name, num, icode, "", "TRIPLE", 1000)
            mc.fill_partial_residue(imol, chain.name, num, icode)
            mc.auto_fit_rotamer(imol, chain.name, num, icode, "", imap)
            mc.refine_residues(imol, chain.name, num, icode, "", "TRIPLE", 1000)
    mc.write_coordinates(imol, args.output)
    if not backup_existed_before and backup_path.exists():
        rmtree(backup_path, ignore_errors=True)


if __name__ == "__main__":
    main()
|
modelcraft/scripts/validate.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
import gemmi
|
|
5
|
+
from tabulate import tabulate
|
|
6
|
+
|
|
7
|
+
from ..environ import setup_environ
|
|
8
|
+
from ..monlib import MonLib
|
|
9
|
+
from ..reflections import DataItem
|
|
10
|
+
from ..validation import validate
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _parse_args(argument_list):
    """Parse the command-line arguments of the validation script.

    Args:
        argument_list: Argument strings to parse, or ``None`` to read them
            from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``structure``, ``mtz``,
        ``format``, ``libin``, ``sort`` and ``model_index``.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are invalid.
    """
    # NOTE(review): the module shown here starts with an import rather than a
    # docstring, so __doc__ is None and the help text has no description.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "structure",
        help="Input structure in PDB, mmCIF, mmJSON format",
    )
    parser.add_argument(
        "mtz",
        help=(
            "MTZ file from Refmac with standard output column labels "
            "(the output MTZ from ModelCraft meets this requirement)."
        ),
    )
    parser.add_argument(
        "--format",
        default="table",
        choices=["table", "csv"],
        help="Print the results as a human-readable table or a CSV file",
    )
    parser.add_argument(
        "--libin",
        metavar="PATH",
        help="Path to a custom restraint dictionary in CIF format",
    )
    parser.add_argument(
        "--sort",
        action="store_true",
        help="Order the output with the worse scoring residues first",
    )
    parser.add_argument(
        "--model-index",
        type=int,
        default=0,
        metavar="N",
        help="Index of the model to analyse (with 0 being the first model)",
    )
    # Fall back to the process arguments only when nothing was supplied.  An
    # explicitly empty list must be parsed as given (and rejected for lacking
    # the positional arguments) rather than silently replaced by sys.argv.
    if argument_list is None:
        argument_list = sys.argv[1:]
    return parser.parse_args(argument_list)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main(argument_list=None):
    """Run the validation and print the per-residue metrics.

    The structure and MTZ file named on the command line are read, the
    metrics are computed by ``validate`` for the selected model and then
    printed either as a table (with an extra ``Sig`` column) or as CSV.

    Args:
        argument_list: Command-line arguments, or ``None`` to let
            ``_parse_args`` take them from the process.
    """

    def _significance(score):
        # One "+" per whole unit the score lies below zero, capped at five;
        # scores that truncate to zero or above give an empty marker.
        return "+" * min(5, -int(score))

    setup_environ()
    args = _parse_args(argument_list)

    structure = gemmi.read_structure(args.structure, format=gemmi.CoorFormat.Detect)
    mtz = gemmi.read_mtz_file(args.mtz)
    # Column label pairs read from the MTZ file for the three data items.
    fphi_best = DataItem(mtz, "FWT,PHWT")
    fphi_diff = DataItem(mtz, "DELFWT,PHDELWT")
    fphi_calc = DataItem(mtz, "FC_ALL,PHIC_ALL")

    model = structure[args.model_index]
    monlib = MonLib(model.get_all_residue_names(), args.libin)
    metrics = validate(
        structure, fphi_best, fphi_diff, fphi_calc, monlib, args.model_index
    )
    if args.sort:
        metrics = metrics.sort_values("Score", ascending=True)

    if args.format != "table":
        print(metrics.to_csv(index=False))
        return
    metrics["Sig"] = metrics["Score"].apply(_significance)
    print(tabulate(metrics, headers="keys", showindex=False, floatfmt=".1f"))


if __name__ == "__main__":
    main()
|
modelcraft/sequence.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# One-letter to residue-name tables for each polymer type.  Each table has an
# "X" entry, which PolymerType.parse uses for letters that are not listed.
PROTEIN_CODES = {
    "A": "ALA",
    "B": "ASX",
    "C": "CYS",
    "D": "ASP",
    "E": "GLU",
    "F": "PHE",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "K": "LYS",
    "L": "LEU",
    "M": "MET",
    "N": "ASN",
    "O": "PYL",
    "P": "PRO",
    "Q": "GLN",
    "R": "ARG",
    "S": "SER",
    "T": "THR",
    "U": "SEC",
    "V": "VAL",
    "W": "TRP",
    "X": "UNK",
    "Y": "TYR",
    "Z": "GLX",
}

DNA_CODES = {
    "A": "DA",
    "C": "DC",
    "G": "DG",
    "I": "DI",
    "N": "DN",
    "T": "DT",
    "U": "DU",
    "X": "DN",
}

RNA_CODES = {
    "A": "A",
    "C": "C",
    "G": "G",
    "I": "I",
    "N": "N",
    "U": "U",
    "X": "N",
}


class PolymerType:
    """A kind of polymer together with its one-letter code table.

    The three supported kinds are available as the class attributes
    ``PROTEIN``, ``DNA`` and ``RNA``, which are assigned right after the
    class definition.
    """

    # Forward declarations only: these are annotations rather than string
    # placeholder values, so the attributes never hold anything other than
    # a PolymerType instance.
    PROTEIN: "PolymerType"
    DNA: "PolymerType"
    RNA: "PolymerType"

    def __init__(self, name: str, codes: dict[str, str]):
        """Store the display name and the one-letter code table."""
        self.name = name
        self.codes = codes

    def parse(self, sequence: str) -> list[str]:
        """Convert a one-letter sequence into a list of residue names.

        Letters missing from the code table are mapped to the table's "X"
        entry.
        """
        return [self.codes.get(c, self.codes["X"]) for c in sequence]

    @classmethod
    def guess(cls, sequence: str) -> "PolymerType":
        """Guess the polymer type from the letters used in a sequence.

        Checks are made in this order: any letter from "DEFHKLMNPQRSVWY", or
        a sequence made only of "A" or only of "G", gives PROTEIN; otherwise
        a "U" gives RNA; otherwise a "T" gives DNA; anything else (including
        an empty sequence) gives RNA.
        """
        codes = set(sequence)
        if codes & set("DEFHKLMNPQRSVWY") or codes in ({"A"}, {"G"}):
            return cls.PROTEIN
        if "U" in codes:
            return cls.RNA
        if "T" in codes:
            return cls.DNA
        return cls.RNA


PolymerType.PROTEIN = PolymerType("Protein", PROTEIN_CODES)
PolymerType.DNA = PolymerType("DNA", DNA_CODES)
PolymerType.RNA = PolymerType("RNA", RNA_CODES)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Two-character codes that, when followed by ";" directly after the ">" of a
# header line, mark the header as PIR-style (see sequences_in_file).
PIR_CODES = {"D1", "DC", "DL", "F1", "N1", "N3", "P1", "RC", "RL", "XX"}


def sequences_in_file(contents: str) -> list[str]:
    """Extract the sequences from the text of a sequence file.

    A line starting with ">" begins a new record.  When that header has the
    form ``>CC;...`` with ``CC`` in ``PIR_CODES``, the line after it is
    skipped as well.  Lines starting with ";" are ignored.  From every other
    line only the alphabetic characters are kept, and a line ending in "*"
    finishes the record so that anything up to the next header is ignored.

    Args:
        contents: Full text of the file.

    Returns:
        The non-empty sequences in the order they appear.
    """
    sequence = ""
    sequences = []
    skip_line = False  # skip exactly the next line (line after a PIR header)
    skip_lines = False  # skip everything up to the next header (after "*")
    for line in contents.splitlines(keepends=False):
        if skip_line:
            skip_line = False
            continue
        if line[:1] == ">":
            if sequence:
                sequences.append(sequence)
            sequence = ""
            # Slice rather than index the fourth character so that a short
            # header such as ">P1" does not raise IndexError.
            if line[1:3] in PIR_CODES and line[3:4] == ";":
                skip_line = True
            skip_lines = False
        elif line[:1] != ";" and not skip_lines:
            sequence += "".join(c for c in line if c.isalpha())
            if line[-1:] == "*":
                skip_lines = True
    if sequence:
        sequences.append(sequence)
    return sequences
|