servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +1162 -0
- servalcat/refine/refine_geom.py +245 -0
- servalcat/refine/refine_spa.py +400 -0
- servalcat/refine/refine_xtal.py +339 -0
- servalcat/refine/spa.py +151 -0
- servalcat/refine/xtal.py +312 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +191 -0
- servalcat/refmac/refmac_keywords.py +660 -0
- servalcat/refmac/refmac_wrapper.py +423 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +488 -0
- servalcat/spa/fsc.py +391 -0
- servalcat/spa/localcc.py +197 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +979 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1629 -0
- servalcat/utils/fileio.py +836 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +811 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +933 -0
- servalcat/utils/refmac.py +759 -0
- servalcat/utils/restraints.py +888 -0
- servalcat/utils/symmetry.py +298 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +262 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1954 -0
- servalcat/xtal/twin.py +316 -0
- servalcat-0.4.131.dist-info/METADATA +60 -0
- servalcat-0.4.131.dist-info/RECORD +45 -0
- servalcat-0.4.131.dist-info/WHEEL +6 -0
- servalcat-0.4.131.dist-info/entry_points.txt +4 -0
- servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
|
@@ -0,0 +1,888 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
from servalcat.utils import logger
|
|
10
|
+
from servalcat.refmac import refmac_keywords
|
|
11
|
+
from servalcat import ext
|
|
12
|
+
import os
|
|
13
|
+
import gemmi
|
|
14
|
+
import string
|
|
15
|
+
import random
|
|
16
|
+
import numpy
|
|
17
|
+
import pandas
|
|
18
|
+
import json
|
|
19
|
+
import fnmatch
|
|
20
|
+
|
|
21
|
+
default_proton_scale = 1.13 # scale of X-proton distance to X-H(e) distance
|
|
22
|
+
|
|
23
|
+
def decide_new_mod_id(mod_id, mods):
    """Return a modification id that is not contained in mods.

    The original id is kept as a prefix when it is shorter than 8 characters:
    a single digit or lowercase letter is appended, separated by "-" unless
    the id is already 7 characters long. Otherwise (or when all such ids are
    taken) an id of the form "mod" + 4 digits/lowercase letters is returned.

    Args:
        mod_id: the id that clashes with an existing one.
        mods: container of ids already in use; only ``in`` is used on it.

    Returns:
        A string that is not in mods.

    Raises:
        RuntimeError: if every candidate id is already in mods.
    """
    import itertools  # local import: only used by the exhaustive fallback

    # Refmac only allows up to 8 characters
    letters = string.digits + string.ascii_lowercase
    if len(mod_id) < 8:
        # a 7-character id only has room for the suffix letter
        sep = "" if len(mod_id) == 7 else "-"
        for l in letters:
            new_id = "{}{}{}".format(mod_id, sep, l)
            if new_id not in mods:
                return new_id

    # give up keeping original name
    for _ in range(100):
        new_id = "mod" + "".join([random.choice(letters) for _ in range(4)])
        if new_id not in mods:
            return new_id

    # Random picks kept colliding: scan all candidates in order so that the
    # search is guaranteed to terminate instead of looping forever.
    for suffix in itertools.product(letters, repeat=4):
        new_id = "mod" + "".join(suffix)
        if new_id not in mods:
            return new_id
    raise RuntimeError("no unused modification id is available for {}".format(mod_id))
# decide_new_mod_id()
|
|
38
|
+
|
|
39
|
+
def rename_cif_modification_if_necessary(doc, known_ids):
    """Rename modifications in doc whose ids are already in known_ids.

    For every clashing ``_chem_mod.id`` a new id is chosen with
    decide_new_mod_id() and written back to the document: in the
    ``_chem_mod`` row, in the name and ``*.mod_id`` columns of the
    corresponding ``mod_<id>`` block, and in ``_chem_link.mod_id_1/2``.

    Args:
        doc: CIF document to be modified in place.
        known_ids: container of modification ids already in use.

    Returns:
        dict mapping each renamed id to its new id (empty if nothing clashed).
    """
    # FIXME Problematic if other file refers to modification that is renamed in this function - but how can we know?
    trans = {}
    for b in doc:
        for row in b.find("_chem_mod.", ["id"]):
            mod_id = row.str(0)
            if mod_id in known_ids:
                new_id = decide_new_mod_id(mod_id, known_ids)
                trans[mod_id] = new_id
                row[0] = new_id # modify id
                logger.writeln("INFO:: renaming modification id {} to {}".format(mod_id, new_id))

    # modify ids in mod_* blocks
    for mod_id in trans:
        b = doc.find_block("mod_{}".format(mod_id))
        if not b: # should raise error?
            logger.writeln("WARNING:: inconsistent mod description for {}".format(mod_id))
            continue
        b.name = "mod_{}".format(trans[mod_id]) # modify name
        for item in b:
            loop = item.loop
            if loop is None:
                # not a loop item (e.g. a tag-value pair): it has no tags to inspect
                continue
            for tag in loop.tags:
                if tag.endswith(".mod_id"):
                    # tag[:rindex(".")+1] is the category prefix including the dot
                    for row in b.find(tag[:tag.rindex(".")+1], ["mod_id"]):
                        row[0] = trans[mod_id]

    # Update mod id in links
    if trans:
        for b in doc:
            for row in b.find("_chem_link.", ["mod_id_1", "mod_id_2"]):
                for i in range(2):
                    if row.str(i) in trans:
                        row[i] = trans[row.str(i)]

    return trans
# rename_cif_modification_if_necessary()
|
|
74
|
+
|
|
75
|
+
def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns=False,
                         ignore_monomer_dir=False, update_old_atom_names=True,
                         params=None):
    """Create a gemmi.MonLib for the residues in the first model of st.

    Monomers are first read from monomer_dir (or from $CLIBD_MON when
    monomer_dir is None), then from the user-supplied cif_files. Monomers and
    links defined in cif_files replace library entries of the same name, and
    clashing modification ids in cif_files are renamed.

    Args:
        st: structure; only its first model is inspected.
        monomer_dir: monomer library directory, or None to use $CLIBD_MON.
        cif_files: optional list of restraint cif file names.
        stop_for_unknowns: raise if a residue has no monomer description.
        ignore_monomer_dir: if true, no library directory is read at all.
        update_old_atom_names: call monlib.update_old_atom_names(st, logger).
        params: optional dict; params["restr"]["torsion_include"] is passed
            to update_torsions() when params is truthy.

    Returns:
        The populated gemmi.MonLib.

    Raises:
        RuntimeError: if monomer_dir is not a directory, if a cif file has
            bonds without value_dist, or if stop_for_unknowns is set and
            some residues have no monomer description.
    """
    resnames = st[0].get_all_residue_names()

    if monomer_dir is None and not ignore_monomer_dir:
        if "CLIBD_MON" not in os.environ:
            logger.error("WARNING: CLIBD_MON is not set")
        else:
            monomer_dir = os.environ["CLIBD_MON"]

    if cif_files is None:
        cif_files = []

    monlib = gemmi.MonLib()
    if monomer_dir and not ignore_monomer_dir:
        if not os.path.isdir(monomer_dir):
            raise RuntimeError("not a directory: {}".format(monomer_dir))

        logger.writeln("Reading monomers from {}".format(monomer_dir))
        monlib.read_monomer_lib(monomer_dir, resnames, logger)

    for f in cif_files:
        logger.writeln("Reading monomer: {}".format(f))
        doc = gemmi.cif.read(f)
        for b in doc:
            atom_id_list = b.find_values("_chem_comp_atom.atom_id")
            if atom_id_list:
                # strip only the leading "comp_" so that a name which merely
                # contains that text elsewhere is left intact
                name = b.name.removeprefix("comp_")
                if name in monlib.monomers:
                    logger.writeln("WARNING:: updating monomer {} using {}".format(name, f))
                    del monlib.monomers[name]

                # Check if bond length values are included
                # This is to fail if cif file is e.g. from PDB website
                if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
                    raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")

            for row in b.find("_chem_link.", ["id"]):
                link_id = row.str(0)
                if link_id in monlib.links:
                    logger.writeln("WARNING:: updating link {} using {}".format(link_id, f))
                    del monlib.links[link_id]

        # If modification id is duplicated, need to rename
        rename_cif_modification_if_necessary(doc, monlib.modifications)
        monlib.read_monomer_doc(doc)
        for b in doc:
            for row in b.find("_chem_comp.", ["id", "group"]):
                if row.str(0) in monlib.monomers:
                    monlib.monomers[row.str(0)].set_group(row.str(1))

    not_loaded = set(resnames).difference(monlib.monomers)
    if not_loaded:
        # sorted: keep the message reproducible between runs
        logger.writeln("WARNING: monomers not loaded: {}".format(" ".join(sorted(not_loaded))))

    logger.writeln("Monomer library loaded: {} monomers, {} links, {} modifications".format(len(monlib.monomers),
                                                                                            len(monlib.links),
                                                                                            len(monlib.modifications)))
    logger.writeln(" loaded monomers: {}".format(" ".join([x for x in monlib.monomers])))
    logger.writeln("")

    logger.writeln("Checking if unknown atoms exist..")

    unknown_cc = set()
    for chain in st[0]:
        unknown_cc.update(res.name for res in chain if res.name not in monlib.monomers)
    if unknown_cc:
        unknown_str = ",".join(sorted(unknown_cc))
        if stop_for_unknowns:
            raise RuntimeError("Provide restraint cif file(s) for {}".format(unknown_str))
        else:
            logger.writeln("WARNING: ad-hoc restraints will be generated for {}".format(unknown_str))
            logger.writeln(" it is strongly recommended to generate them using AceDRG.")

    if update_old_atom_names:
        monlib.update_old_atom_names(st, logger)

    if params:
        update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))

    return monlib
# load_monomer_library()
|
|
156
|
+
|
|
157
|
+
def fix_elements_in_model(monlib, st):
    """Set atom elements in the first model of st to those given by monlib.

    Atoms are matched to dictionary atoms by residue name and atom name.
    Every correction is reported through the logger.
    """
    # expected element for each atom id, grouped by monomer name
    expected = {}
    for mon_name in monlib.monomers:
        expected[mon_name] = {a.id: a.el for a in monlib.monomers[mon_name].atoms}
    # used only to print the full address of a corrected atom
    cra_of = {cra.atom: cra for cra in st[0].all()}
    for chain in st[0]:
        for res in chain:
            els = expected.get(res.name)
            if not els: # should not happen
                continue
            for at in res:
                # atom names absent from the dictionary are skipped;
                # for example atom names of element D may be different, which will be sorted later
                if at.name in els and at.element != els[at.name]:
                    el = els[at.name]
                    logger.writeln("WARNING: correcting element of {} to {}".format(cra_of[at], el.name))
                    at.element = el
# fix_elements_in_model()
|
|
172
|
+
|
|
173
|
+
def update_torsions(monlib, params):
    """Overwrite torsion targets in monlib according to user rules.

    Only rules containing at least one of "tors_value", "tors_sigma" or
    "tors_period" are used. A rule selects monomers by "residue" (wildcard
    on the name), by "group", or links by "link" (wildcard on the id); the
    first of these keys present in the rule is used. Torsions whose label
    matches the wildcard "tors_name" get their value/esd/period replaced.

    Args:
        monlib: monomer library whose torsions are modified in place.
        params: iterable of dict rules.

    Returns:
        None.
    """
    target_keys = (("value", "tors_value"), ("esd", "tors_sigma"), ("period", "tors_period"))
    # take subset
    params = [p for p in params
              if any(key in p for _, key in target_keys)]
    if not params:
        return
    logger.writeln("Updating torsion targets in dictionaries")
    for p in params:
        if "tors_name" not in p:
            # without a name there is nothing to match torsions against;
            # skip instead of failing with KeyError further down
            logger.writeln(f"WARNING: torsion name is missing; ignoring rule = {p}")
            continue
        if "residue" in p:
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if fnmatch.fnmatch(cc.name, p["residue"])]
        elif "group" in p:
            g = gemmi.ChemComp.read_group(p["group"])
            # should warn if g is Null
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if cc.group == g]
        elif "link" in p:
            tors = [ln.rt.torsions for ln in monlib.links.values()
                    if fnmatch.fnmatch(ln.id, p["link"])]
        else:
            tors = []
        if not tors:
            continue
        logger.writeln(f" rule = {p}")
        for tt in tors:
            for t in tt:
                if fnmatch.fnmatch(t.label, p["tors_name"]):
                    for attr, key in target_keys:
                        if key in p:
                            setattr(t, attr, p[key])
# update_torsions()
|
|
207
|
+
|
|
208
|
+
def make_torsion_rules(restr_params):
    """Build the include/exclude rule lists for torsion restraints.

    Starts from built-in include rules and applies the entries of
    restr_params["torsion_include"] and restr_params["torsion_exclude"] in
    order: an entry with a true "flag" appends a rule (if it has a selector
    and a "tors_name"), an entry with a false "flag" empties the list built
    so far. The resulting rules are written to the log.

    Returns:
        (include_rules, exclude_rules), both lists of dicts.
    """
    # Defaults
    include_rules = [
        {"group": "peptide", "tors_name": "chi*"},
        {"link": "*", "tors_name": "omega"},
        {"residue": "*", "tors_name": "sp2_sp2*"},
        {"link": "*", "tors_name": "sp2_sp2*"},
    ]
    exclude_rules = []

    # Override include/exclude rules
    for name, rules in (("torsion_include", include_rules),
                        ("torsion_exclude", exclude_rules)):
        for p in restr_params.get(name, []):
            if not p["flag"]:
                rules.clear()
                continue
            selector = {k: p[k] for k in ("residue", "group", "link") if k in p}
            if selector and "tors_name" in p:
                selector["tors_name"] = p["tors_name"]
                rules.append(selector)

    # How to tell about hydrogen?
    logger.writeln("Torsion angle rules:")
    for title, rule_list in (("include", include_rules), ("exclude", exclude_rules)):
        logger.writeln(" {}:".format(title))
        if rule_list:
            for rule in rule_list:
                logger.writeln(" {}".format(rule))
        else:
            logger.writeln(" none")

    return include_rules, exclude_rules
# make_torsion_rules()
|
|
243
|
+
|
|
244
|
+
def select_restrained_torsions(monlib, include_rules, exclude_rules):
    """Choose the torsion labels to be restrained for each monomer and link.

    Torsion labels are collected from monomers (plus the modifications that
    apply to them, by comp_id or by group) and from links. A label is
    selected when it matches the "tors_name" wildcard of an include rule
    whose selector matches the monomer/link, and is dropped again when an
    exclude rule matches in the same way.

    Args:
        monlib: monomer library providing monomers, modifications and links.
        include_rules: list of dict rules (see make_torsion_rules()).
        exclude_rules: list of dict rules.

    Returns:
        {"monomer": {monomer id: sorted labels}, "link": {link id: sorted labels}}.
        Monomers/links without any selected torsion are omitted.
    """
    # Collect monomer/link related torsions
    groups = {}
    mon_tors = {}
    for mon_id in monlib.monomers:
        mon = monlib.monomers[mon_id]
        groups.setdefault(mon.group, []).append(mon_id)
        mon_tors[mon_id] = {x.label for x in mon.rt.torsions}
    for mod_id in monlib.modifications:
        mod = monlib.modifications[mod_id]
        tors = [x.label for x in mod.rt.torsions if chr(x.id1.comp) in ("a", "c")] # don't need delete
        if not tors:
            continue
        if mod.comp_id:
            if mod.comp_id in mon_tors:
                mon_tors[mod.comp_id].update(tors)
        else:
            # modification defined for a whole group of monomers
            for mon_id in groups.get(gemmi.ChemComp.read_group(mod.group_id), []):
                mon_tors[mon_id].update(tors)
    link_tors = {}
    for lnk_id in monlib.links:
        lnk = monlib.links[lnk_id]
        if lnk.rt.torsions:
            link_tors[lnk_id] = {x.label for x in lnk.rt.torsions}

    def monomer_matches(rule, mon_id):
        if "tors_name" not in rule:
            return False
        if "residue" in rule and fnmatch.fnmatch(mon_id, rule["residue"]):
            return True
        # Consult the group only when the rule names one. Otherwise a
        # link-only rule would be looked up as the group of an empty name
        # and match monomers that have no group assigned.
        if "group" in rule:
            return mon_id in groups.get(gemmi.ChemComp.read_group(rule["group"]), [])
        return False

    def link_matches(rule, lnk_id):
        return "tors_name" in rule and "link" in rule and fnmatch.fnmatch(lnk_id, rule["link"])

    def select(name, labels, matches):
        # a set, so that overlapping include rules do not duplicate labels
        chosen = set()
        for r in include_rules:
            if matches(r, name):
                chosen.update(x for x in labels if fnmatch.fnmatch(x, r["tors_name"]))
        for r in exclude_rules:
            if matches(r, name):
                chosen = {x for x in chosen if not fnmatch.fnmatch(x, r["tors_name"])}
        return sorted(chosen)

    # Apply include/exclude rule
    ret = {"monomer": {}, "link": {}}
    for mon_id, labels in mon_tors.items():
        use_tors = select(mon_id, labels, monomer_matches)
        if use_tors:
            ret["monomer"][mon_id] = use_tors
    for lnk_id, labels in link_tors.items():
        use_tors = select(lnk_id, labels, link_matches)
        if use_tors:
            ret["link"][lnk_id] = use_tors

    return ret
# select_restrained_torsions()
|
|
301
|
+
|
|
302
|
+
def _raise_if_duplicated_atoms(st):
    """Raise RuntimeError if a residue of the first model repeats an (atom name, altloc) pair."""
    bad = []
    for chain in st[0]:
        bad_res = [str(res.seqid) for res in chain
                   if len({(a.name, a.altloc) for a in res}) != len(res)]
        if bad_res:
            bad.append(" chain {}: {}".format(chain.name, " ".join(bad_res)))
    if bad:
        raise RuntimeError("Following residues have duplicated atoms. Check your model.\n{}".format("\n".join(bad)))


def _remove_failed_hydrogens(st):
    """Delete hydrogens flagged as Dummy or having NaN coordinates; return True if any was deleted."""
    deleted = False
    for chain in st[0]:
        for res in chain:
            todel = []
            for i, atom in enumerate(res):
                if atom.is_hydrogen() and (atom.calc_flag == gemmi.CalcFlag.Dummy
                                           or any(numpy.isnan(atom.pos.tolist()))):
                    logger.writeln(f" Removing failed hydrogen: {chain.name}/{res.name} {res.seqid}/{atom.name}")
                    todel.append(i)
            # delete from the end so that the remaining indices stay valid
            for i in reversed(todel):
                del res[i]
            if todel:
                deleted = True
    return deleted


def _log_chain_info(topo):
    """Write a per-chain summary (polymer range and gaps, ligands) of topo to the log."""
    def extra_defined(res1, res2): # TODO should check alt
        for link in topo.extras:
            res12 = (link.res1, link.res2)
            if link.link_id and (res12 == (res1, res2) or res12 == (res2, res1)):
                return True
        return False

    # collect info
    info = {}
    for cinfo in topo.chain_infos:
        toadd = info.setdefault(cinfo.chain_ref.name, {})
        if cinfo.polymer:
            gaps = []
            for rinfo in cinfo.res_infos:
                if (rinfo.prev and rinfo.prev[0].link_id in ("gap", "") and
                    not extra_defined(rinfo.prev[0].res1, rinfo.prev[0].res2)):
                    gaps.append((rinfo.prev[0].res1, rinfo.prev[0].res2))
            toadd["polymer"] = (str(cinfo.polymer_type).replace("PolymerType.", ""),
                                cinfo.res_infos[0].res.seqid,
                                cinfo.res_infos[-1].res.seqid,
                                len(cinfo.res_infos), gaps)
        else:
            toadd.setdefault("nonpolymer", []).extend(ri.res.name for ri in cinfo.res_infos)
    logger.writeln("\nChain info:")
    for chain in info:
        logger.writeln(" chain {}".format(chain))
        if "polymer" in info[chain]:
            logger.writeln(" {}: {}..{} ({} residues)".format(*info[chain]["polymer"][:-1]))
            for gap in info[chain]["polymer"][-1]:
                logger.writeln(" gap between {} and {}".format(*gap))
        if "nonpolymer" in info[chain]:
            n_res = len(info[chain]["nonpolymer"])
            uniq = set(info[chain]["nonpolymer"])
            logger.writeln(" ligands: {} ({} residues)".format(" ".join(uniq), n_res))
    logger.writeln("")


def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_error=True, check_hydrogen=False,
                     remove_bad_hydrogen=True, use_cispeps=False, add_metal_restraints=True, params=None):
    """Create a gemmi topology for st and check that all atoms are described.

    Steps: reject residues with duplicated atoms; optionally set up metal
    coordination restraints (the connections they replace are removed while
    the topology is built and put back afterwards); call
    gemmi.prepare_topology; optionally delete hydrogens that could not be
    placed and rebuild the topology; log chain information; collect atoms
    that are missing from the dictionaries.

    Args:
        st: structure, modified in place.
        monlib: monomer library.
        h_change: hydrogen handling mode passed to gemmi.prepare_topology.
        ignore_unknown_links, use_cispeps: passed to gemmi.prepare_topology.
        raise_error: raise when unknown atoms or NaN positions are found.
        check_hydrogen: also treat undescribed hydrogen atoms as problems.
        remove_bad_hydrogen: delete Dummy/NaN hydrogens after the first build.
        add_metal_restraints: set up restraints with MetalCoordination.
        params: optional dict; parsed "exte" keywords of the metal
            restraints are prepended to params["exte"].

    Returns:
        (topo, keywords): the topology and the keywords returned by
        MetalCoordination.setup_restraints (an empty list when
        add_metal_restraints is false).

    Raises:
        RuntimeError: for duplicated atoms, and (if raise_error) for atoms
            lacking a dictionary description or NaN positions.
    """
    # Check duplicated atoms
    _raise_if_duplicated_atoms(st)

    keywords = []
    todel = []
    con_bak = []
    if add_metal_restraints:
        metalc = MetalCoordination(monlib)
        keywords, todel = metalc.setup_restraints(st)

    try:
        for i in sorted(todel, reverse=True):
            # temporarily remove connection not to put a bond restraint
            con = st.connections.pop(i)
            con_bak.append((i, con))
            # flag non-hydrogen
            cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
            cra2.atom.calc_flag = gemmi.CalcFlag.NoHydrogen
        if add_metal_restraints and params:
            parsed = refmac_keywords.parse_keywords(keywords).get("exte")
            if parsed:
                params["exte"] = parsed + params.get("exte", [])

        # these checks can be done after sorting links
        logger.writeln("Creating restraints..")
        with logger.with_prefix(" "):
            topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
                                          ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)

        if remove_bad_hydrogen and _remove_failed_hydrogens(st):
            # needs re-creation, as the deletion invalidates pointers stored in topo
            logger.writeln("Re-creating restraints..")
            with logger.with_prefix(" "):
                topo = gemmi.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange, warnings=logger, reorder=False,
                                              ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)

        _log_chain_info(topo)

        unknown_cc = set()
        link_related = set()
        nan_hydr = set()
        for cinfo in topo.chain_infos:
            for rinfo in cinfo.res_infos:
                cc_org = monlib.monomers[rinfo.res.name] if rinfo.res.name in monlib.monomers else None
                for ia in reversed(range(len(rinfo.res))):
                    atom = rinfo.res[ia]
                    atom_str = "{}/{} {}/{}".format(cinfo.chain_ref.name, rinfo.res.name, rinfo.res.seqid, atom.name)
                    cc = rinfo.get_final_chemcomp(atom.altloc)
                    cc_atom = cc.find_atom(atom.name)
                    if cc_atom:
                        if cc_atom.chem_type not in monlib.ener_lib.atoms:
                            deftype = atom.element.name.upper()
                            logger.writeln(f"WARNING: unknown chemical type {cc_atom.chem_type} of {atom_str}. Will use default type {deftype}")
                            cc_atom.chem_type = deftype
                    elif check_hydrogen or not atom.is_hydrogen():
                        # warning message should have already been given by gemmi
                        if cc_org and cc_org.find_atom(atom.name):
                            # known to the unmodified monomer but not to the final one
                            link_related.add(rinfo.res.name)
                        else:
                            unknown_cc.add(rinfo.res.name)

                    if atom.is_hydrogen() and atom.calc_flag == gemmi.CalcFlag.Dummy:
                        logger.writeln(" Warning: hydrogen {} could not be added - Check dictionary".format(atom_str))
                        unknown_cc.add(rinfo.res.name)
                    elif any(numpy.isnan(atom.pos.tolist())): # TODO add NaN test before prepare_toplogy
                        logger.writeln(" Warning: {} position NaN!".format(atom_str))
                        nan_hydr.add(rinfo.res.name)

        if raise_error and (unknown_cc or link_related):
            msgs = []
            if unknown_cc: msgs.append("restraint cif file(s) for {}".format(",".join(unknown_cc)))
            if link_related: msgs.append("proper link cif file(s) for {} or check your model".format(",".join(link_related)))
            raise RuntimeError("Provide {}".format(" and ".join(msgs)))
        if raise_error and nan_hydr:
            raise RuntimeError("Some hydrogen positions became NaN. The geometry of your model may be of low quality. Consider not adding hydrogen")
        if not use_cispeps:
            topo.set_cispeps_in_structure(st)
    finally:
        # Put the temporarily removed connections back, also when an error
        # is raised, so that the caller's structure is not left without them.
        # Ascending index order restores the original positions.
        for i, con in sorted(con_bak, key=lambda x: x[0]):
            st.connections.insert(i, con)
    return topo, keywords
# prepare_topology()
|
|
444
|
+
|
|
445
|
+
def dump_topology(topo, st):
    """Return the restraints stored in topo as a list of plain dicts.

    Each dict has "kind" and "atoms" (addresses of the atoms in the first
    model of st) plus, depending on the kind, "ideal", "esd", "label" and
    "model" (the value calculated from the current coordinates; angles and
    torsions are converted with numpy.rad2deg). Link restraints also carry
    "link_id", and those from topo.extras carry "asu".
    """
    cra_of = {cra.atom: cra for cra in st[0].all()}
    # restraint kind -> (label, list that rule.index refers to)
    tables = {
        gemmi.RKind.Bond: ("bond", topo.bonds),
        gemmi.RKind.Angle: ("angle", topo.angles),
        gemmi.RKind.Torsion: ("torsion", topo.torsions),
        gemmi.RKind.Chirality: ("chirality", topo.chirs),
        gemmi.RKind.Plane: ("plane", topo.planes),
    }
    chirality_names = {
        gemmi.ChiralityType.Both: "both",
        gemmi.ChiralityType.Negative: "negative",
        gemmi.ChiralityType.Positive: "positive",
    }
    asu_names = {
        gemmi.Asu.Different: "different",
        gemmi.Asu.Any: "any",
        gemmi.Asu.Same: "same",
    }

    def describe(rule):
        kind = rule.rkind
        label, table = tables[kind]
        t = table[rule.index]
        out = {"kind": label,
               "atoms": [str(cra_of[a]) for a in t.atoms]}
        # target values
        if kind == gemmi.RKind.Chirality:
            out["ideal"] = chirality_names[t.restr.sign]
        elif kind == gemmi.RKind.Plane:
            out["esd"] = t.restr.esd
        else:
            out["ideal"] = t.restr.value
            out["esd"] = t.restr.esd
        if kind in (gemmi.RKind.Torsion, gemmi.RKind.Plane):
            out["label"] = t.restr.label
        # values calculated from the model
        if kind in (gemmi.RKind.Angle, gemmi.RKind.Torsion):
            out["model"] = numpy.rad2deg(t.calculate())
        elif kind == gemmi.RKind.Plane:
            plane = gemmi.find_best_plane(t.atoms)
            out["model"] = [gemmi.get_distance_from_plane(a.pos, plane) for a in t.atoms]
        else:
            out["model"] = t.calculate()
        return out

    dumped = []
    for cinfo in topo.chain_infos:
        for ri in cinfo.res_infos:
            for prev in ri.prev:
                dumped.extend({"link_id": prev.link_id, **describe(rule)}
                              for rule in prev.link_rules)
            dumped.extend(describe(rule) for rule in ri.monomer_rules)

    for extra in topo.extras:
        for rule in extra.link_rules:
            head = {"link_id": extra.link_id, "asu": asu_names[extra.asu]}
            dumped.append({**head, **describe(rule)})
    return dumped
# dump_topology()
|
|
496
|
+
|
|
497
|
+
def check_monlib_support_nucleus_distances(monlib, resnames):
    """Check that monlib has nucleus distances for all bonds involving hydrogen.

    Residues that are not in monlib are reported as errors; residues with a
    hydrogen bond whose value_nucleus is NaN are reported in one warning.

    Returns:
        True if every residue is loaded and none lacks a nucleus distance.
    """
    def lacks_nucleus_distance(mon):
        for bond in mon.rt.bonds:
            h1 = mon.get_atom(bond.id1.atom).is_hydrogen()
            h2 = mon.get_atom(bond.id2.atom).is_hydrogen()
            # NaN is the only value that differs from itself
            if (h1 or h2) and bond.value_nucleus != bond.value_nucleus:
                return True
        return False

    all_loaded = True
    nucl_not_found = []
    for resn in resnames:
        if resn in monlib.monomers:
            if lacks_nucleus_distance(monlib.monomers[resn]):
                nucl_not_found.append(resn)
        else:
            logger.error("ERROR: monomer information of {} not loaded".format(resn))
            all_loaded = False

    if nucl_not_found:
        logger.writeln("WARNING: nucleus distance is not found for: {}".format(" ".join(nucl_not_found)))
        logger.writeln(" default scale ({}) is used for nucleus distances.".format(default_proton_scale))
    return all_loaded and not nucl_not_found
# check_monlib_support_nucleus_distances()
|
|
521
|
+
|
|
522
|
+
def remove_duplicated_links(connections):
    """Remove connections that join the same pair of atoms, in place.

    Atoms are compared by chain name, sequence number, insertion code, atom
    name and altloc, regardless of the partner order. Within a group of
    duplicates the first connection with a non-blank link_id is kept, or the
    first connection if none has one.
    """
    # ignore p.res_id.name?
    def atom_key(p):
        return (p.chain_name, p.res_id.seqid.num, p.res_id.seqid.icode, p.atom_name, p.altloc)

    # indices of the connections, grouped by their (order-independent) atom pair
    same_pair = {}
    for idx, con in enumerate(connections):
        pair = tuple(sorted((atom_key(con.partner1), atom_key(con.partner2))))
        same_pair.setdefault(pair, []).append(idx)

    todel = []
    for indices in same_pair.values():
        if len(indices) < 2:
            continue
        with_id = [c for c in indices if connections[c].link_id.strip()]
        if len({connections[c].link_id for c in with_id}) > 1:
            logger.writeln(" WARNING: duplicated links are found with different link_id")
        tokeep = with_id[0] if with_id else indices[0]
        todel.extend(c for c in indices if c != tokeep)

    # delete from the end so that the remaining indices stay valid
    for idx in sorted(todel, reverse=True):
        del connections[idx]
    if todel:
        logger.writeln(" {} duplicated links were removed.".format(len(todel)))
# remove_duplicated_links()
|
|
547
|
+
|
|
548
|
+
def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_found=True, find_symmetry_related=True,
|
|
549
|
+
metal_margin=1.1, add_only_from=None):
|
|
550
|
+
metalc = MetalCoordination(monlib)
|
|
551
|
+
"""
|
|
552
|
+
Identify link ids for st.connections and find new links
|
|
553
|
+
This is required for correctly recognizing link in gemmi.prepare_topology
|
|
554
|
+
Note that it ignores segment IDs
|
|
555
|
+
FIXME it assumes only one bond exists in a link. It may not be the case in future.
|
|
556
|
+
"""
|
|
557
|
+
from servalcat.utils import model
|
|
558
|
+
|
|
559
|
+
logger.writeln("Checking links defined in the model")
|
|
560
|
+
remove_duplicated_links(st.connections)
|
|
561
|
+
for con in st.connections:
|
|
562
|
+
if con.type == gemmi.ConnectionType.Hydrog: continue
|
|
563
|
+
if con.link_id == "gap": continue # TODO check residues?
|
|
564
|
+
cra1, cra2 = st[0].find_cra(con.partner1, ignore_segment=True), st[0].find_cra(con.partner2, ignore_segment=True)
|
|
565
|
+
if None in (cra1.atom, cra2.atom):
|
|
566
|
+
logger.writeln(" WARNING: atom(s) not found for link: id= {} atom1= {} atom2= {}".format(con.link_id, con.partner1, con.partner2))
|
|
567
|
+
continue
|
|
568
|
+
if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
|
|
569
|
+
con.type = gemmi.ConnectionType.MetalC
|
|
570
|
+
if con.asu != gemmi.Asu.Same: # XXX info from metadata may be wrong
|
|
571
|
+
im = st.cell.find_nearest_image(cra1.atom.pos, cra2.atom.pos, con.asu)
|
|
572
|
+
image_idx = im.sym_idx
|
|
573
|
+
con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
|
|
574
|
+
dist = im.dist()
|
|
575
|
+
else:
|
|
576
|
+
image_idx = 0
|
|
577
|
+
con.asu = gemmi.Asu.Same
|
|
578
|
+
dist = cra1.atom.pos.dist(cra2.atom.pos)
|
|
579
|
+
con.reported_distance = dist
|
|
580
|
+
atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, image_idx)
|
|
581
|
+
if con.link_id:
|
|
582
|
+
link = monlib.get_link(con.link_id)
|
|
583
|
+
inv = False
|
|
584
|
+
if link is None:
|
|
585
|
+
logger.writeln(" WARNING: link {} not found in the library. Please provide link dictionary.".format(con.link_id))
|
|
586
|
+
con.link_id = "" # let gemmi find proper link in prepare_topology()
|
|
587
|
+
continue
|
|
588
|
+
else:
|
|
589
|
+
match, _, _ = monlib.test_link(link, cra1.residue.name, cra1.atom.name, cra2.residue.name, cra2.atom.name)
|
|
590
|
+
if not match and monlib.test_link(link, cra2.residue.name, cra2.atom.name, cra1.residue.name, cra1.atom.name)[0]:
|
|
591
|
+
match = True
|
|
592
|
+
inv = True
|
|
593
|
+
if not match:
|
|
594
|
+
logger.writeln(" WARNING: link id and atoms mismatch: id= {} {}".format(link.id, atoms_str))
|
|
595
|
+
continue
|
|
596
|
+
else:
|
|
597
|
+
link, inv, _, _ = monlib.match_link(cra1.residue, cra1.atom.name, cra1.atom.altloc,
|
|
598
|
+
cra2.residue, cra2.atom.name, cra2.atom.altloc)
|
|
599
|
+
if link:
|
|
600
|
+
con.link_id = link.id
|
|
601
|
+
elif con.type == gemmi.ConnectionType.MetalC:
|
|
602
|
+
logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
|
|
603
|
+
if cra2.atom.element.is_metal:
|
|
604
|
+
inv = True # make metal first
|
|
605
|
+
else:
|
|
606
|
+
ideal_dist = monlib.find_ideal_distance(cra1, cra2)
|
|
607
|
+
logger.writeln(" Link unidentified (simple bond will be used): {} dist= {:.2f} ideal= {:.2f}".format(atoms_str,
|
|
608
|
+
dist,
|
|
609
|
+
ideal_dist))
|
|
610
|
+
continue
|
|
611
|
+
if link:
|
|
612
|
+
logger.writeln(" Link confirmed: id= {} {} dist= {:.2f} ideal= {:.2f}".format(link.id,
|
|
613
|
+
atoms_str,
|
|
614
|
+
dist,
|
|
615
|
+
link.rt.bonds[0].value))
|
|
616
|
+
if con.link_id == "disulf":
|
|
617
|
+
con.type = gemmi.ConnectionType.Disulf
|
|
618
|
+
if inv:
|
|
619
|
+
con.partner1 = model.cra_to_atomaddress(cra2)
|
|
620
|
+
con.partner2 = model.cra_to_atomaddress(cra1)
|
|
621
|
+
if len(st.connections) == 0:
|
|
622
|
+
logger.writeln(" no links defined in the model")
|
|
623
|
+
|
|
624
|
+
logger.writeln("Finding new links (will be added if marked by *)")
|
|
625
|
+
ns = gemmi.NeighborSearch(st[0], st.cell, 5.).populate()
|
|
626
|
+
cs = gemmi.ContactSearch(4.)
|
|
627
|
+
cs.ignore = gemmi.ContactSearch.Ignore.SameResidue
|
|
628
|
+
results = cs.find_contacts(ns)
|
|
629
|
+
onsb = set(gemmi.Element(x) for x in "ONSB")
|
|
630
|
+
n_found = 0
|
|
631
|
+
|
|
632
|
+
# st.find_connection_by_cra is quite slow (spent ~12 sec for 7k00, 6301 connections)
|
|
633
|
+
# now it's ~6 times faster
|
|
634
|
+
connections = {tuple((p.chain_name, p.res_id.seqid, p.res_id.name, p.atom_name, p.altloc) for p in (c.partner1, c.partner2))
|
|
635
|
+
for c in st.connections if c.type != gemmi.ConnectionType.Hydrog}
|
|
636
|
+
def find_connection(cra1, cra2):
|
|
637
|
+
key = lambda cra: (cra.chain.name, cra.residue.seqid, cra.residue.name, cra.atom.name, cra.atom.altloc)
|
|
638
|
+
return (key(cra1), key(cra2)) in connections or (key(cra2), key(cra1)) in connections
|
|
639
|
+
|
|
640
|
+
for r in results:
|
|
641
|
+
# skip adjacent residues in a polymer entity
|
|
642
|
+
if (r.partner1.chain == r.partner2.chain and
|
|
643
|
+
r.partner1.residue.entity_type == r.partner2.residue.entity_type == gemmi.EntityType.Polymer and
|
|
644
|
+
r.partner1.residue.entity_id == r.partner2.residue.entity_id):
|
|
645
|
+
if r.partner1.chain.next_residue(r.partner1.residue) == r.partner2.residue:
|
|
646
|
+
atom1, atom2 = r.partner1.atom.name, r.partner2.atom.name
|
|
647
|
+
elif r.partner1.chain.next_residue(r.partner2.residue) == r.partner1.residue:
|
|
648
|
+
atom1, atom2 = r.partner2.atom.name, r.partner1.atom.name
|
|
649
|
+
else:
|
|
650
|
+
atom1, atom2 = None, None
|
|
651
|
+
if atom1 is not None:
|
|
652
|
+
ent = st.get_entity(r.partner1.residue.entity_id)
|
|
653
|
+
if (ent.polymer_type in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD) and
|
|
654
|
+
atom1 == "C" and atom2 == "N"):
|
|
655
|
+
continue
|
|
656
|
+
if (ent.polymer_type in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid) and
|
|
657
|
+
atom1 == "O3'" and atom2 == "P"):
|
|
658
|
+
continue
|
|
659
|
+
if find_connection(r.partner1, r.partner2): continue
|
|
660
|
+
link, inv, _, _ = monlib.match_link(r.partner1.residue, r.partner1.atom.name, r.partner1.atom.altloc,
|
|
661
|
+
r.partner2.residue, r.partner2.atom.name, r.partner2.atom.altloc,
|
|
662
|
+
(r.dist / 1.4)**2)
|
|
663
|
+
if link is None and r.partner2.atom.element.is_metal:
|
|
664
|
+
inv = True # make metal first
|
|
665
|
+
if inv:
|
|
666
|
+
cra1, cra2 = r.partner2, r.partner1
|
|
667
|
+
else:
|
|
668
|
+
cra1, cra2 = r.partner1, r.partner2
|
|
669
|
+
im = st.cell.find_nearest_pbc_image(cra1.atom.pos, cra2.atom.pos, r.image_idx)
|
|
670
|
+
#assert r.image_idx == im.sym_idx # should we check this?
|
|
671
|
+
if not find_symmetry_related and not im.same_asu():
|
|
672
|
+
continue
|
|
673
|
+
atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, r.image_idx)
|
|
674
|
+
if im.pbc_shift != (0,0,0):
|
|
675
|
+
atoms_str += " ({},{},{})".format(*im.pbc_shift)
|
|
676
|
+
if link:
|
|
677
|
+
if r.dist > link.rt.bonds[0].value * bond_margin: continue
|
|
678
|
+
will_be_added = add_found and (not add_only_from or link.id in add_only_from)
|
|
679
|
+
logger.writeln(" {}New link found: id= {} {} dist= {:.2f} ideal= {:.2f}".format("*" if will_be_added else " ",
|
|
680
|
+
link.id,
|
|
681
|
+
atoms_str,
|
|
682
|
+
r.dist,
|
|
683
|
+
link.rt.bonds[0].value))
|
|
684
|
+
elif find_metal_links:
|
|
685
|
+
# link only metal - O/N/S/B
|
|
686
|
+
if r.partner1.atom.element.is_metal == r.partner2.atom.element.is_metal: continue
|
|
687
|
+
if not cra2.atom.element in onsb: continue
|
|
688
|
+
max_ideal = metalc.find_max_dist(cra1, cra2)
|
|
689
|
+
if r.dist > max_ideal * metal_margin: continue # tolerance should be smaller than that for other links
|
|
690
|
+
will_be_added = add_found
|
|
691
|
+
logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
|
|
692
|
+
atoms_str,
|
|
693
|
+
r.dist, max_ideal))
|
|
694
|
+
else:
|
|
695
|
+
continue
|
|
696
|
+
n_found += 1
|
|
697
|
+
if not will_be_added: continue
|
|
698
|
+
con = gemmi.Connection()
|
|
699
|
+
con.name = "added{}".format(n_found)
|
|
700
|
+
if link:
|
|
701
|
+
con.link_id = link.id
|
|
702
|
+
con.type = gemmi.ConnectionType.Disulf if link.id == "disulf" else gemmi.ConnectionType.Covale
|
|
703
|
+
if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
|
|
704
|
+
con.type = gemmi.ConnectionType.MetalC
|
|
705
|
+
con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
|
|
706
|
+
con.partner1 = model.cra_to_atomaddress(cra1)
|
|
707
|
+
con.partner2 = model.cra_to_atomaddress(cra2)
|
|
708
|
+
con.reported_distance = r.dist
|
|
709
|
+
st.connections.append(con)
|
|
710
|
+
if n_found == 0:
|
|
711
|
+
logger.writeln(" no links found")
|
|
712
|
+
# find_and_fix_links()
|
|
713
|
+
|
|
714
|
+
def add_hydrogens(st, monlib, pos="elec"):
    """Rebuild the topology of *st* with hydrogens re-added and choose their bond lengths.

    Parameters
    ----------
    st : structure passed to prepare_topology(); st[0] is queried for residue names
    monlib : monomer library passed to prepare_topology()
    pos : "elec" (default) or "nucl"
        With "nucl", X-H distances are adjusted to nucleus positions after the
        monomer library has been checked for nucleus-distance support.
        With "elec", nothing is adjusted after the topology has been prepared.

    Returns None.  Any other value of *pos* trips the assertion below.
    NOTE(review): hydrogens are presumably written into *st* in place by
    prepare_topology() -- confirm there.
    """
    assert pos in ("elec", "nucl")
    # water is excluded from hydrogen re-addition (ReAddButWater)
    topo = prepare_topology(st, monlib,
                            h_change=gemmi.HydrogenChange.ReAddButWater,
                            ignore_unknown_links=False)

    if pos != "nucl":
        logger.writeln("Generating hydrogens at electron positions")
        return

    logger.writeln("Generating hydrogens at nucleus positions")
    resnames = st[0].get_all_residue_names()
    check_monlib_support_nucleus_distances(monlib, resnames)
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                   default_scale=default_proton_scale)
# add_hydrogens()
|
|
726
|
+
|
|
727
|
+
def make_atom_spec(cra):
    """Return an atom selection string "chain C resi N ins I atom A [alt X]" for *cra*.

    *cra* must expose chain.name, residue.seqid.num, residue.seqid.icode,
    atom.name and atom.altloc.  A blank or empty insertion code is written
    as "."; the " alt X" part is appended only when altloc is not "\\0".
    """
    seqid = cra.residue.seqid
    icode = seqid.icode if seqid.icode.strip() else "."
    spec = "chain {} resi {} ins {} atom {}".format(cra.chain.name, seqid.num, icode, cra.atom.name)
    altloc = cra.atom.altloc
    if altloc == "\0":
        return spec
    return spec + " alt {}".format(altloc)
# make_atom_spec()
|
|
737
|
+
|
|
738
|
+
def dictionary_block_names(monlib, topo):
    """Return the set of lower-cased dictionary names referenced by *monlib* and *topo*.

    The set contains
      - every key of monlib.monomers, lower-cased;
      - "link_<id>" for each link in res_info.prev of every residue in topo.chain_infos;
      - "mod_<id>" for each modification in res_info.mods of those residues;
      - "link_<id>" for each entry of topo.extras.
    """
    names = set()
    for monomer_name in monlib.monomers:
        names.add(monomer_name.lower())

    for chain_info in topo.chain_infos:
        for res_info in chain_info.res_infos:
            # won't be included if the name starts with "auto-", but don't do such checks here
            names.update("link_" + link.link_id.lower() for link in res_info.prev)
            names.update("mod_" + mod.id.lower() for mod in res_info.mods)

    names.update("link_" + extra.link_id.lower() for extra in topo.extras)
    return names
# dictionary_block_names()
|
|
751
|
+
|
|
752
|
+
def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
    """Pair up similar polymer chains of the first model and return them as an ext.NcsList.

    Chains are grouped by the polymer type reported by check_polymer_type()
    (peptide L/D, DNA, RNA, DNA/RNA hybrid only) and every pair within a group
    is sequence-aligned.  A pair is kept when
      - the alignment has at least *min_nalign* matches,
      - calculate_local_rms(rms_loc_nlen) yields at least one non-NaN value, and
      - the NaN-ignoring mean of those local rms values does not exceed *max_rms_loc*.
    For kept pairs with identity below 100 the gapped alignment is logged.
    A summary table of all kept pairs is logged before returning.

    Returns the ext.NcsList after set_pairs() has been called on it.
    """
    logger.writeln("Finding NCS..")
    supported_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD,
                       gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    polymers = {}
    for chain in st[0]:
        polymer = chain.get_polymer()
        p_type = polymer.check_polymer_type()
        if p_type not in supported_types:
            continue
        polymers.setdefault(p_type, []).append((chain, polymer))

    scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
    al_res = []
    ncslist = ext.NcsList()
    for pt, pols in polymers.items():
        # all unordered pairs (first, second) with first preceding second in the list
        for i, (chain_1, pol_1) in enumerate(pols[:-1]):
            q = [res.name for res in pol_1]
            for chain_2, pol_2 in pols[i + 1:]:
                al = gemmi.align_sequence_to_polymer(q, pol_2, pt, scoring)
                if al.match_count < min_nalign:
                    continue
                su = gemmi.calculate_superposition(pol_1, pol_2, pt, gemmi.SupSelect.All)
                obj = ext.NcsList.Ncs(al, pol_1, pol_2, chain_1.name, chain_2.name)
                obj.calculate_local_rms(rms_loc_nlen)
                if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
                    continue
                ave_local_rms = numpy.nanmean(obj.local_rms)
                if ave_local_rms > max_rms_loc:
                    continue
                ncslist.ncss.append(obj)
                first_pair, last_pair = obj.seqids[0], obj.seqids[-1]
                row = {"chain_1": "{} ({}..{})".format(obj.chains[0], first_pair[0], last_pair[0]),
                       "chain_2": "{} ({}..{})".format(obj.chains[1], first_pair[1], last_pair[1]),
                       "aligned": al.match_count,
                       "identity": al.calculate_identity(1),
                       "rms": su.rmsd,
                       "ave(rmsloc)": ave_local_rms,
                       }
                al_res.append(row)
                if row["identity"] >= 100:
                    continue
                # sequences differ: show the alignment, wrapped to a fixed width
                wrap_width = 100
                logger.writeln(f"seq1: {chain_1.name} {pol_1[0].seqid}..{pol_1[-1].seqid}")
                logger.writeln(f"seq2: {chain_2.name} {pol_2[0].seqid}..{pol_2[-1].seqid}")
                logger.writeln(f"match_count: {al.match_count} (identity: {row['identity']:.2f})")
                s1 = gemmi.one_letter_code(q)
                p_seq = gemmi.one_letter_code(pol_2.extract_sequence())
                p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
                for k in range(0, len(p1), wrap_width):
                    stop = k + wrap_width
                    logger.writeln(" seq1 {}".format(p1[k:stop]))
                    logger.writeln(" {}".format(al.match_string[k:stop]))
                    logger.writeln(" seq2 {}\n".format(p2[k:stop]))

    ncslist.set_pairs()
    df = pandas.DataFrame(al_res)
    df.index += 1  # number the table rows from 1
    logger.writeln(df.to_string(float_format="%.2f"))
    return ncslist
# prepare_ncs_restraints()
|
|
807
|
+
|
|
808
|
+
class MetalCoordination:
    """Metal-ligand ideal distances and Refmac external restraints built from them.

    Distances come from the "metal_coordination" table of a JSON file
    (by default "metals.json" in the monomer library directory), indexed as
    metals[metal element name][ligand element name] -> list of entries.
    Entries are dicts; the keys read here are "median", "mad" and "coord".
    """
    def __init__(self, monlib, dbfile=None):
        """Load the distance table.

        monlib : monomer library; needs path() and find_ideal_distance()
        dbfile : path of the JSON file; defaults to <monlib.path()>/metals.json.
                 If the file does not exist a warning is logged and the table is empty.
        """
        self.monlib = monlib
        if dbfile is None:
            dbfile = os.path.join(monlib.path(), "metals.json")
        if os.path.exists(dbfile):
            # JSON is UTF-8 by specification; do not depend on the locale encoding
            with open(dbfile, encoding="utf-8") as f:
                self.metals = json.load(f)["metal_coordination"]
        else:
            self.metals = {}
            logger.writeln("WARNING: {} not found".format(dbfile))
    # __init__()

    def find_max_dist(self, cra_metal, cra_ligand):
        """Return the largest "median" distance known for the metal/ligand element pair.

        Falls back to monlib.find_ideal_distance(cra_metal, cra_ligand) when the
        table has no entry for the pair.
        """
        vals = self.find_ideal_distances(cra_metal.atom.element, cra_ligand.atom.element)
        if len(vals) == 0:
            # if not found
            return self.monlib.find_ideal_distance(cra_metal, cra_ligand)
        return max(x["median"] for x in vals)
    # find_max_dist()

    def find_ideal_distances(self, el_metal, el_ligand):
        """Return the table entries for the element pair, or [] if there are none.

        Both arguments only need a ``name`` attribute.
        """
        if el_metal.name not in self.metals or el_ligand.name not in self.metals[el_metal.name]:
            return []
        return self.metals[el_metal.name][el_ligand.name]
    # find_ideal_distances

    def setup_restraints(self, st):
        """Generate Refmac "exte dist" keywords for metal links that have no link id.

        Only connections of type MetalC with an empty link_id whose two atoms
        are found in st[0] are considered; partner1 is treated as the metal.
        For every ligand element with usable table entries (mad > 0) one keyword
        per entry is emitted, with sigma = max(0.02, 1.5 * mad).

        Returns (keywords, indices) where *indices* are positions in
        st.connections for which keywords were generated.
        NOTE(review): callers presumably remove those connections so that the
        link is not restrained twice -- confirm at the call sites.
        """
        ret = [] # returns Refmac keywords
        lookup = {x.atom: x for x in st[0].all()}
        coords = {}
        todel = []
        # group ligands as coords[metal element][metal atom] -> [(ligand atom, connection index, ener_lib ideal)]
        for i, con in enumerate(st.connections):
            if con.link_id == "" and con.type == gemmi.ConnectionType.MetalC:
                cra1 = st[0].find_cra(con.partner1, ignore_segment=True)
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                if None in (cra1.atom, cra2.atom): continue
                ener_ideal = self.monlib.find_ideal_distance(cra1, cra2)
                coords.setdefault(cra1.atom.element, {}).setdefault(cra1.atom, []).append((cra2.atom, i, ener_ideal))
        if coords:
            logger.writeln("Metal coordinations detected")
        for metal in coords:
            logger.writeln(" Metal: {}".format(metal.name))
            ligand_els = {x[0].element for m in coords[metal] for x in coords[metal][m]}
            logger.writeln(" ideal distances")
            ideals = {}
            for el in ligand_els:
                logger.write(" {}: ".format(el.name))
                vals = self.find_ideal_distances(metal, el)
                if len(vals) == 0:
                    ener_ideals = {x[2] for m in coords[metal] for x in coords[metal][m] if x[0].element == el}
                    logger.write(" ".join("{:.2f}".format(x) for x in ener_ideals))
                    logger.writeln(" (from ener_lib)")
                else:
                    logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
                    usable = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
                    # Register the element only when at least one entry is usable.
                    # An empty list would mark the connection for removal below
                    # while producing no keyword, leaving the link unrestrained.
                    if usable:
                        ideals[el] = usable
            logger.writeln("")
            for i, am in enumerate(coords[metal]):
                logger.writeln(" site {}: {}".format(i+1, lookup[am]))
                for j, (lig, con_idx, _) in enumerate(coords[metal][am]):
                    con = st.connections[con_idx]
                    logger.writeln(" ligand {}: {} dist= {:.2f}".format(j+1, lookup[lig],
                                                                        con.reported_distance))
                    if lig.element not in ideals:
                        continue
                    specs = [make_atom_spec(x) for x in (lookup[am], lookup[lig])]
                    todel.append(con_idx)
                    for ideal, sigma in ideals[lig.element]:
                        exte_str = "exte dist first {} seco {} ".format(*specs)
                        exte_str += "valu {:.4f} sigm {:.4f} type 1 ".format(ideal, sigma)
                        if con.asu == gemmi.Asu.Different:
                            exte_str += "symm y"
                        ret.append(exte_str)
            logger.writeln("")
        return ret, list(set(todel))
    # setup_restraints()
|