servalcat 0.4.99__cp38-cp38-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-38-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,781 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.refmac import refmac_keywords
11
+ from servalcat import ext
12
+ import os
13
+ import gemmi
14
+ import string
15
+ import random
16
+ import numpy
17
+ import pandas
18
+ import json
19
+ import fnmatch
20
+
21
+ default_proton_scale = 1.13 # scale of X-proton distance to X-H(e) distance
22
+
23
def decide_new_mod_id(mod_id, mods):
    """Return a modification id derived from *mod_id* that is not in *mods*.

    Refmac only allows ids of up to 8 characters, so for ids shorter than 8
    characters a single digit/lowercase letter is appended (separated by "-"
    unless the id is already 7 characters long).  When no such id is free,
    or the id is 8 characters or longer, a random "modXXXX" id is drawn
    until an unused one is found.
    """
    alphabet = string.digits + string.ascii_lowercase
    if len(mod_id) < 8:
        joiner = "" if len(mod_id) == 7 else "-"
        for suffix in alphabet:
            candidate = mod_id + joiner + suffix
            if candidate not in mods:
                return candidate

    # The original name cannot be kept: fall back to a random id.
    # XXX loops forever if every "mod????" id is taken (36**4 candidates).
    while True:
        candidate = "mod" + "".join(random.choice(alphabet) for _ in range(4))
        if candidate not in mods:
            return candidate
# decide_new_mod_id()
38
+
39
def rename_cif_modification_if_necessary(doc, known_ids):
    """Rename modifications in *doc* whose ids already exist in *known_ids*.

    The document is edited in place: the id in ``_chem_mod.id``, the name of
    the corresponding ``mod_<id>`` block, every ``*.mod_id`` value inside
    that block, and ``_chem_link.mod_id_1/2`` references are updated.

    Args:
        doc: iterable of CIF blocks that also provides ``find_block()``.
        known_ids: container of modification ids that are already taken.

    Returns:
        dict mapping each original id to its new id (empty if nothing
        needed renaming).
    """
    # FIXME Problematic if other file refers to modification that is renamed in this function - but how can we know?
    trans = {}
    for b in doc:
        for row in b.find("_chem_mod.", ["id"]):
            mod_id = row.str(0)
            if mod_id in known_ids:
                new_id = decide_new_mod_id(mod_id, known_ids)
                trans[mod_id] = new_id
                row[0] = new_id # modify id
                logger.writeln("INFO:: renaming modification id {} to {}".format(mod_id, new_id))

    # modify ids in mod_* blocks
    for mod_id, new_id in trans.items():
        b = doc.find_block("mod_{}".format(mod_id))
        if not b: # should raise error?
            logger.writeln("WARNING:: inconsistent mod description for {}".format(mod_id))
            continue
        b.name = "mod_{}".format(new_id) # modify name
        for item in b:
            # An item is a loop, a tag/value pair or a save-frame; only the
            # first two carry tags.  Accessing item.loop.tags unconditionally
            # fails for anything that is not a loop.
            if item.loop is not None:
                tags = item.loop.tags
            elif item.pair is not None:
                tags = [item.pair[0]]
            else:
                continue
            for tag in tags:
                if tag.endswith(".mod_id"):
                    for row in b.find(tag[:tag.rindex(".")+1], ["mod_id"]):
                        row[0] = new_id

    # Update mod id in links
    if trans:
        for b in doc:
            for row in b.find("_chem_link.", ["mod_id_1", "mod_id_2"]):
                for i in range(2):
                    if row.str(i) in trans:
                        row[i] = trans[row.str(i)]

    return trans
# rename_cif_modification_if_necessary()
74
+
75
def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns=False,
                         ignore_monomer_dir=False, update_old_atom_names=True,
                         params=None):
    """Create a gemmi.MonLib covering the residues of the first model of *st*.

    Args:
        st: structure; residue names are taken from ``st[0]``.
        monomer_dir: monomer library directory.  When None (and
            *ignore_monomer_dir* is false) the CLIBD_MON environment
            variable is used if it is set.
        cif_files: additional dictionary files; their monomers and links
            replace entries of the same name that are already loaded.
        stop_for_unknowns: raise instead of warn when a residue has no
            monomer description.
        ignore_monomer_dir: do not read any monomer directory.
        update_old_atom_names: call ``monlib.update_old_atom_names(st, ...)``.
        params: optional dict; ``params["restr"]["torsion_include"]`` is
            passed to update_torsions().

    Returns:
        The populated gemmi.MonLib.

    Raises:
        RuntimeError: *monomer_dir* is not a directory, a cif file has bonds
            without ``value_dist``, or (with *stop_for_unknowns*) a residue
            has no description.
    """
    resnames = st[0].get_all_residue_names()

    if monomer_dir is None and not ignore_monomer_dir:
        if "CLIBD_MON" not in os.environ:
            logger.error("WARNING: CLIBD_MON is not set")
        else:
            monomer_dir = os.environ["CLIBD_MON"]

    if cif_files is None:
        cif_files = []

    monlib = gemmi.MonLib()
    if monomer_dir and not ignore_monomer_dir:
        if not os.path.isdir(monomer_dir):
            raise RuntimeError("not a directory: {}".format(monomer_dir))

        logger.writeln("Reading monomers from {}".format(monomer_dir))
        monlib.read_monomer_lib(monomer_dir, resnames, logger)

    prefix = "comp_"
    for f in cif_files:
        logger.writeln("Reading monomer: {}".format(f))
        doc = gemmi.cif.read(f)
        for b in doc:
            atom_id_list = b.find_values("_chem_comp_atom.atom_id")
            if atom_id_list:
                # Strip only a leading "comp_"; str.replace() would also
                # remove the substring from the middle of a name.
                name = b.name[len(prefix):] if b.name.startswith(prefix) else b.name
                if name in monlib.monomers:
                    logger.writeln("WARNING:: updating monomer {} using {}".format(name, f))
                    del monlib.monomers[name]

                # Check if bond length values are included
                # This is to fail if cif file is e.g. from PDB website
                if b.find_values("_chem_comp_bond.comp_id") and not b.find_values("_chem_comp_bond.value_dist"):
                    raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")

            for row in b.find("_chem_link.", ["id"]):
                link_id = row.str(0)
                if link_id in monlib.links:
                    logger.writeln("WARNING:: updating link {} using {}".format(link_id, f))
                    del monlib.links[link_id]

        # If modification id is duplicated, need to rename
        rename_cif_modification_if_necessary(doc, monlib.modifications)
        monlib.read_monomer_doc(doc)
        for b in doc:
            for row in b.find("_chem_comp.", ["id", "group"]):
                if row.str(0) in monlib.monomers:
                    monlib.monomers[row.str(0)].set_group(row.str(1))

    not_loaded = set(resnames).difference(monlib.monomers)
    if not_loaded:
        # sorted: set iteration order would make the message vary between runs
        logger.writeln("WARNING: monomers not loaded: {}".format(" ".join(sorted(not_loaded))))

    logger.writeln("Monomer library loaded: {} monomers, {} links, {} modifications".format(len(monlib.monomers),
                                                                                            len(monlib.links),
                                                                                            len(monlib.modifications)))
    logger.writeln(" loaded monomers: {}".format(" ".join([x for x in monlib.monomers])))
    logger.writeln("")

    logger.writeln("Checking if unknown atoms exist..")

    unknown_cc = set()
    for chain in st[0]:
        unknown_cc.update(res.name for res in chain if res.name not in monlib.monomers)
    if unknown_cc:
        unknown_str = ",".join(sorted(unknown_cc))
        if stop_for_unknowns:
            raise RuntimeError("Provide restraint cif file(s) for {}".format(unknown_str))
        else:
            logger.writeln("WARNING: ad-hoc restraints will be generated for {}".format(unknown_str))
            logger.writeln(" it is strongly recommended to generate them using AceDRG.")

    if update_old_atom_names:
        monlib.update_old_atom_names(st, logger)

    if params:
        update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))

    return monlib
# load_monomer_library()
156
+
157
def fix_elements_in_model(monlib, st):
    """Make atom elements in the first model of *st* agree with *monlib*.

    For every atom whose residue name and atom name are both described in
    the monomer library, the element is overwritten (with a warning) when it
    differs from the library's element.  Residues or atom names unknown to
    the library are left untouched.
    """
    expected = {}
    for mon_name in monlib.monomers:
        expected[mon_name] = {a.id: a.el for a in monlib.monomers[mon_name].atoms}
    # atom -> CRA, only used to print a readable atom address
    cra_of = {cra.atom: cra for cra in st[0].all()}
    for chain in st[0]:
        for res in chain:
            known = expected.get(res.name)
            if not known:
                continue # should not happen
            for at in res:
                # e.g. atom names of element D may be different, which will be sorted later
                if at.name not in known:
                    continue
                lib_el = known[at.name]
                if at.element != lib_el:
                    logger.writeln(f"WARNING: correcting element of {cra_of[at]} to {lib_el.name}")
                    at.element = lib_el
# fix_elements_in_model()
172
+
173
def update_torsions(monlib, params):
    """Overwrite torsion targets in *monlib* according to user rules.

    Args:
        monlib: monomer library whose monomers/links are modified in place.
        params: iterable of rule dicts.  Only rules containing at least one
            of "tors_value", "tors_sigma", "tors_period" are used.  A rule
            selects dictionaries by "residue" (wildcard on monomer name),
            "group" or "link" (wildcard on link id), in that order of
            precedence, and selects torsions by the wildcard "tors_name".

    Rules that set a value but do not name a torsion are skipped with a
    warning instead of raising KeyError.
    """
    # take subset
    params = [p for p in params
              if any(x in p for x in ("tors_value", "tors_sigma", "tors_period"))]
    if not params:
        return
    logger.writeln("Updating torsion targets in dictionaries")
    for p in params:
        if "tors_name" not in p:
            logger.writeln("WARNING: torsion rule without torsion name is ignored: {}".format(p))
            continue
        if "residue" in p:
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if fnmatch.fnmatch(cc.name, p["residue"])]
        elif "group" in p:
            g = gemmi.ChemComp.read_group(p["group"])
            # should warn if g is Null
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if cc.group == g]
        elif "link" in p:
            tors = [ln.rt.torsions for ln in monlib.links.values()
                    if fnmatch.fnmatch(ln.id, p["link"])]
        else:
            tors = []
        if not tors:
            continue
        logger.writeln(f" rule = {p}")
        for tt in tors:
            for t in tt:
                if fnmatch.fnmatch(t.label, p["tors_name"]):
                    if "tors_value" in p:
                        t.value = p["tors_value"]
                    if "tors_sigma" in p:
                        t.esd = p["tors_sigma"]
                    if "tors_period" in p:
                        t.period = p["tors_period"]
# update_torsions()
207
+
208
def make_torsion_rules(restr_params):
    """Build the include/exclude torsion rule lists and log them.

    Starts from built-in include rules and an empty exclude list, then
    applies the entries of ``restr_params["torsion_include"]`` and
    ``restr_params["torsion_exclude"]`` in order: an entry with a true
    "flag" appends a rule (if it has a selector and a "tors_name"), an
    entry with a false "flag" empties the corresponding list.

    Returns:
        (include_rules, exclude_rules), each a list of dicts.
    """
    # Defaults
    include_rules = [{"group": "peptide", "tors_name": "chi*"},
                     {"link": "*", "tors_name": "omega"},
                     {"residue": "*", "tors_name": "sp2_sp2*"},
                     {"link": "*", "tors_name": "sp2_sp2*"},
                     ]
    exclude_rules = []

    # Override include/exclude rules
    for key, rules in (("torsion_include", include_rules),
                       ("torsion_exclude", exclude_rules)):
        for p in restr_params.get(key, []):
            if not p["flag"]:
                rules.clear()
                continue
            rule = {k: p[k] for k in ("residue", "group", "link") if k in p}
            if rule and "tors_name" in p:
                rule["tors_name"] = p["tors_name"]
                rules.append(rule)

    # How to tell about hydrogen?
    logger.writeln("Torsion angle rules:")
    for label, rule_list in (("include", include_rules), ("exclude", exclude_rules)):
        logger.writeln(f" {label}:")
        if not rule_list:
            logger.writeln(f" none")
        for rule in rule_list:
            logger.writeln(f" {rule}")

    return include_rules, exclude_rules
# make_torsion_rules()
243
+
244
def select_restrained_torsions(monlib, include_rules, exclude_rules):
    """Select the torsion labels to restrain for each monomer and link.

    Torsion labels are collected from monomers (plus added/changed torsions
    of modifications that apply to the monomer or its group) and from
    links.  A label is selected when it matches the "tors_name" wildcard of
    an applicable include rule and of no applicable exclude rule.

    A monomer rule applies when its "residue" wildcard matches the monomer
    name or its "group" is the monomer's group; a link rule applies when its
    "link" wildcard matches the link id.  A rule without a "group" key never
    matches by group (previously it matched every monomer whose group was
    unrecognised).

    Returns:
        {"monomer": {name: sorted labels}, "link": {id: sorted labels}},
        containing only entries with at least one label, without duplicates.
    """
    ret = {"monomer": {}, "link": {}}

    # Collect monomer/link related torsions
    all_tors = {"mon": {}, "link": {}}
    groups = {}
    for mon_id in monlib.monomers:
        mon = monlib.monomers[mon_id]
        groups.setdefault(mon.group, []).append(mon_id)
        all_tors["mon"][mon_id] = [x.label for x in mon.rt.torsions]
    for mod_id in monlib.modifications:
        mod = monlib.modifications[mod_id]
        tors = [x.label for x in mod.rt.torsions if chr(x.id1.comp) in ("a", "c")] # don't need delete
        if not tors:
            continue
        gr = gemmi.ChemComp.read_group(mod.group_id)
        if mod.comp_id and mod.comp_id in all_tors["mon"]:
            all_tors["mon"][mod.comp_id].extend(tors)
        elif not mod.comp_id and gr in groups:
            for mon_id in groups[gr]:
                all_tors["mon"][mon_id].extend(tors)
    for lnk_id in monlib.links:
        lnk = monlib.links[lnk_id]
        if lnk.rt.torsions:
            all_tors["link"][lnk_id] = [x.label for x in lnk.rt.torsions]
    for k in all_tors:
        for kk in all_tors[k]:
            all_tors[k][kk] = set(all_tors[k][kk])

    def monomer_rule_applies(rule, mon_id):
        if "tors_name" not in rule:
            return False
        if "residue" in rule and fnmatch.fnmatch(mon_id, rule["residue"]):
            return True
        # Only consult the group table when the rule really names a group.
        return ("group" in rule and
                mon_id in groups.get(gemmi.ChemComp.read_group(rule["group"]), []))

    def link_rule_applies(rule, lnk_id):
        return ("tors_name" in rule and
                "link" in rule and fnmatch.fnmatch(lnk_id, rule["link"]))

    def select(name, labels, applies):
        # a set, so that overlapping include rules do not duplicate labels
        use_tors = set()
        for r in include_rules:
            if applies(r, name):
                use_tors.update(x for x in labels if fnmatch.fnmatch(x, r["tors_name"]))
        for r in exclude_rules:
            if applies(r, name):
                use_tors = {x for x in use_tors if not fnmatch.fnmatch(x, r["tors_name"])}
        return sorted(use_tors)

    # Apply include/exclude rule
    for mon_id, labels in all_tors["mon"].items():
        selected = select(mon_id, labels, monomer_rule_applies)
        if selected:
            ret["monomer"][mon_id] = selected
    for lnk_id, labels in all_tors["link"].items():
        selected = select(lnk_id, labels, link_rule_applies)
        if selected:
            ret["link"][lnk_id] = selected

    return ret
# select_restrained_torsions()
301
+
302
def _check_duplicated_atoms(st):
    """Raise RuntimeError if a residue of st[0] has atoms sharing (name, altloc)."""
    bad = []
    for chain in st[0]:
        bad_res = [str(res.seqid) for res in chain
                   if len({(a.name, a.altloc) for a in res}) != len(res)]
        if bad_res:
            bad.append(" chain {}: {}".format(chain.name, " ".join(bad_res)))
    if bad:
        raise RuntimeError("Following residues have duplicated atoms. Check your model.\n{}".format("\n".join(bad)))

def _log_chain_info(topo):
    """Log, per chain, the polymer range with unexplained gaps and the ligands."""
    def extra_defined(res1, res2): # TODO should check alt
        for link in topo.extras:
            res12 = (link.res1, link.res2)
            if link.link_id and (res12 == (res1, res2) or res12 == (res2, res1)):
                return True
        return False

    info = {}
    for cinfo in topo.chain_infos:
        toadd = info.setdefault(cinfo.chain_ref.name, {})
        if cinfo.polymer:
            gaps = []
            for rinfo in cinfo.res_infos:
                if (rinfo.prev and rinfo.prev[0].link_id in ("gap", "") and
                    not extra_defined(rinfo.prev[0].res1, rinfo.prev[0].res2)):
                    gaps.append((rinfo.prev[0].res1, rinfo.prev[0].res2))
            toadd["polymer"] = (str(cinfo.polymer_type).replace("PolymerType.", ""),
                                cinfo.res_infos[0].res.seqid,
                                cinfo.res_infos[-1].res.seqid,
                                len(cinfo.res_infos), gaps)
        else:
            names = toadd.setdefault("nonpolymer", [])
            for ri in cinfo.res_infos:
                names.append(ri.res.name)
    logger.writeln("\nChain info:")
    for chain in info:
        logger.writeln(" chain {}".format(chain))
        if "polymer" in info[chain]:
            logger.writeln(" {}: {}..{} ({} residues)".format(*info[chain]["polymer"][:-1]))
            for gap in info[chain]["polymer"][-1]:
                logger.writeln(" gap between {} and {}".format(*gap))
        if "nonpolymer" in info[chain]:
            n_res = len(info[chain]["nonpolymer"])
            uniq = set(info[chain]["nonpolymer"])
            logger.writeln(" ligands: {} ({} residues)".format(" ".join(uniq), n_res))
    logger.writeln("")

def _collect_dictionary_problems(topo, monlib, check_hydrogen):
    """Return (unknown_cc, link_related, nan_hydr): residue-name sets with problems.

    unknown_cc: atoms absent from the dictionary, or hydrogens that could not be placed.
    link_related: atoms known to the monomer but absent after links/modifications.
    nan_hydr: atoms whose position contains NaN.
    """
    unknown_cc = set()
    link_related = set()
    nan_hydr = set()
    for cinfo in topo.chain_infos:
        for rinfo in cinfo.res_infos:
            cc_org = monlib.monomers[rinfo.res.name] if rinfo.res.name in monlib.monomers else None
            for ia in reversed(range(len(rinfo.res))):
                atom = rinfo.res[ia]
                atom_str = "{}/{} {}/{}".format(cinfo.chain_ref.name, rinfo.res.name, rinfo.res.seqid, atom.name)
                cc = rinfo.get_final_chemcomp(atom.altloc)
                if not cc.find_atom(atom.name):
                    # warning message should have already been given by gemmi
                    if check_hydrogen or not atom.is_hydrogen():
                        if cc_org and cc_org.find_atom(atom.name):
                            link_related.add(rinfo.res.name)
                        else:
                            unknown_cc.add(rinfo.res.name)

                if atom.is_hydrogen() and atom.calc_flag == gemmi.CalcFlag.Dummy:
                    logger.writeln(" Warning: hydrogen {} could not be added - Check dictionary".format(atom_str))
                    unknown_cc.add(rinfo.res.name)
                elif any(numpy.isnan(atom.pos.tolist())): # TODO add NaN test before prepare_toplogy
                    logger.writeln(" Warning: {} position NaN!".format(atom_str))
                    nan_hydr.add(rinfo.res.name)
    return unknown_cc, link_related, nan_hydr

def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_error=True, check_hydrogen=False,
                     use_cispeps=False, add_metal_restraints=True, params=None):
    """Run gemmi.prepare_topology on *st* and report dictionary problems.

    Args:
        st: structure (modified: hydrogens per *h_change*, cispeps, flags).
        monlib: monomer library.
        h_change: gemmi.HydrogenChange passed to gemmi.prepare_topology.
        ignore_unknown_links, use_cispeps: passed to gemmi.prepare_topology.
        raise_error: raise when atoms are not described by the dictionaries
            or positions are NaN.
        check_hydrogen: also treat undescribed hydrogen atoms as errors.
        add_metal_restraints: replace link-less metal connections by
            external distance restraints from MetalCoordination; those
            connections are removed from ``st.connections`` only while the
            topology is built.
        params: optional dict; parsed "exte" restraints are appended to
            ``params["exte"]``.

    Returns:
        (topo, keywords): the topology and the list of Refmac keyword
        strings for metal restraints (empty without *add_metal_restraints*).

    Raises:
        RuntimeError: duplicated atoms, undescribed atoms or NaN positions.
    """
    _check_duplicated_atoms(st)

    keywords = []
    con_bak = []
    try:
        if add_metal_restraints:
            metalc = MetalCoordination(monlib)
            keywords, todel = metalc.setup_restraints(st)
            for i in sorted(todel, reverse=True):
                # temporarily remove connection not to put a bond restraint
                con = st.connections.pop(i)
                con_bak.append((i, con))
                # flag non-hydrogen
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                cra2.atom.calc_flag = gemmi.CalcFlag.NoHydrogen
            if params:
                parsed = refmac_keywords.parse_keywords(keywords).get("exte")
                if parsed:
                    params["exte"] = params.get("exte", []) + parsed

        # these checks can be done after sorting links
        logger.writeln("Creating restraints..")
        with logger.with_prefix(" "):
            topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
                                          ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)

        _log_chain_info(topo)
        unknown_cc, link_related, nan_hydr = _collect_dictionary_problems(topo, monlib, check_hydrogen)

        if raise_error and (unknown_cc or link_related):
            msgs = []
            if unknown_cc: msgs.append("restraint cif file(s) for {}".format(",".join(unknown_cc)))
            if link_related: msgs.append("proper link cif file(s) for {} or check your model".format(",".join(link_related)))
            raise RuntimeError("Provide {}".format(" and ".join(msgs)))
        if raise_error and nan_hydr:
            raise RuntimeError("Some hydrogen positions became NaN. The geometry of your model may be of low quality. Consider not adding hydrogen")
        if not use_cispeps:
            topo.set_cispeps_in_structure(st)
    finally:
        # Put the temporarily removed connections back at their original
        # indices, also when an error is raised, so that the caller's
        # structure is not left without its metal links.
        for i, con in sorted(con_bak, key=lambda x: x[0]):
            st.connections.insert(i, con)
    return topo, keywords
# prepare_topology()
418
+
419
def check_monlib_support_nucleus_distances(monlib, resnames):
    """Tell whether *monlib* provides nucleus distances for all *resnames*.

    A monomer fails the check when it is not loaded, or when any bond that
    involves a hydrogen atom has a NaN ``value_nucleus``.  Problems are
    logged.

    Returns:
        True if every monomer passed, False otherwise.
    """
    all_ok = True
    missing_nucleus = []
    for resn in resnames:
        if resn not in monlib.monomers:
            logger.error("ERROR: monomer information of {} not loaded".format(resn))
            all_ok = False
            continue
        mon = monlib.monomers[resn]
        for bond in mon.rt.bonds:
            h1 = mon.get_atom(bond.id1.atom).is_hydrogen()
            h2 = mon.get_atom(bond.id2.atom).is_hydrogen()
            # x != x holds only for NaN, i.e. no nucleus distance is given
            if (h1 or h2) and bond.value_nucleus != bond.value_nucleus:
                missing_nucleus.append(resn)
                all_ok = False
                break

    if missing_nucleus:
        logger.writeln("WARNING: nucleus distance is not found for: {}".format(" ".join(missing_nucleus)))
        logger.writeln(" default scale ({}) is used for nucleus distances.".format(default_proton_scale))
    return all_ok
# check_monlib_support_nucleus_distances()
443
+
444
def remove_duplicated_links(connections):
    """Delete connections that join the same pair of atoms, in place.

    Two connections are duplicates when their partners (chain name, residue
    number, insertion code, atom name, altloc) are the same regardless of
    order.  Of each duplicated group the first connection with a non-blank
    link_id is kept, or the first one if none has a link_id.
    """
    # ignore p.res_id.name?
    def address(p):
        return (p.chain_name, p.res_id.seqid.num, p.res_id.seqid.icode, p.atom_name, p.altloc)

    by_pair = {}
    for idx, con in enumerate(connections):
        key = tuple(sorted([address(con.partner1), address(con.partner2)]))
        by_pair.setdefault(key, []).append(idx)

    todel = []
    for indices in by_pair.values():
        if len(indices) < 2:
            continue
        with_id = [c for c in indices if connections[c].link_id.strip()]
        if len({connections[c].link_id for c in with_id}) > 1:
            logger.writeln(" WARNING: duplicated links are found with different link_id")
        tokeep = with_id[0] if with_id else indices[0]
        todel.extend(c for c in indices if c != tokeep)

    # delete from the end so that remaining indices stay valid
    for idx in sorted(todel, reverse=True):
        del connections[idx]
    if todel:
        logger.writeln(" {} duplicated links were removed.".format(len(todel)))
# remove_duplicated_links()
469
+
470
def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_found=True, find_symmetry_related=True,
                       metal_margin=1.1, add_only_from=None):
    """
    Identify link ids for st.connections and find new links
    This is required for correctly recognizing link in gemmi.prepare_topology
    Note that it ignores segment IDs
    FIXME it assumes only one bond exists in a link. It may not be the case in future.

    st.connections is modified in place: duplicates are removed, link ids,
    types, asu flags, reported distances and partner order are updated, and
    newly found links are appended when add_found is true (restricted to
    link ids in add_only_from if that is given).  A found link is accepted
    when its distance is within bond_margin times the ideal bond length;
    a metal - O/N/S/B contact (only searched if find_metal_links) when it is
    within metal_margin times the largest ideal distance.  Contacts to
    symmetry mates are skipped unless find_symmetry_related is true.
    """
    # The docstring must be the first statement; it used to follow this line
    # and was therefore not attached to the function.
    metalc = MetalCoordination(monlib)
    from servalcat.utils import model

    logger.writeln("Checking links defined in the model")
    remove_duplicated_links(st.connections)
    for con in st.connections:
        if con.type == gemmi.ConnectionType.Hydrog: continue
        if con.link_id == "gap": continue # TODO check residues?
        cra1, cra2 = st[0].find_cra(con.partner1, ignore_segment=True), st[0].find_cra(con.partner2, ignore_segment=True)
        if None in (cra1.atom, cra2.atom):
            logger.writeln(" WARNING: atom(s) not found for link: id= {} atom1= {} atom2= {}".format(con.link_id, con.partner1, con.partner2))
            continue
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        if con.asu != gemmi.Asu.Same: # XXX info from metadata may be wrong
            nimage = st.cell.find_nearest_image(cra1.atom.pos, cra2.atom.pos, con.asu)
            image_idx = nimage.sym_idx
            dist = nimage.dist()
        else:
            image_idx = 0
            con.asu = gemmi.Asu.Same
            dist = cra1.atom.pos.dist(cra2.atom.pos)
        con.reported_distance = dist
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, image_idx)
        if con.link_id:
            link = monlib.get_link(con.link_id)
            inv = False
            if link is None:
                logger.writeln(" WARNING: link {} not found in the library. Please provide link dictionary.".format(con.link_id))
                con.link_id = "" # let gemmi find proper link in prepare_topology()
                continue
            else:
                match, _, _ = monlib.test_link(link, cra1.residue.name, cra1.atom.name, cra2.residue.name, cra2.atom.name)
                if not match and monlib.test_link(link, cra2.residue.name, cra2.atom.name, cra1.residue.name, cra1.atom.name)[0]:
                    match = True
                    inv = True
                if not match:
                    logger.writeln(" WARNING: link id and atoms mismatch: id= {} {}".format(link.id, atoms_str))
                    continue
        else:
            link, inv, _, _ = monlib.match_link(cra1.residue, cra1.atom.name, cra1.atom.altloc,
                                                cra2.residue, cra2.atom.name, cra2.atom.altloc)
            if link:
                con.link_id = link.id
            elif con.type == gemmi.ConnectionType.MetalC:
                logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
                if cra2.atom.element.is_metal:
                    inv = True # make metal first
            else:
                ideal_dist = monlib.find_ideal_distance(cra1, cra2)
                logger.writeln(" Link unidentified (simple bond will be used): {} dist= {:.2f} ideal= {:.2f}".format(atoms_str,
                                                                                                                      dist,
                                                                                                                      ideal_dist))
                continue
        if link:
            logger.writeln(" Link confirmed: id= {} {} dist= {:.2f} ideal= {:.2f}".format(link.id,
                                                                                           atoms_str,
                                                                                           dist,
                                                                                           link.rt.bonds[0].value))
            if con.link_id == "disulf":
                con.type = gemmi.ConnectionType.Disulf
        if inv:
            # store the partners in the order the link definition expects
            con.partner1 = model.cra_to_atomaddress(cra2)
            con.partner2 = model.cra_to_atomaddress(cra1)
    if len(st.connections) == 0:
        logger.writeln(" no links defined in the model")

    logger.writeln("Finding new links (will be added if marked by *)")
    ns = gemmi.NeighborSearch(st[0], st.cell, 5.).populate()
    cs = gemmi.ContactSearch(4.)
    cs.ignore = gemmi.ContactSearch.Ignore.AdjacentResidues # may miss polymer links not contiguous in a chain?
    results = cs.find_contacts(ns)
    onsb = set(gemmi.Element(x) for x in "ONSB")
    n_found = 0
    for r in results:
        if st.find_connection_by_cra(r.partner1, r.partner2, ignore_segment=True): continue
        link, inv, _, _ = monlib.match_link(r.partner1.residue, r.partner1.atom.name, r.partner1.atom.altloc,
                                            r.partner2.residue, r.partner2.atom.name, r.partner2.atom.altloc,
                                            (r.dist / 1.4)**2)
        if link is None and r.partner2.atom.element.is_metal:
            inv = True # make metal first
        if inv:
            cra1, cra2 = r.partner2, r.partner1
        else:
            cra1, cra2 = r.partner1, r.partner2
        im = st.cell.find_nearest_pbc_image(cra1.atom.pos, cra2.atom.pos, r.image_idx)
        #assert r.image_idx == im.sym_idx # should we check this?
        if not find_symmetry_related and not im.same_asu():
            continue
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, r.image_idx)
        if im.pbc_shift != (0,0,0):
            atoms_str += " ({},{},{})".format(*im.pbc_shift)
        if link:
            if r.dist > link.rt.bonds[0].value * bond_margin: continue
            will_be_added = add_found and (not add_only_from or link.id in add_only_from)
            logger.writeln(" {}New link found: id= {} {} dist= {:.2f} ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                             link.id,
                                                                                             atoms_str,
                                                                                             r.dist,
                                                                                             link.rt.bonds[0].value))
        elif find_metal_links:
            # link only metal - O/N/S/B
            if r.partner1.atom.element.is_metal == r.partner2.atom.element.is_metal: continue
            if not cra2.atom.element in onsb: continue
            max_ideal = metalc.find_max_dist(cra1, cra2)
            if r.dist > max_ideal * metal_margin: continue # tolerance should be smaller than that for other links
            will_be_added = add_found
            logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                            atoms_str,
                                                                                            r.dist, max_ideal))
        else:
            continue
        n_found += 1
        if not will_be_added: continue
        con = gemmi.Connection()
        con.name = "added{}".format(n_found)
        if link:
            con.link_id = link.id
            con.type = gemmi.ConnectionType.Disulf if link.id == "disulf" else gemmi.ConnectionType.Covale
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
        con.partner1 = model.cra_to_atomaddress(cra1)
        con.partner2 = model.cra_to_atomaddress(cra2)
        con.reported_distance = r.dist
        st.connections.append(con)
    if n_found == 0:
        logger.writeln(" no links found")
# find_and_fix_links()
607
+
608
def add_hydrogens(st, monlib, pos="elec"):
    """Re-add hydrogen atoms (except on waters) to *st*.

    Args:
        st: structure, modified in place.
        monlib: monomer library.
        pos: "elec" to leave hydrogens at the positions generated by the
            topology, "nucl" to move them to nucleus distances (using
            default_proton_scale where the dictionary has no value).
    """
    assert pos in ("elec", "nucl")
    # prepare_topology() returns (topology, keywords); binding the whole
    # tuple to one name made topo.adjust_hydrogen_distances() fail.
    topo, _ = prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAddButWater, ignore_unknown_links=True)

    if pos == "nucl":
        logger.writeln("Generating hydrogens at nucleus positions")
        resnames = st[0].get_all_residue_names()
        check_monlib_support_nucleus_distances(monlib, resnames)
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus, default_scale=default_proton_scale)
    else:
        logger.writeln("Generating hydrogens at electron positions")
# add_hydrogens()
620
+
621
def make_atom_spec(cra):
    """Format *cra* (chain/residue/atom) as an atom selection string.

    The result is "chain C resi N ins I atom A", where a blank insertion
    code is written as ".", followed by " alt X" when the atom has an
    altloc other than the NUL character.
    """
    seqid = cra.residue.seqid
    icode = seqid.icode if seqid.icode.strip() else "."
    parts = ["chain {} resi {} ins {} atom {}".format(cra.chain.name, seqid.num, icode, cra.atom.name)]
    if cra.atom.altloc != "\0":
        parts.append(" alt {}".format(cra.atom.altloc))
    return "".join(parts)
# make_atom_spec()
631
+
632
def dictionary_block_names(monlib, topo):
    """Return the set of lower-cased dictionary block names in use.

    Contains every monomer name of *monlib*, "link_<id>" for each link to a
    previous residue and each extra link of *topo*, and "mod_<id>" for each
    modification applied to a residue.
    """
    used = set()
    for mon_name in monlib.monomers:
        used.add(mon_name.lower())
    res_infos = [ri for ci in topo.chain_infos for ri in ci.res_infos]
    for ri in res_infos:
        # won't be included if the name starts with "auto-", but don't do such checks here
        used.update("link_" + link.link_id.lower() for link in ri.prev)
        used.update("mod_" + mod.id.lower() for mod in ri.mods)
    used.update("link_" + extra.link_id.lower() for extra in topo.extras)
    return used
# dictionary_block_names()
645
+
646
def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
    """Find NCS-related polymer chains in the first model of *st*.

    Every pair of peptide or nucleic-acid chains of the same polymer type
    is sequence-aligned.  A pair is kept when at least *min_nalign*
    residues match and the mean local rms (window *rms_loc_nlen*, as
    computed by ext.NcsList.Ncs) is defined and not above *max_rms_loc*.
    A summary table is logged; alignments are printed for pairs whose
    identity is below 100.

    Returns:
        ext.NcsList holding the accepted pairs, after ``set_pairs()``.
    """
    logger.writeln("Finding NCS..")
    accepted_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD,
                      gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    polymers = {}
    for chain in st[0]:
        rs = chain.get_polymer()
        p_type = rs.check_polymer_type()
        if p_type in accepted_types:
            polymers.setdefault(p_type, []).append((chain, rs))

    scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
    al_res = []
    ncslist = ext.NcsList()
    wrap_width = 100
    for pt, pols in polymers.items():
        for i, (chain_i, pol_i) in enumerate(pols[:-1]):
            q = [x.name for x in pol_i]
            for chain_j, pol_j in pols[i+1:]:
                al = gemmi.align_sequence_to_polymer(q, pol_j, pt, scoring)
                if al.match_count < min_nalign:
                    continue
                su = gemmi.calculate_superposition(pol_i, pol_j, pt, gemmi.SupSelect.All)
                obj = ext.NcsList.Ncs(al, pol_i, pol_j, chain_i.name, chain_j.name)
                obj.calculate_local_rms(rms_loc_nlen)
                if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
                    continue
                ave_local_rms = numpy.nanmean(obj.local_rms)
                if ave_local_rms > max_rms_loc:
                    continue
                ncslist.ncss.append(obj)
                first, last = obj.seqids[0], obj.seqids[-1]
                summary = {"chain_1": "{} ({}..{})".format(obj.chains[0], first[0], last[0]),
                           "chain_2": "{} ({}..{})".format(obj.chains[1], first[1], last[1]),
                           "aligned": al.match_count,
                           "identity": al.calculate_identity(1),
                           "rms": su.rmsd,
                           "ave(rmsloc)": ave_local_rms,
                           }
                al_res.append(summary)
                if summary["identity"] >= 100:
                    continue
                # show the alignment of non-identical pairs
                logger.writeln(f"seq1: {chain_i.name} {pol_i[0].seqid}..{pol_i[-1].seqid}")
                logger.writeln(f"seq2: {chain_j.name} {pol_j[0].seqid}..{pol_j[-1].seqid}")
                logger.writeln(f"match_count: {al.match_count} (identity: {summary['identity']:.2f})")
                s1 = gemmi.one_letter_code(q)
                p_seq = gemmi.one_letter_code(pol_j.extract_sequence())
                p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
                for k in range(0, len(p1), wrap_width):
                    stop = k + wrap_width
                    logger.writeln(" seq1 {}".format(p1[k:stop]))
                    logger.writeln(" {}".format(al.match_string[k:stop]))
                    logger.writeln(" seq2 {}\n".format(p2[k:stop]))

    ncslist.set_pairs()
    df = pandas.DataFrame(al_res)
    df.index += 1 # number the pairs from 1
    logger.writeln(df.to_string(float_format="%.2f"))
    return ncslist
# prepare_ncs_restraints()
701
+
702
class MetalCoordination:
    """Ideal metal-ligand distances read from the "metal_coordination" table.

    ``self.metals`` maps metal element name -> ligand element name -> list
    of entries; the entries are read here through their "median", "mad" and
    "coord" keys.
    """
    def __init__(self, monlib, dbfile=None):
        """Load the table from *dbfile*, by default ``metals.json`` in ``monlib.path()``.

        A missing file is not an error: a warning is logged and the table is empty.
        """
        self.monlib = monlib
        if dbfile is None:
            dbfile = os.path.join(monlib.path(), "metals.json")
        if os.path.exists(dbfile):
            # context manager so that the file handle is closed promptly
            with open(dbfile) as ifs:
                self.metals = json.load(ifs)["metal_coordination"]
        else:
            self.metals = {}
            logger.writeln("WARNING: {} not found".format(dbfile))
    # __init__()

    def find_max_dist(self, cra_metal, cra_ligand):
        """Return the largest median distance for the element pair.

        Falls back to ``monlib.find_ideal_distance()`` when the table has no
        entry for the pair.
        """
        vals = self.find_ideal_distances(cra_metal.atom.element, cra_ligand.atom.element)
        if len(vals) == 0:
            # if not found
            return self.monlib.find_ideal_distance(cra_metal, cra_ligand)
        return max(x["median"] for x in vals)
    # find_max_dist()

    def find_ideal_distances(self, el_metal, el_ligand):
        """Return the table entries for the element pair, or [] if there are none."""
        if el_metal.name not in self.metals or el_ligand.name not in self.metals[el_metal.name]:
            return []
        return self.metals[el_metal.name][el_ligand.name]
    # find_ideal_distances

    def setup_restraints(self, st):
        """Create external distance restraints for link-less metal connections.

        Connections of *st* with type MetalC and an empty link_id are
        grouped by metal element and site.  For each ligand element with
        table entries whose "mad" is positive, one "exte dist" keyword is
        generated per entry (sigma = max(0.02, 1.5*mad)).

        Returns:
            (keywords, todel): Refmac keyword strings, and the indices of
            the connections in ``st.connections`` that are covered by those
            keywords (the caller removes them so that no ordinary bond
            restraint is added as well).
        """
        ret = [] # returns Refmac keywords
        lookup = {x.atom: x for x in st[0].all()}
        coords = {}
        todel = []
        for i, con in enumerate(st.connections):
            if con.link_id == "" and con.type == gemmi.ConnectionType.MetalC:
                cra1 = st[0].find_cra(con.partner1, ignore_segment=True)
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                if None in (cra1.atom, cra2.atom): continue
                ener_ideal = self.monlib.find_ideal_distance(cra1, cra2)
                coords.setdefault(cra1.atom.element, {}).setdefault(cra1.atom, []).append((cra2.atom, i, ener_ideal))
        if coords:
            logger.writeln("Metal coordinations detected")
        for metal in coords:
            logger.writeln(" Metal: {}".format(metal.name))
            ligand_els = {x[0].element for m in coords[metal] for x in coords[metal][m]}
            logger.writeln(" ideal distances")
            ideals = {}
            for el in ligand_els:
                logger.write(" {}: ".format(el.name))
                vals = self.find_ideal_distances(metal, el)
                if len(vals) == 0:
                    ener_ideals = {x[2] for m in coords[metal] for x in coords[metal][m] if x[0].element == el}
                    logger.write(" ".join("{:.2f}".format(x) for x in ener_ideals))
                    logger.writeln(" (from ener_lib)")
                else:
                    logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
                    usable = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
                    # Register the element only if a restraint can actually be
                    # written; otherwise the connection would be scheduled for
                    # removal without any restraint replacing it.
                    if usable:
                        ideals[el] = usable
            logger.writeln("")
            for i, am in enumerate(coords[metal]):
                logger.writeln(" site {}: {}".format(i+1, lookup[am]))
                for j, (lig, con_idx, _) in enumerate(coords[metal][am]):
                    con = st.connections[con_idx]
                    logger.writeln(" ligand {}: {} dist= {:.2f}".format(j+1, lookup[lig],
                                                                        con.reported_distance))
                    specs = [make_atom_spec(x) for x in (lookup[am], lookup[lig])]
                    if lig.element not in ideals:
                        continue
                    todel.append(con_idx)
                    # NOTE: restraints are emitted as Refmac keywords rather
                    # than as ext.Geometry.Bond objects.
                    for ideal, sigma in ideals[lig.element]:
                        exte_str = "exte dist first {} seco {} ".format(*specs)
                        exte_str += "valu {:.4f} sigm {:.4f} type 1 ".format(ideal, sigma)
                        if con.asu == gemmi.Asu.Different:
                            exte_str += "symm y"
                        ret.append(exte_str)
            logger.writeln("")
        return ret, list(set(todel))
    # setup_restraints()