servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +1162 -0
  7. servalcat/refine/refine_geom.py +245 -0
  8. servalcat/refine/refine_spa.py +400 -0
  9. servalcat/refine/refine_xtal.py +339 -0
  10. servalcat/refine/spa.py +151 -0
  11. servalcat/refine/xtal.py +312 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +191 -0
  14. servalcat/refmac/refmac_keywords.py +660 -0
  15. servalcat/refmac/refmac_wrapper.py +423 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +488 -0
  18. servalcat/spa/fsc.py +391 -0
  19. servalcat/spa/localcc.py +197 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +979 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1629 -0
  27. servalcat/utils/fileio.py +836 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +811 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +933 -0
  33. servalcat/utils/refmac.py +759 -0
  34. servalcat/utils/restraints.py +888 -0
  35. servalcat/utils/symmetry.py +298 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +262 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1954 -0
  40. servalcat/xtal/twin.py +316 -0
  41. servalcat-0.4.131.dist-info/METADATA +60 -0
  42. servalcat-0.4.131.dist-info/RECORD +45 -0
  43. servalcat-0.4.131.dist-info/WHEEL +6 -0
  44. servalcat-0.4.131.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,888 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.refmac import refmac_keywords
11
+ from servalcat import ext
12
+ import os
13
+ import gemmi
14
+ import string
15
+ import random
16
+ import numpy
17
+ import pandas
18
+ import json
19
+ import fnmatch
20
+
21
+ default_proton_scale = 1.13 # scale of X-proton distance to X-H(e) distance
22
+
23
def decide_new_mod_id(mod_id, mods):
    """Return a modification id that is not yet present in ``mods``.

    Refmac only allows ids of up to 8 characters.  When ``mod_id`` is shorter
    than that, a single character from 0-9a-z is appended (preceded by "-"
    unless ``mod_id`` already has 7 characters) and the first unused candidate
    is returned.  If ``mod_id`` is too long, or all of those candidates are
    taken, the original name is given up and an id of the form
    "mod" + 4 characters is returned instead.

    Parameters
    ----------
    mod_id : str
        Modification id that needs to be replaced.
    mods : container
        Existing ids; only the ``in`` operator is used on it.

    Returns
    -------
    str
        An id for which ``new_id not in mods`` holds.

    Raises
    ------
    RuntimeError
        If every possible "modXXXX" id is already in ``mods``.
    """
    import itertools  # local import: only needed by the exhaustive fallback

    letters = string.digits + string.ascii_lowercase
    if len(mod_id) < 8:
        sep = "" if len(mod_id) == 7 else "-"
        for l in letters:
            new_id = "{}{}{}".format(mod_id, sep, l)
            if new_id not in mods:
                return new_id

    # give up keeping original name
    # A bounded number of random attempts first (almost always succeeds at once) ...
    for _ in range(100):
        new_id = "mod" + "".join(random.choice(letters) for _ in range(4))
        if new_id not in mods:
            return new_id

    # ... then a deterministic scan, so that the function always terminates.
    for tail in itertools.product(letters, repeat=4):
        new_id = "mod" + "".join(tail)
        if new_id not in mods:
            return new_id

    raise RuntimeError("No unused modification id is available for {}".format(mod_id))
# decide_new_mod_id()
38
+
39
def rename_cif_modification_if_necessary(doc, known_ids):
    """Rename modifications in ``doc`` whose ids clash with ``known_ids``.

    For every ``_chem_mod.id`` found in ``known_ids`` a new id is chosen with
    decide_new_mod_id() and written back to the document.  The matching
    ``mod_<id>`` block is renamed, every ``*.mod_id`` value inside it is
    updated, and ``_chem_link.mod_id_1`` / ``_chem_link.mod_id_2`` references
    are updated throughout the document.

    Parameters
    ----------
    doc : gemmi.cif.Document
        Document to be modified in place.
    known_ids : container
        Ids that are already in use; only ``in`` is used on it.

    Returns
    -------
    dict
        Mapping of old id -> new id for the renamed modifications.
    """
    # FIXME Problematic if other file refers to modification that is renamed in this function - but how can we know?
    trans = {}
    for b in doc:
        for row in b.find("_chem_mod.", ["id"]):
            mod_id = row.str(0)
            if mod_id in known_ids:
                new_id = decide_new_mod_id(mod_id, known_ids)
                trans[mod_id] = new_id
                row[0] = new_id # modify id
                logger.writeln("INFO:: renaming modification id {} to {}".format(mod_id, new_id))

    # modify ids in mod_* blocks
    for mod_id in trans:
        b = doc.find_block("mod_{}".format(mod_id))
        if not b: # should raise error?
            logger.writeln("WARNING:: inconsistent mod description for {}".format(mod_id))
            continue
        b.name = "mod_{}".format(trans[mod_id]) # modify name
        for item in b:
            # An item is a loop, a tag-value pair or a frame; item.loop is None
            # for the latter two, so it must not be dereferenced blindly.
            if item.loop is not None:
                tags = item.loop.tags
            elif item.pair is not None:
                tags = [item.pair[0]]
            else:
                continue
            for tag in tags:
                if tag.endswith(".mod_id"):
                    for row in b.find(tag[:tag.rindex(".")+1], ["mod_id"]):
                        row[0] = trans[mod_id]

    # Update mod id in links
    if trans:
        for b in doc:
            for row in b.find("_chem_link.", ["mod_id_1", "mod_id_2"]):
                for i in range(2):
                    if row.str(i) in trans:
                        row[i] = trans[row.str(i)]

    return trans
# rename_cif_modification_if_necessary()
74
+
75
def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns=False,
                         ignore_monomer_dir=False, update_old_atom_names=True,
                         params=None):
    """Build a gemmi.MonLib for the residues found in model 0 of ``st``.

    Monomers are first read from ``monomer_dir`` (taken from the CLIBD_MON
    environment variable when not given) unless ``ignore_monomer_dir`` is set,
    and then from each file of ``cif_files``; definitions from the files
    replace monomers and links that were already loaded.

    Parameters
    ----------
    st : gemmi.Structure
    monomer_dir : str, optional
    cif_files : list of str, optional
    stop_for_unknowns : bool
        Raise RuntimeError if a residue of the model has no monomer definition.
    ignore_monomer_dir : bool
    update_old_atom_names : bool
        Call ``monlib.update_old_atom_names(st, logger)`` at the end.
    params : dict, optional
        If given, ``params["restr"]["torsion_include"]`` is passed to update_torsions().

    Returns
    -------
    gemmi.MonLib

    Raises
    ------
    RuntimeError
        If ``monomer_dir`` is not a directory, if a user cif file has bonds
        without distance values, or for unknown residues when
        ``stop_for_unknowns`` is set.
    """
    resnames = st[0].get_all_residue_names()

    if cif_files is None:
        cif_files = []
    if monomer_dir is None and not ignore_monomer_dir:
        if "CLIBD_MON" in os.environ:
            monomer_dir = os.environ["CLIBD_MON"]
        else:
            logger.error("WARNING: CLIBD_MON is not set")

    monlib = gemmi.MonLib()
    if monomer_dir and not ignore_monomer_dir:
        if not os.path.isdir(monomer_dir):
            raise RuntimeError("not a directory: {}".format(monomer_dir))

        logger.writeln("Reading monomers from {}".format(monomer_dir))
        monlib.read_monomer_lib(monomer_dir, resnames, logger)

    for f in cif_files:
        logger.writeln("Reading monomer: {}".format(f))
        doc = gemmi.cif.read(f)
        for b in doc:
            if b.find_values("_chem_comp_atom.atom_id"):
                name = b.name.replace("comp_", "")
                # definitions from user files take precedence over the library
                if name in monlib.monomers:
                    logger.writeln("WARNING:: updating monomer {} using {}".format(name, f))
                    del monlib.monomers[name]

                # Check if bond length values are included
                # This is to fail if cif file is e.g. from PDB website
                has_bonds = b.find_values("_chem_comp_bond.comp_id")
                if has_bonds and not b.find_values("_chem_comp_bond.value_dist"):
                    raise RuntimeError(f"Bond length information for {name} is missing from {f}. Please generate restraints using a tool like acedrg.")

            for row in b.find("_chem_link.", ["id"]):
                link_id = row.str(0)
                if link_id in monlib.links:
                    logger.writeln("WARNING:: updating link {} using {}".format(link_id, f))
                    del monlib.links[link_id]

        # If modification id is duplicated, need to rename
        rename_cif_modification_if_necessary(doc, monlib.modifications)
        monlib.read_monomer_doc(doc)
        for b in doc:
            for row in b.find("_chem_comp.", ["id", "group"]):
                comp_id = row.str(0)
                if comp_id in monlib.monomers:
                    monlib.monomers[comp_id].set_group(row.str(1))

    not_loaded = set(resnames).difference(monlib.monomers)
    if not_loaded:
        logger.writeln("WARNING: monomers not loaded: {}".format(" ".join(not_loaded)))

    n_loaded = (len(monlib.monomers), len(monlib.links), len(monlib.modifications))
    logger.writeln("Monomer library loaded: {} monomers, {} links, {} modifications".format(*n_loaded))
    logger.writeln(" loaded monomers: {}".format(" ".join(monlib.monomers)))
    logger.writeln("")

    logger.writeln("Checking if unknown atoms exist..")

    unknown_cc = {res.name for chain in st[0] for res in chain
                  if res.name not in monlib.monomers}
    if unknown_cc:
        if stop_for_unknowns:
            raise RuntimeError("Provide restraint cif file(s) for {}".format(",".join(unknown_cc)))
        logger.writeln("WARNING: ad-hoc restraints will be generated for {}".format(",".join(unknown_cc)))
        logger.writeln(" it is strongly recommended to generate them using AceDRG.")

    if update_old_atom_names:
        monlib.update_old_atom_names(st, logger)

    if params:
        torsion_params = params.get("restr", {}).get("torsion_include", {})
        update_torsions(monlib, torsion_params)

    return monlib
# load_monomer_library()
156
+
157
def fix_elements_in_model(monlib, st):
    """Make atom elements in model 0 of ``st`` agree with the monomer library.

    For every atom whose residue name and atom name are found in
    ``monlib.monomers``, the element is replaced by the one given in the
    library when they differ, and a warning is logged.
    """
    expected = {}
    for mon_name in monlib.monomers:
        expected[mon_name] = {a.id: a.el for a in monlib.monomers[mon_name].atoms}
    # atom -> item of st[0].all(), used only to print the atom in the warning
    cra_of = {cra.atom: cra for cra in st[0].all()}
    for chain in st[0]:
        for res in chain:
            el_by_name = expected.get(res.name)
            if not el_by_name:
                continue # should not happen
            for at in res:
                # for example atom names of element D may be different, which will be sorted later
                if at.name in el_by_name:
                    el = el_by_name[at.name]
                    if at.element != el:
                        logger.writeln(f"WARNING: correcting element of {cra_of[at]} to {el.name}")
                        at.element = el
# fix_elements_in_model()
172
+
173
def update_torsions(monlib, params):
    """Overwrite torsion targets in ``monlib`` following user rules.

    Only rules containing at least one of "tors_value", "tors_sigma" or
    "tors_period" are used.  A rule selects monomers by "residue" (fnmatch
    pattern on the monomer name) or by "group", or links by "link" (fnmatch
    pattern on the link id); torsions whose label matches the "tors_name"
    pattern then get their value / esd / period replaced.
    """
    # (rule key, torsion attribute) pairs that can be overridden
    targets = (("tors_value", "value"), ("tors_sigma", "esd"), ("tors_period", "period"))
    # take subset
    rules = [p for p in params if any(key in p for key, _ in targets)]
    if not rules:
        return
    logger.writeln("Updating torsion targets in dictionaries")
    for p in rules:
        if "residue" in p:
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if fnmatch.fnmatch(cc.name, p["residue"])]
        elif "group" in p:
            g = gemmi.ChemComp.read_group(p["group"])
            # should warn if g is Null
            tors = [cc.rt.torsions for cc in monlib.monomers.values()
                    if cc.group == g]
        elif "link" in p:
            tors = [ln.rt.torsions for ln in monlib.links.values()
                    if fnmatch.fnmatch(ln.id, p["link"])]
        else:
            tors = []
        if not tors:
            continue
        logger.writeln(f" rule = {p}")
        for tt in tors:
            for t in tt:
                if not fnmatch.fnmatch(t.label, p["tors_name"]):
                    continue
                for key, attr in targets:
                    if key in p:
                        setattr(t, attr, p[key])
# update_torsions()
207
+
208
def make_torsion_rules(restr_params):
    """Build the include/exclude rule lists for torsion restraints.

    Starts from built-in include rules and an empty exclude list, then applies
    the entries of ``restr_params["torsion_include"]`` and
    ``restr_params["torsion_exclude"]`` in order: an entry with a true "flag"
    appends a rule (it needs "tors_name" and at least one of "residue",
    "group", "link"); an entry with a false "flag" empties the list.

    Returns
    -------
    (list of dict, list of dict)
        include rules and exclude rules.
    """
    # Defaults
    include_rules = [{"group": "peptide", "tors_name": "chi*"},
                     {"link": "*", "tors_name": "omega"},
                     {"residue": "*", "tors_name": "sp2_sp2*"},
                     {"link": "*", "tors_name": "sp2_sp2*"},
                     ]
    exclude_rules = []

    # Override include/exclude rules
    for name, rules in (("torsion_include", include_rules),
                        ("torsion_exclude", exclude_rules)):
        for p in restr_params.get(name, []):
            if not p["flag"]:
                rules.clear()
                continue
            r = {k: p[k] for k in ("residue", "group", "link") if k in p}
            if r and "tors_name" in p:
                r["tors_name"] = p["tors_name"]
                rules.append(r)

    # How to tell about hydrogen?
    logger.writeln("Torsion angle rules:")
    for l, rr in (("include", include_rules), ("exclude", exclude_rules)):
        logger.writeln(f" {l}:")
        if not rr:
            logger.writeln(f" none")
        for r in rr:
            logger.writeln(f" {r}")

    return include_rules, exclude_rules
# make_torsion_rules()
243
+
244
def select_restrained_torsions(monlib, include_rules, exclude_rules):
    """Select torsion labels to be restrained for each monomer and link.

    Torsion labels are collected from the monomers, from modifications
    (assigned to the monomer given by ``comp_id`` or, without ``comp_id``, to
    all monomers of the modification's group) and from the links of
    ``monlib``.  Labels matching an include rule are taken, then those
    matching an exclude rule are dropped.

    Returns
    -------
    dict
        ``{"monomer": {monomer id: sorted labels}, "link": {link id: sorted labels}}``;
        ids without any selected label are omitted.
    """
    # Collect monomer/link related torsions
    mon_tors = {}
    groups = {}
    for mon_id in monlib.monomers:
        mon = monlib.monomers[mon_id]
        groups.setdefault(mon.group, []).append(mon_id)
        mon_tors[mon_id] = [x.label for x in mon.rt.torsions]
    for mod_id in monlib.modifications:
        mod = monlib.modifications[mod_id]
        labels = [x.label for x in mod.rt.torsions if chr(x.id1.comp) in ("a", "c")] # don't need delete
        if not labels:
            continue
        gr = gemmi.ChemComp.read_group(mod.group_id)
        if mod.comp_id:
            if mod.comp_id in mon_tors:
                mon_tors[mod.comp_id].extend(labels)
        elif gr in groups:
            for mon_id in groups[gr]:
                mon_tors[mon_id].extend(labels)
    link_tors = {}
    for lnk_id in monlib.links:
        lnk = monlib.links[lnk_id]
        if lnk.rt.torsions:
            link_tors[lnk_id] = [x.label for x in lnk.rt.torsions]
    mon_tors = {k: set(v) for k, v in mon_tors.items()}
    link_tors = {k: set(v) for k, v in link_tors.items()}

    def select(labels, matches):
        # Apply include rules first, then remove what exclude rules match
        chosen = []
        for r in include_rules:
            if matches(r):
                chosen.extend(x for x in labels if fnmatch.fnmatch(x, r["tors_name"]))
        for r in exclude_rules:
            if matches(r):
                chosen = [x for x in chosen if not fnmatch.fnmatch(x, r["tors_name"])]
        return sorted(chosen)

    ret = {"monomer": {}, "link": {}}

    # Apply include/exclude rule
    for mon in mon_tors:
        def matches_monomer(r, mon=mon):
            if "tors_name" not in r:
                return False
            if "residue" in r and fnmatch.fnmatch(mon, r["residue"]):
                return True
            return mon in groups.get(gemmi.ChemComp.read_group(r.get("group", "")), [])
        use_tors = select(mon_tors[mon], matches_monomer)
        if use_tors:
            ret["monomer"][mon] = use_tors
    for lnk in link_tors:
        def matches_link(r, lnk=lnk):
            return "tors_name" in r and "link" in r and fnmatch.fnmatch(lnk, r["link"])
        use_tors = select(link_tors[lnk], matches_link)
        if use_tors:
            ret["link"][lnk] = use_tors

    return ret
# select_restrained_torsions()
301
+
302
def _topo_check_duplicated_atoms(st):
    """Raise RuntimeError if a residue of model 0 has atoms sharing (name, altloc)."""
    bad = []
    for chain in st[0]:
        bad_res = []
        for res in chain:
            n_uniq = len({(a.name, a.altloc) for a in res})
            if n_uniq != len(res):
                bad_res.append(str(res.seqid))
        if bad_res:
            bad.append(" chain {}: {}".format(chain.name, " ".join(bad_res)))
    if bad:
        raise RuntimeError("Following residues have duplicated atoms. Check your model.\n{}".format("\n".join(bad)))


def _topo_remove_failed_hydrogens(st):
    """Delete hydrogens flagged as Dummy or having NaN coordinates; return True if any was deleted."""
    deleted = False
    for chain in st[0]:
        for res in chain:
            todel = []
            for i, atom in enumerate(res):
                if atom.is_hydrogen() and (atom.calc_flag == gemmi.CalcFlag.Dummy
                                           or any(numpy.isnan(atom.pos.tolist()))):
                    logger.writeln(f" Removing failed hydrogen: {chain.name}/{res.name} {res.seqid}/{atom.name}")
                    todel.append(i)
                    deleted = True
            # delete from the end so that the remaining indices stay valid
            for i in reversed(todel):
                del res[i]
    return deleted


def _topo_log_chain_info(topo):
    """Log, per chain, the polymer range with its gaps and the non-polymer residue names."""
    def extra_defined(res1, res2): # TODO should check alt
        for link in topo.extras:
            res12 = (link.res1, link.res2)
            if link.link_id and (res12 == (res1, res2) or res12 == (res2, res1)):
                return True
        return False

    # collect info
    info = {}
    for cinfo in topo.chain_infos:
        toadd = info.setdefault(cinfo.chain_ref.name, {})
        if cinfo.polymer:
            gaps = []
            for rinfo in cinfo.res_infos:
                if (rinfo.prev and rinfo.prev[0].link_id in ("gap", "") and
                    not extra_defined(rinfo.prev[0].res1, rinfo.prev[0].res2)):
                    gaps.append((rinfo.prev[0].res1, rinfo.prev[0].res2))
            toadd["polymer"] = (str(cinfo.polymer_type).replace("PolymerType.", ""),
                                cinfo.res_infos[0].res.seqid,
                                cinfo.res_infos[-1].res.seqid,
                                len(cinfo.res_infos), gaps)
        else:
            l = toadd.setdefault("nonpolymer", [])
            for ri in cinfo.res_infos:
                l.append(ri.res.name)
    logger.writeln("\nChain info:")
    for chain in info:
        logger.writeln(" chain {}".format(chain))
        if "polymer" in info[chain]:
            logger.writeln(" {}: {}..{} ({} residues)".format(*info[chain]["polymer"][:-1]))
            for gap in info[chain]["polymer"][-1]:
                logger.writeln(" gap between {} and {}".format(*gap))
        if "nonpolymer" in info[chain]:
            n_res = len(info[chain]["nonpolymer"])
            uniq = set(info[chain]["nonpolymer"])
            logger.writeln(" ligands: {} ({} residues)".format(" ".join(uniq), n_res))
    logger.writeln("")


def _topo_collect_problems(topo, monlib, check_hydrogen):
    """Return (unknown_cc, link_related, nan_hydr): sets of residue names with atoms that could not be handled."""
    unknown_cc = set()
    link_related = set()
    nan_hydr = set()
    for cinfo in topo.chain_infos:
        for rinfo in cinfo.res_infos:
            cc_org = monlib.monomers[rinfo.res.name] if rinfo.res.name in monlib.monomers else None
            for ia in reversed(range(len(rinfo.res))):
                atom = rinfo.res[ia]
                atom_str = "{}/{} {}/{}".format(cinfo.chain_ref.name, rinfo.res.name, rinfo.res.seqid, atom.name)
                cc = rinfo.get_final_chemcomp(atom.altloc)
                cc_atom = cc.find_atom(atom.name)
                if cc_atom:
                    if cc_atom.chem_type not in monlib.ener_lib.atoms:
                        deftype = atom.element.name.upper()
                        logger.writeln(f"WARNING: unknown chemical type {cc_atom.chem_type} of {atom_str}. Will use default type {deftype}")
                        cc_atom.chem_type = deftype
                else:
                    # warning message should have already been given by gemmi
                    # Atom known to the original monomer but not to the final one
                    # is attributed to links; otherwise to the monomer itself.
                    if cc_org and cc_org.find_atom(atom.name):
                        if check_hydrogen or not atom.is_hydrogen():
                            link_related.add(rinfo.res.name)
                    else:
                        if check_hydrogen or not atom.is_hydrogen():
                            unknown_cc.add(rinfo.res.name)

                if atom.is_hydrogen() and atom.calc_flag == gemmi.CalcFlag.Dummy:
                    logger.writeln(" Warning: hydrogen {} could not be added - Check dictionary".format(atom_str))
                    unknown_cc.add(rinfo.res.name)
                elif any(numpy.isnan(atom.pos.tolist())): # TODO add NaN test before prepare_toplogy
                    logger.writeln(" Warning: {} position NaN!".format(atom_str))
                    nan_hydr.add(rinfo.res.name)
    return unknown_cc, link_related, nan_hydr


def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_error=True, check_hydrogen=False,
                     remove_bad_hydrogen=True, use_cispeps=False, add_metal_restraints=True, params=None):
    """Create a gemmi topology for ``st`` and check it for problems.

    Parameters
    ----------
    st : gemmi.Structure
        Modified in place (hydrogens, calc flags, cispeps, connections).
    monlib : gemmi.MonLib
    h_change : gemmi.HydrogenChange
        Passed to gemmi.prepare_topology().
    ignore_unknown_links, use_cispeps : bool
        Passed to gemmi.prepare_topology().
    raise_error : bool
        Raise RuntimeError when unknown atoms or NaN positions are found.
    check_hydrogen : bool
        Also treat unknown hydrogen atoms as a problem.
    remove_bad_hydrogen : bool
        Remove hydrogens that could not be placed and rebuild the topology.
    add_metal_restraints : bool
        Set up metal restraints with MetalCoordination; the connections it
        returns for deletion are taken out of ``st.connections`` while the
        topology is created and put back before this function exits.
    params : dict, optional
        If given, parsed "exte" keywords are prepended to ``params["exte"]``.

    Returns
    -------
    (gemmi.Topo, list)
        The topology and the metal restraint keywords (empty list when
        ``add_metal_restraints`` is false).

    Raises
    ------
    RuntimeError
        For duplicated atoms, and (if ``raise_error``) for unknown atoms or
        NaN hydrogen positions.
    """
    # Check duplicated atoms
    _topo_check_duplicated_atoms(st)

    con_bak = []
    try:
        if add_metal_restraints:
            metalc = MetalCoordination(monlib)
            keywords, todel = metalc.setup_restraints(st)
            for i in sorted(todel, reverse=True):
                # temporarily remove connection not to put a bond restraint
                con = st.connections.pop(i)
                con_bak.append((i, con))
                # flag non-hydrogen
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                cra2.atom.calc_flag = gemmi.CalcFlag.NoHydrogen
            if params:
                parsed = refmac_keywords.parse_keywords(keywords).get("exte")
                if parsed:
                    params["exte"] = parsed + params.get("exte", [])
        else:
            keywords = []
        # these checks can be done after sorting links
        logger.writeln("Creating restraints..")
        with logger.with_prefix(" "):
            topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=logger, reorder=False,
                                          ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)

        if remove_bad_hydrogen and _topo_remove_failed_hydrogens(st):
            # needs re-creation, as the deletion invalidates pointers stored in topo
            logger.writeln("Re-creating restraints..")
            with logger.with_prefix(" "):
                topo = gemmi.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.NoChange, warnings=logger, reorder=False,
                                              ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)

        _topo_log_chain_info(topo)
        unknown_cc, link_related, nan_hydr = _topo_collect_problems(topo, monlib, check_hydrogen)

        if raise_error and (unknown_cc or link_related):
            msgs = []
            if unknown_cc: msgs.append("restraint cif file(s) for {}".format(",".join(unknown_cc)))
            if link_related: msgs.append("proper link cif file(s) for {} or check your model".format(",".join(link_related)))
            raise RuntimeError("Provide {}".format(" and ".join(msgs)))
        if raise_error and nan_hydr:
            raise RuntimeError("Some hydrogen positions became NaN. The geometry of your model may be of low quality. Consider not adding hydrogen")
        if not use_cispeps:
            topo.set_cispeps_in_structure(st)
        return topo, keywords
    finally:
        # Put the temporarily removed connections back, also when an error is
        # raised above; ascending index order restores the original positions.
        for i, con in sorted(con_bak, key=lambda x: x[0]):
            st.connections.insert(i, con)
# prepare_topology()
444
+
445
def dump_topology(topo, st):
    """Return the restraints of ``topo`` as a list of plain dictionaries.

    Each dictionary has "kind", "atoms" (string form of the matching item of
    ``st[0].all()``) and "model" (value calculated from the model), plus
    "ideal", "esd" and "label" where applicable.  Restraints coming from links
    additionally carry "link_id", and those from ``topo.extras`` also "asu".
    """
    cra_of = {cra.atom: cra for cra in st[0].all()}
    # restraint kind -> (label used in the output, attribute of topo holding the restraints)
    kinds = {gemmi.RKind.Bond: ("bond", "bonds"),
             gemmi.RKind.Angle: ("angle", "angles"),
             gemmi.RKind.Torsion: ("torsion", "torsions"),
             gemmi.RKind.Chirality: ("chirality", "chirs"),
             gemmi.RKind.Plane: ("plane", "planes"),
             }
    chirality_names = {gemmi.ChiralityType.Both: "both",
                       gemmi.ChiralityType.Negative: "negative",
                       gemmi.ChiralityType.Positive: "positive"}
    asu_names = {gemmi.Asu.Different: "different",
                 gemmi.Asu.Any: "any",
                 gemmi.Asu.Same: "same"}

    def get_details(rule):
        kind = rule.rkind
        lab, attr = kinds[kind]
        t = getattr(topo, attr)[rule.index]
        details = {"kind": lab,
                   "atoms": [str(cra_of[a]) for a in t.atoms]}
        if kind == gemmi.RKind.Chirality:
            details["ideal"] = chirality_names[t.restr.sign]
        elif kind == gemmi.RKind.Plane:
            details["esd"] = t.restr.esd
        else:
            details["ideal"] = t.restr.value
            details["esd"] = t.restr.esd
        if kind in (gemmi.RKind.Torsion, gemmi.RKind.Plane):
            details["label"] = t.restr.label
        if kind in (gemmi.RKind.Angle, gemmi.RKind.Torsion):
            # calculate() result is converted from radians to degrees
            details["model"] = numpy.rad2deg(t.calculate())
        elif kind == gemmi.RKind.Plane:
            coef = gemmi.find_best_plane(t.atoms)
            details["model"] = [gemmi.get_distance_from_plane(a.pos, coef) for a in t.atoms]
        else:
            details["model"] = t.calculate()
        return details

    out = []
    for cinfo in topo.chain_infos:
        for ri in cinfo.res_infos:
            out.extend({"link_id": prev.link_id} | get_details(rule)
                       for prev in ri.prev for rule in prev.link_rules)
            out.extend(get_details(rule) for rule in ri.monomer_rules)

    for extra in topo.extras:
        for rule in extra.link_rules:
            head = {"link_id": extra.link_id,
                    "asu": asu_names[extra.asu],
                    }
            out.append(head | get_details(rule))
    return out
# dump_topology()
496
+
497
def check_monlib_support_nucleus_distances(monlib, resnames):
    """Check that bonds involving hydrogen have nucleus distances defined.

    Returns False if any of ``resnames`` is missing from ``monlib.monomers``
    or has a bond to hydrogen whose ``value_nucleus`` is NaN; True otherwise.
    Problems are reported through the logger.
    """
    def lacks_nucleus_value(mon, bond):
        is_h = (mon.get_atom(bond.id1.atom).is_hydrogen(), mon.get_atom(bond.id2.atom).is_hydrogen())
        # x != x is true only for NaN
        return any(is_h) and bond.value_nucleus != bond.value_nucleus

    good = True
    nucl_not_found = []
    for resn in resnames:
        if resn not in monlib.monomers:
            logger.error("ERROR: monomer information of {} not loaded".format(resn))
            good = False
            continue
        mon = monlib.monomers[resn]
        if any(lacks_nucleus_value(mon, bond) for bond in mon.rt.bonds):
            nucl_not_found.append(resn)
            good = False

    if nucl_not_found:
        logger.writeln("WARNING: nucleus distance is not found for: {}".format(" ".join(nucl_not_found)))
        logger.writeln(" default scale ({}) is used for nucleus distances.".format(default_proton_scale))
    return good
# check_monlib_support_nucleus_distances()
521
+
522
def remove_duplicated_links(connections):
    """Remove connections that join the same pair of atoms, in place.

    Two connections are duplicates when their partners agree (in either order)
    in chain name, sequence number, insertion code, atom name and altloc.
    Of each group of duplicates the first one with a non-blank ``link_id`` is
    kept, or the first one if none has a ``link_id``.
    """
    # ignore p.res_id.name?
    def partner_key(p):
        return (p.chain_name, p.res_id.seqid.num, p.res_id.seqid.icode, p.atom_name, p.altloc)

    groups = {}
    for idx, con in enumerate(connections):
        # sorted so that A-B and B-A give the same key
        pair = tuple(sorted([partner_key(con.partner1), partner_key(con.partner2)]))
        groups.setdefault(pair, []).append(idx)

    todel = []
    for indices in groups.values():
        if len(indices) <= 1:
            continue
        with_id = [c for c in indices if connections[c].link_id.strip()]
        ids = {connections[c].link_id for c in with_id}
        if len(ids) > 1:
            logger.writeln(" WARNING: duplicated links are found with different link_id")
        tokeep = with_id[0] if with_id else indices[0]
        todel.extend(c for c in indices if c != tokeep)

    # delete from the end so that the remaining indices stay valid
    for i in sorted(todel, reverse=True):
        del connections[i]
    if todel:
        logger.writeln(" {} duplicated links were removed.".format(len(todel)))
# remove_duplicated_links()
547
+
548
def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_found=True, find_symmetry_related=True,
                       metal_margin=1.1, add_only_from=None):
    """
    Identify link ids for st.connections and find new links
    This is required for correctly recognizing link in gemmi.prepare_topology
    Note that it ignores segment IDs
    FIXME it assumes only one bond exists in a link. It may not be the case in future.

    Parameters
    ----------
    st : gemmi.Structure
        ``st.connections`` is modified in place.
    monlib : gemmi.MonLib
    bond_margin : float
        A contact matching a library link is reported only if its distance is
        at most the link's ideal bond length times this factor.
    find_metal_links : bool
        Also look for metal - O/N/S/B contacts that match no library link.
    add_found : bool
        Add newly found links to ``st.connections``.
    find_symmetry_related : bool
        If false, contacts to symmetry mates are skipped.
    metal_margin : float
        Like ``bond_margin`` for metal links, relative to the maximum ideal
        distance given by MetalCoordination.find_max_dist().
    add_only_from : container, optional
        If given (and non-empty), only library links whose id is in it are added.
    """
    metalc = MetalCoordination(monlib)
    from servalcat.utils import model

    logger.writeln("Checking links defined in the model")
    remove_duplicated_links(st.connections)
    for con in st.connections:
        if con.type == gemmi.ConnectionType.Hydrog: continue
        if con.link_id == "gap": continue # TODO check residues?
        cra1, cra2 = st[0].find_cra(con.partner1, ignore_segment=True), st[0].find_cra(con.partner2, ignore_segment=True)
        if None in (cra1.atom, cra2.atom):
            logger.writeln(" WARNING: atom(s) not found for link: id= {} atom1= {} atom2= {}".format(con.link_id, con.partner1, con.partner2))
            continue
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        if con.asu != gemmi.Asu.Same: # XXX info from metadata may be wrong
            im = st.cell.find_nearest_image(cra1.atom.pos, cra2.atom.pos, con.asu)
            image_idx = im.sym_idx
            con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
            dist = im.dist()
        else:
            image_idx = 0
            con.asu = gemmi.Asu.Same
            dist = cra1.atom.pos.dist(cra2.atom.pos)
        con.reported_distance = dist
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, image_idx)
        if con.link_id:
            # link id is given in the model: check it against the library
            link = monlib.get_link(con.link_id)
            inv = False
            if link is None:
                logger.writeln(" WARNING: link {} not found in the library. Please provide link dictionary.".format(con.link_id))
                con.link_id = "" # let gemmi find proper link in prepare_topology()
                continue
            else:
                match, _, _ = monlib.test_link(link, cra1.residue.name, cra1.atom.name, cra2.residue.name, cra2.atom.name)
                if not match and monlib.test_link(link, cra2.residue.name, cra2.atom.name, cra1.residue.name, cra1.atom.name)[0]:
                    match = True
                    inv = True
                if not match:
                    logger.writeln(" WARNING: link id and atoms mismatch: id= {} {}".format(link.id, atoms_str))
                    continue
        else:
            # no link id: look for a matching link in the library
            link, inv, _, _ = monlib.match_link(cra1.residue, cra1.atom.name, cra1.atom.altloc,
                                                cra2.residue, cra2.atom.name, cra2.atom.altloc)
            if link:
                con.link_id = link.id
            elif con.type == gemmi.ConnectionType.MetalC:
                logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
                if cra2.atom.element.is_metal:
                    inv = True # make metal first
            else:
                ideal_dist = monlib.find_ideal_distance(cra1, cra2)
                logger.writeln(" Link unidentified (simple bond will be used): {} dist= {:.2f} ideal= {:.2f}".format(atoms_str,
                                                                                                                 dist,
                                                                                                                 ideal_dist))
                continue
        if link:
            logger.writeln(" Link confirmed: id= {} {} dist= {:.2f} ideal= {:.2f}".format(link.id,
                                                                                      atoms_str,
                                                                                      dist,
                                                                                      link.rt.bonds[0].value))
            if con.link_id == "disulf":
                con.type = gemmi.ConnectionType.Disulf
        if inv:
            # swap partners so that they are in the order expected by the link
            con.partner1 = model.cra_to_atomaddress(cra2)
            con.partner2 = model.cra_to_atomaddress(cra1)
    if len(st.connections) == 0:
        logger.writeln(" no links defined in the model")

    logger.writeln("Finding new links (will be added if marked by *)")
    ns = gemmi.NeighborSearch(st[0], st.cell, 5.).populate()
    cs = gemmi.ContactSearch(4.)
    cs.ignore = gemmi.ContactSearch.Ignore.SameResidue
    results = cs.find_contacts(ns)
    onsb = set(gemmi.Element(x) for x in "ONSB")
    n_found = 0

    # st.find_connection_by_cra is quite slow (spent ~12 sec for 7k00, 6301 connections)
    # now it's ~6 times faster
    connections = {tuple((p.chain_name, p.res_id.seqid, p.res_id.name, p.atom_name, p.altloc) for p in (c.partner1, c.partner2))
                   for c in st.connections if c.type != gemmi.ConnectionType.Hydrog}
    def find_connection(cra1, cra2):
        key = lambda cra: (cra.chain.name, cra.residue.seqid, cra.residue.name, cra.atom.name, cra.atom.altloc)
        return (key(cra1), key(cra2)) in connections or (key(cra2), key(cra1)) in connections

    for r in results:
        # skip adjacent residues in a polymer entity
        if (r.partner1.chain == r.partner2.chain and
            r.partner1.residue.entity_type == r.partner2.residue.entity_type == gemmi.EntityType.Polymer and
            r.partner1.residue.entity_id == r.partner2.residue.entity_id):
            if r.partner1.chain.next_residue(r.partner1.residue) == r.partner2.residue:
                atom1, atom2 = r.partner1.atom.name, r.partner2.atom.name
            elif r.partner1.chain.next_residue(r.partner2.residue) == r.partner1.residue:
                atom1, atom2 = r.partner2.atom.name, r.partner1.atom.name
            else:
                atom1, atom2 = None, None
            if atom1 is not None:
                ent = st.get_entity(r.partner1.residue.entity_id)
                if (ent.polymer_type in (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD) and
                    atom1 == "C" and atom2 == "N"):
                    continue
                if (ent.polymer_type in (gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid) and
                    atom1 == "O3'" and atom2 == "P"):
                    continue
        if find_connection(r.partner1, r.partner2): continue
        link, inv, _, _ = monlib.match_link(r.partner1.residue, r.partner1.atom.name, r.partner1.atom.altloc,
                                            r.partner2.residue, r.partner2.atom.name, r.partner2.atom.altloc,
                                            (r.dist / 1.4)**2)
        if link is None and r.partner2.atom.element.is_metal:
            inv = True # make metal first
        if inv:
            cra1, cra2 = r.partner2, r.partner1
        else:
            cra1, cra2 = r.partner1, r.partner2
        im = st.cell.find_nearest_pbc_image(cra1.atom.pos, cra2.atom.pos, r.image_idx)
        #assert r.image_idx == im.sym_idx # should we check this?
        if not find_symmetry_related and not im.same_asu():
            continue
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, r.image_idx)
        if im.pbc_shift != (0,0,0):
            atoms_str += " ({},{},{})".format(*im.pbc_shift)
        if link:
            if r.dist > link.rt.bonds[0].value * bond_margin: continue
            will_be_added = add_found and (not add_only_from or link.id in add_only_from)
            logger.writeln(" {}New link found: id= {} {} dist= {:.2f} ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                        link.id,
                                                                                        atoms_str,
                                                                                        r.dist,
                                                                                        link.rt.bonds[0].value))
        elif find_metal_links:
            # link only metal - O/N/S/B
            if r.partner1.atom.element.is_metal == r.partner2.atom.element.is_metal: continue
            if not cra2.atom.element in onsb: continue
            max_ideal = metalc.find_max_dist(cra1, cra2)
            if r.dist > max_ideal * metal_margin: continue # tolerance should be smaller than that for other links
            will_be_added = add_found
            logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                       atoms_str,
                                                                                       r.dist, max_ideal))
        else:
            continue
        n_found += 1
        if not will_be_added: continue
        con = gemmi.Connection()
        con.name = "added{}".format(n_found)
        if link:
            con.link_id = link.id
            con.type = gemmi.ConnectionType.Disulf if link.id == "disulf" else gemmi.ConnectionType.Covale
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
        con.partner1 = model.cra_to_atomaddress(cra1)
        con.partner2 = model.cra_to_atomaddress(cra2)
        con.reported_distance = r.dist
        st.connections.append(con)
    if n_found == 0:
        logger.writeln(" no links found")
# find_and_fix_links()
713
+
714
def add_hydrogens(st, monlib, pos="elec"):
    """Add hydrogen atoms to ``st`` in place.

    The topology is created by prepare_topology() with
    ``h_change=gemmi.HydrogenChange.ReAddButWater``.  With ``pos="nucl"`` the
    X-H distances are then adjusted to nucleus positions, using
    ``default_proton_scale`` where the library gives no nucleus distance.

    Parameters
    ----------
    st : gemmi.Structure
    monlib : gemmi.MonLib
    pos : str
        "elec" (electron positions) or "nucl" (nucleus positions).
    """
    assert pos in ("elec", "nucl")
    # prepare_topology() returns (topology, keywords); only the topology is needed here
    topo, _ = prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAddButWater, ignore_unknown_links=False)

    if pos == "nucl":
        logger.writeln("Generating hydrogens at nucleus positions")
        resnames = st[0].get_all_residue_names()
        check_monlib_support_nucleus_distances(monlib, resnames)
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus, default_scale=default_proton_scale)
    else:
        logger.writeln("Generating hydrogens at electron positions")
# add_hydrogens()
726
+
727
def make_atom_spec(cra):
    """Format a chain/residue/atom triple as an atom specification string.

    The result has the form ``chain C resi N ins I atom A``; a blank insertion
    code is written as ``.``.  When the atom's altloc is not the NUL character,
    `` alt X`` is appended.

    Args:
        cra: object exposing ``chain.name``, ``residue.seqid.num``,
            ``residue.seqid.icode``, ``atom.name`` and ``atom.altloc``.

    Returns:
        The specification string.
    """
    seqid = cra.residue.seqid
    icode = seqid.icode if seqid.icode.strip() else "."
    pieces = ["chain {} resi {} ins {} atom {}".format(cra.chain.name, seqid.num, icode, cra.atom.name)]
    altloc = cra.atom.altloc
    if altloc != "\0":
        pieces.append(" alt {}".format(altloc))
    return "".join(pieces)
# make_atom_spec()
737
+
738
def dictionary_block_names(monlib, topo):
    """Collect the lower-cased dictionary block names referenced by a topology.

    The returned set contains every name from ``monlib.monomers``, plus
    ``link_<id>`` for each link in the ``prev`` list of every residue and in
    ``topo.extras``, plus ``mod_<id>`` for each modification of every residue.

    Args:
        monlib: object whose ``monomers`` attribute iterates over name strings.
        topo: object with ``chain_infos`` (each having ``res_infos``) and ``extras``.

    Returns:
        set of str.
    """
    names = set()
    for mon_name in monlib.monomers:
        names.add(mon_name.lower())
    for chain_info in topo.chain_infos:
        for res_info in chain_info.res_infos:
            # won't be included if the name starts with "auto-", but don't do such checks here
            names.update("link_" + lnk.link_id.lower() for lnk in res_info.prev)
            names.update("mod_" + modif.id.lower() for modif in res_info.mods)
    names.update("link_" + lnk.link_id.lower() for lnk in topo.extras)
    return names
# dictionary_block_names()
751
+
752
def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
    """Find NCS-related polymer chain pairs in the first model of *st*.

    Chains whose polymer type is a peptide or nucleic acid are grouped by
    type, and every pair within a group is sequence-aligned.  A pair is
    accepted when
      * the alignment has at least *min_nalign* matches,
      * its local rms array (from calculate_local_rms(rms_loc_nlen)) is
        non-empty and not all NaN, and
      * the NaN-ignoring mean of that array does not exceed *max_rms_loc*.
    For accepted pairs with identity below 100 the gapped alignment is logged
    in 100-column chunks; a summary table of all accepted pairs is logged at
    the end.

    Returns:
        The ext.NcsList holding the accepted pairs, after set_pairs() was called.
    """
    logger.writeln("Finding NCS..")
    accepted_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD,
                      gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    polymers = {}
    for chain in st[0]:
        polymer = chain.get_polymer()
        kind = polymer.check_polymer_type()
        if kind not in accepted_types:
            continue
        polymers.setdefault(kind, []).append((chain, polymer))

    scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
    al_res = []
    ncslist = ext.NcsList()
    for pt, pols in polymers.items():
        for i in range(len(pols) - 1):
            chain_i, pol_i = pols[i]
            q = [res.name for res in pol_i]
            for j in range(i + 1, len(pols)):
                chain_j, pol_j = pols[j]
                al = gemmi.align_sequence_to_polymer(q, pol_j, pt, scoring)
                if al.match_count < min_nalign:
                    continue
                sup = gemmi.calculate_superposition(pol_i, pol_j, pt, gemmi.SupSelect.All)
                ncs = ext.NcsList.Ncs(al, pol_i, pol_j, chain_i.name, chain_j.name)
                ncs.calculate_local_rms(rms_loc_nlen)
                if len(ncs.local_rms) == 0 or numpy.all(numpy.isnan(ncs.local_rms)):
                    continue
                ave_local_rms = numpy.nanmean(ncs.local_rms)
                if ave_local_rms > max_rms_loc:
                    continue
                ncslist.ncss.append(ncs)
                first_ids, last_ids = ncs.seqids[0], ncs.seqids[-1]
                al_res.append({"chain_1": "{} ({}..{})".format(ncs.chains[0], first_ids[0], last_ids[0]),
                               "chain_2": "{} ({}..{})".format(ncs.chains[1], first_ids[1], last_ids[1]),
                               "aligned": al.match_count,
                               "identity": al.calculate_identity(1),
                               "rms": sup.rmsd,
                               "ave(rmsloc)": ave_local_rms,
                               })
                if al_res[-1]["identity"] >= 100:
                    continue
                # Not fully identical: show the gapped alignment for inspection
                wrap_width = 100
                logger.writeln(f"seq1: {pols[i][0].name} {pols[i][1][0].seqid}..{pols[i][1][-1].seqid}")
                logger.writeln(f"seq2: {pols[j][0].name} {pols[j][1][0].seqid}..{pols[j][1][-1].seqid}")
                logger.writeln(f"match_count: {al.match_count} (identity: {al_res[-1]['identity']:.2f})")
                s1 = gemmi.one_letter_code(q)
                p_seq = gemmi.one_letter_code(pol_j.extract_sequence())
                p1 = al.add_gaps(s1, 1)
                p2 = al.add_gaps(p_seq, 2)
                for k in range(0, len(p1), wrap_width):
                    stop = k + wrap_width
                    logger.writeln(" seq1 {}".format(p1[k:stop]))
                    logger.writeln(" {}".format(al.match_string[k:stop]))
                    logger.writeln(" seq2 {}\n".format(p2[k:stop]))

    ncslist.set_pairs()
    df = pandas.DataFrame(al_res)
    df.index += 1  # number the table rows from 1
    logger.writeln(df.to_string(float_format="%.2f"))
    return ncslist
# prepare_ncs_restraints()
807
+
808
class MetalCoordination:
    """Ideal metal-ligand distance lookup and Refmac restraint generation.

    The distance table is read from the ``"metal_coordination"`` entry of a
    JSON file (``metals.json`` in ``monlib.path()`` unless *dbfile* is given)
    and is indexed as ``metals[metal_element_name][ligand_element_name]``,
    each value being a list of dicts with at least ``"median"``, ``"mad"`` and
    ``"coord"`` keys.  If the file does not exist a warning is logged and the
    table is empty.
    """
    def __init__(self, monlib, dbfile=None):
        """Store *monlib* and load the distance table.

        Args:
            monlib: monomer library; used for ``path()`` (when *dbfile* is
                None) and for ``find_ideal_distance()`` fallbacks.
            dbfile: optional path to the JSON database.
        """
        self.monlib = monlib
        if dbfile is None:
            dbfile = os.path.join(monlib.path(), "metals.json")
        if os.path.exists(dbfile):
            # JSON is UTF-8 by specification; do not depend on the locale default
            with open(dbfile, encoding="utf-8") as f:
                self.metals = json.load(f)["metal_coordination"]
        else:
            self.metals = {}
            logger.writeln("WARNING: {} not found".format(dbfile))
    # __init__()

    def find_max_dist(self, cra_metal, cra_ligand):
        """Return the largest tabulated median distance for the element pair.

        Falls back to ``monlib.find_ideal_distance(cra_metal, cra_ligand)``
        when the table has no entry for the pair.
        """
        vals = self.find_ideal_distances(cra_metal.atom.element, cra_ligand.atom.element)
        if not vals:
            # if not found
            return self.monlib.find_ideal_distance(cra_metal, cra_ligand)
        return max(x["median"] for x in vals)
    # find_max_dist()

    def find_ideal_distances(self, el_metal, el_ligand):
        """Return the table entries for (metal, ligand) elements, or [] if absent.

        Both arguments only need a ``name`` attribute.
        """
        by_ligand = self.metals.get(el_metal.name)
        if by_ligand is None or el_ligand.name not in by_ligand:
            return []
        return by_ligand[el_ligand.name]
    # find_ideal_distances

    def setup_restraints(self, st):
        """Turn link-id-less MetalC connections of *st* into Refmac ``exte dist`` keywords.

        For every connection with an empty ``link_id`` and type MetalC whose
        two atoms are found in the first model, partner1 is treated as the
        metal and partner2 as the ligand.  For each ligand element with
        usable table entries (``mad > 0``), one ``exte dist`` line per entry is
        produced with sigma ``max(0.02, 1.5 * mad)``; ``symm y`` is appended
        when the connection's asu flag is Different.  Details are logged.

        Returns:
            (keywords, indices): the list of Refmac keyword strings and the
            sorted list of indices into ``st.connections`` that were replaced
            by those keywords (presumably for the caller to delete).
            A connection is only listed when at least one keyword was
            generated for it, so a link is never dropped without replacement.
        """
        ret = [] # returns Refmac keywords
        lookup = {x.atom: x for x in st[0].all()}
        coords = {}
        todel = []
        for con_idx, con in enumerate(st.connections):
            if con.link_id != "" or con.type != gemmi.ConnectionType.MetalC:
                continue
            cra1 = st[0].find_cra(con.partner1, ignore_segment=True)
            cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
            if cra1.atom is None or cra2.atom is None:
                continue
            ener_ideal = self.monlib.find_ideal_distance(cra1, cra2)
            # coords[metal element][metal atom] -> [(ligand atom, connection index, ener_lib ideal)]
            coords.setdefault(cra1.atom.element, {}).setdefault(cra1.atom, []).append(
                (cra2.atom, con_idx, ener_ideal))
        if coords:
            logger.writeln("Metal coordinations detected")
        for metal, sites in coords.items():
            logger.writeln(" Metal: {}".format(metal.name))
            ligand_els = {x[0].element for ligs in sites.values() for x in ligs}
            logger.writeln(" ideal distances")
            ideals = {}
            for el in ligand_els:
                logger.write(" {}: ".format(el.name))
                vals = self.find_ideal_distances(metal, el)
                if not vals:
                    ener_ideals = sorted({x[2] for ligs in sites.values() for x in ligs
                                          if x[0].element == el})
                    logger.write(" ".join("{:.2f}".format(x) for x in ener_ideals))
                    logger.writeln(" (from ener_lib)")
                else:
                    logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
                    # entries without spread (mad <= 0) cannot provide a sigma and are skipped
                    ideals[el] = [(x["median"], max(0.02, x["mad"]*1.5)) for x in vals if x["mad"] > 0]
            logger.writeln("")
            for site_no, am in enumerate(sites, start=1):
                logger.writeln(" site {}: {}".format(site_no, lookup[am]))
                for lig_no, (lig, con_idx, _) in enumerate(sites[am], start=1):
                    con = st.connections[con_idx]
                    logger.writeln(" ligand {}: {} dist= {:.2f}".format(lig_no, lookup[lig],
                                                                        con.reported_distance))
                    # Keep the original connection when there is nothing to replace it
                    # with: either no table entry at all, or only entries with mad <= 0.
                    if not ideals.get(lig.element):
                        continue
                    todel.append(con_idx)
                    specs = [make_atom_spec(x) for x in (lookup[am], lookup[lig])]
                    for ideal, sigma in ideals[lig.element]:
                        exte_str = "exte dist first {} seco {} ".format(*specs)
                        exte_str += "valu {:.4f} sigm {:.4f} type 1 ".format(ideal, sigma)
                        if con.asu == gemmi.Asu.Different:
                            exte_str += "symm y"
                        ret.append(exte_str)
            logger.writeln("")
        return ret, sorted(set(todel))
    # setup_restraints()