servalcat 0.4.88__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp313-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +823 -0
  7. servalcat/refine/refine_geom.py +220 -0
  8. servalcat/refine/refine_spa.py +345 -0
  9. servalcat/refine/refine_xtal.py +268 -0
  10. servalcat/refine/spa.py +136 -0
  11. servalcat/refine/xtal.py +273 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +403 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +473 -0
  18. servalcat/spa/fsc.py +387 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +972 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1397 -0
  27. servalcat/utils/fileio.py +737 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +712 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +782 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +256 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1622 -0
  40. servalcat/xtal/twin.py +115 -0
  41. servalcat-0.4.88.dist-info/METADATA +55 -0
  42. servalcat-0.4.88.dist-info/RECORD +45 -0
  43. servalcat-0.4.88.dist-info/WHEEL +5 -0
  44. servalcat-0.4.88.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.88.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,782 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.refmac import refmac_keywords
11
+ from servalcat import ext
12
+ import os
13
+ import io
14
+ import gemmi
15
+ import string
16
+ import random
17
+ import numpy
18
+ import pandas
19
+ import json
20
+ import fnmatch
21
+
22
# Fallback scale factor for hydrogen bond lengths: passed as default_scale when
# hydrogens are moved to nucleus positions (see add_hydrogens) and reported by
# check_monlib_support_nucleus_distances when a dictionary lacks nucleus distances.
default_proton_scale = 1.13 # scale of X-proton distance to X-H(e) distance
23
+
24
def decide_new_mod_id(mod_id, mods):
    """Return a modification id based on mod_id that is not contained in mods.

    For ids shorter than 8 characters a single character (0-9, a-z) is appended,
    separated by "-" when there is room for it. If every such candidate is taken,
    or the id is already 8+ characters long, random "modXXXX" ids are tried until
    an unused one is found.
    """
    # Refmac only allows up to 8 characters
    alphabet = string.digits + string.ascii_lowercase
    if len(mod_id) < 8:
        # a 7-character id only has room for the suffix, not for the separator
        separator = "" if len(mod_id) == 7 else "-"
        for suffix in alphabet:
            candidate = "{}{}{}".format(mod_id, separator, suffix)
            if candidate not in mods:
                return candidate

    # give up keeping original name
    # XXX risk of infinite loop.. less likely though
    while True:
        candidate = "mod" + "".join([random.choice(alphabet) for _ in range(4)])
        if candidate not in mods:
            return candidate
# decide_new_mod_id()
39
+
40
def rename_cif_modification_if_necessary(doc, known_ids):
    """Rename modifications in a CIF document whose ids clash with known_ids.

    The document is modified in place: the _chem_mod.id values, the names of the
    corresponding mod_* blocks, the *.mod_id columns inside those blocks and the
    _chem_link.mod_id_1/mod_id_2 references are all updated.

    Parameters
    ----------
    doc : CIF document (iterable of blocks, as returned by gemmi.cif.read)
    known_ids : container of modification ids that are already in use

    Returns
    -------
    dict mapping each original modification id to its new id (empty if nothing
    was renamed).
    """
    # FIXME Problematic if other file refers to modification that is renamed in this function - but how can we know?
    trans = {}
    for b in doc:
        for row in b.find("_chem_mod.", ["id"]):
            mod_id = row.str(0)
            if mod_id in known_ids:
                new_id = decide_new_mod_id(mod_id, known_ids)
                trans[mod_id] = new_id
                row[0] = new_id # modify id
                logger.writeln("INFO:: renaming modification id {} to {}".format(mod_id, new_id))

    # modify ids in mod_* blocks
    for mod_id in trans:
        b = doc.find_block("mod_{}".format(mod_id))
        if not b: # should raise error?
            logger.writeln("WARNING:: inconsistent mod description for {}".format(mod_id))
            continue
        b.name = "mod_{}".format(trans[mod_id]) # modify name
        for item in b:
            # items that are not loops (e.g. name/value pairs) have no loop object;
            # skip them instead of failing on item.loop.tags
            if item.loop is None:
                continue
            for tag in item.loop.tags:
                if tag.endswith(".mod_id"):
                    for row in b.find(tag[:tag.rindex(".")+1], ["mod_id"]):
                        row[0] = trans[mod_id]

    # Update mod id in links
    if trans:
        for b in doc:
            for row in b.find("_chem_link.", ["mod_id_1", "mod_id_2"]):
                for i in range(2):
                    if row.str(i) in trans:
                        row[i] = trans[row.str(i)]

    return trans
# rename_cif_modification_if_necessary()
75
+
76
def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns=False,
                         ignore_monomer_dir=False, update_old_atom_names=True,
                         params=None):
    """Build a gemmi monomer library for the residues found in the first model of st.

    Parameters
    ----------
    st : structure; st[0] is inspected for residue names
    monomer_dir : monomer library directory; when None, $CLIBD_MON is used
        (unless ignore_monomer_dir is set)
    cif_files : optional list of restraint cif files; their monomers and links
        replace entries of the same name already loaded
    stop_for_unknowns : raise instead of warn when a residue has no dictionary
    ignore_monomer_dir : do not read any monomer directory
    update_old_atom_names : call monlib.update_old_atom_names(st) and log its result
    params : optional dict; params["restr"]["torsion_include"] is passed to
        update_torsions()

    Returns
    -------
    The populated monomer library.

    Raises
    ------
    RuntimeError
        if monomer_dir is not a directory, if a cif file defines a monomer with
        more than one atom but no bond length values, or if stop_for_unknowns is
        set and some residues have no dictionary.
    """
    resnames = st[0].get_all_residue_names()

    if monomer_dir is None and not ignore_monomer_dir:
        if "CLIBD_MON" not in os.environ:
            logger.error("WARNING: CLIBD_MON is not set")
        else:
            monomer_dir = os.environ["CLIBD_MON"]

    if cif_files is None:
        cif_files = []

    if monomer_dir and not ignore_monomer_dir:
        if not os.path.isdir(monomer_dir):
            raise RuntimeError("not a directory: {}".format(monomer_dir))

        logger.writeln("Reading monomers from {}".format(monomer_dir))
        monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
    else:
        monlib = gemmi.MonLib()

    block_prefix = "comp_"
    for f in cif_files:
        logger.writeln("Reading monomer: {}".format(f))
        doc = gemmi.cif.read(f)
        for b in doc:
            atom_id_list = b.find_values("_chem_comp_atom.atom_id")
            if atom_id_list:
                # strip only the leading "comp_"; str.replace would also remove
                # the substring from the middle of a block name
                if b.name.startswith(block_prefix):
                    name = b.name[len(block_prefix):]
                else:
                    name = b.name
                if name in monlib.monomers:
                    logger.writeln("WARNING:: updating monomer {} using {}".format(name, f))
                    del monlib.monomers[name]

                # Check if bond length values are included
                # This is to fail if cif file is e.g. from PDB website
                if len(atom_id_list) > 1 and not b.find_values("_chem_comp_bond.value_dist"):
                    raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(f, name))

            for row in b.find("_chem_link.", ["id"]):
                link_id = row.str(0)
                if link_id in monlib.links:
                    logger.writeln("WARNING:: updating link {} using {}".format(link_id, f))
                    del monlib.links[link_id]

        # If modification id is duplicated, need to rename
        rename_cif_modification_if_necessary(doc, monlib.modifications)
        monlib.read_monomer_doc(doc)
        for b in doc:
            for row in b.find("_chem_comp.", ["id", "group"]):
                if row.str(0) in monlib.monomers:
                    monlib.monomers[row.str(0)].set_group(row.str(1))

    not_loaded = set(resnames).difference(monlib.monomers)
    if not_loaded:
        logger.writeln("WARNING: monomers not loaded: {}".format(" ".join(not_loaded)))

    logger.writeln("Monomer library loaded: {} monomers, {} links, {} modifications".format(len(monlib.monomers),
                                                                                             len(monlib.links),
                                                                                             len(monlib.modifications)))
    logger.writeln(" loaded monomers: {}".format(" ".join([x for x in monlib.monomers])))
    logger.writeln("")

    logger.writeln("Checking if unknown atoms exist..")

    unknown_cc = set()
    for chain in st[0]:
        unknown_cc.update(res.name for res in chain if res.name not in monlib.monomers)
    if unknown_cc:
        if stop_for_unknowns:
            raise RuntimeError("Provide restraint cif file(s) for {}".format(",".join(unknown_cc)))
        else:
            logger.writeln("WARNING: ad-hoc restraints will be generated for {}".format(",".join(unknown_cc)))
            logger.writeln(" it is strongly recommended to generate them using AceDRG.")

    if update_old_atom_names:
        logger.write(monlib.update_old_atom_names(st))

    if params:
        update_torsions(monlib, params.get("restr", {}).get("torsion_include", {}))

    return monlib
# load_monomer_library()
158
+
159
def fix_elements_in_model(monlib, st):
    """Overwrite atom elements in st[0] with those defined in the monomer library.

    Atoms whose residue or atom name is not present in the library are left
    untouched. Every correction is logged.
    """
    expected_elements = {}
    for mon_name in monlib.monomers:
        expected_elements[mon_name] = {a.id: a.el for a in monlib.monomers[mon_name].atoms}

    for chain in st[0]:
        for res in chain:
            by_atom_name = expected_elements.get(res.name)
            if not by_atom_name:
                continue # should not happen
            for at in res:
                # for example atom names of element D may be different, which will be sorted later
                if at.name not in by_atom_name:
                    continue
                el = by_atom_name[at.name]
                if at.element != el:
                    logger.writeln(f"WARNING: correcting element of {st[0].get_cra(at)} to {el.name}")
                    at.element = el
# fix_elements_in_model()
173
+
174
def update_torsions(monlib, params):
    """Override torsion targets in the monomer library according to rules.

    Only rules carrying at least one of "tors_value", "tors_sigma" or
    "tors_period" are used. A rule selects monomers by "residue" (wildcard
    pattern), by "group", or links by "link" (wildcard pattern); torsions whose
    label matches the rule's "tors_name" pattern get their value / esd / period
    replaced.
    """
    # take subset
    override_keys = ("tors_value", "tors_sigma", "tors_period")
    rules = [p for p in params if any(x in p for x in override_keys)]
    if not rules:
        return
    logger.writeln("Updating torsion targets in dictionaries")
    # rule key -> attribute of the torsion object that receives the value
    targets = (("tors_value", "value"), ("tors_sigma", "esd"), ("tors_period", "period"))
    for rule in rules:
        if "residue" in rule:
            torsion_lists = [cc.rt.torsions for cc in monlib.monomers.values()
                             if fnmatch.fnmatch(cc.name, rule["residue"])]
        elif "group" in rule:
            g = gemmi.ChemComp.read_group(rule["group"])
            # should warn if g is Null
            torsion_lists = [cc.rt.torsions for cc in monlib.monomers.values()
                             if cc.group == g]
        elif "link" in rule:
            torsion_lists = [ln.rt.torsions for ln in monlib.links.values()
                             if fnmatch.fnmatch(ln.id, rule["link"])]
        else:
            torsion_lists = []
        if not torsion_lists:
            continue
        logger.writeln(f" rule = {rule}")
        for torsions in torsion_lists:
            for t in torsions:
                if not fnmatch.fnmatch(t.label, rule["tors_name"]):
                    continue
                for key, attr in targets:
                    if key in rule:
                        setattr(t, attr, rule[key])
# update_torsions()
208
+
209
def make_torsion_rules(restr_params):
    """Build the include/exclude rule lists for torsion restraints.

    Starts from built-in include rules and an empty exclude list, then applies
    restr_params["torsion_include"] / ["torsion_exclude"] in order: an entry
    with a false "flag" empties the corresponding list; an entry with a true
    "flag" is appended when it has a selector ("residue", "group" or "link")
    and a "tors_name". The final rules are logged.

    Returns (include_rules, exclude_rules).
    """
    # Defaults
    include_rules = [{"group": "peptide", "tors_name": "chi*"},
                     {"link": "*", "tors_name": "omega"},
                     {"residue": "*", "tors_name": "sp2_sp2*"},
                     {"link": "*", "tors_name": "sp2_sp2*"},
                     ]
    exclude_rules = []

    # Override include/exclude rules
    for name, rules in (("torsion_include", include_rules),
                        ("torsion_exclude", exclude_rules)):
        for p in restr_params.get(name, []):
            if not p["flag"]:
                rules.clear()
                continue
            r = {k: p[k] for k in ("residue", "group", "link") if k in p}
            if r and "tors_name" in p:
                r["tors_name"] = p["tors_name"]
                rules.append(r)

    # How to tell about hydrogen?
    logger.writeln("Torsion angle rules:")
    for label, rr in (("include", include_rules), ("exclude", exclude_rules)):
        logger.writeln(f" {label}:")
        if not rr:
            logger.writeln(" none")
        for r in rr:
            logger.writeln(f" {r}")

    return include_rules, exclude_rules
# make_torsion_rules()
244
+
245
def select_restrained_torsions(monlib, include_rules, exclude_rules):
    """Select torsion labels to restrain for each monomer and link in monlib.

    Torsions defined by modifications (other than deletions) are attributed to
    the monomer named by the modification, or to all monomers of its group when
    no monomer is named. A torsion is selected when its label matches the
    "tors_name" pattern of an applicable include rule and of no applicable
    exclude rule. A rule applies to a monomer through its "residue" pattern or
    its "group", and to a link through its "link" pattern.

    Returns {"monomer": {id: sorted labels}, "link": {id: sorted labels}};
    ids with no selected torsion are omitted.
    """
    ret = {"monomer": {}, "link": {}}

    # Collect monomer/link related torsions
    all_tors = {"mon": {}, "link": {}}
    groups = {}
    for mon_id in monlib.monomers:
        mon = monlib.monomers[mon_id]
        groups.setdefault(mon.group, []).append(mon_id)
        all_tors["mon"][mon_id] = [x.label for x in mon.rt.torsions]
    for mod_id in monlib.modifications:
        mod = monlib.modifications[mod_id]
        tors = [x.label for x in mod.rt.torsions if chr(x.id1.comp) in ("a", "c")] # don't need delete
        if not tors: continue
        gr = gemmi.ChemComp.read_group(mod.group_id)
        if mod.comp_id and mod.comp_id in all_tors["mon"]:
            all_tors["mon"][mod.comp_id].extend(tors)
        elif not mod.comp_id and gr in groups:
            for mon_id in groups[gr]:
                all_tors["mon"][mon_id].extend(tors)
    for lnk_id in monlib.links:
        lnk = monlib.links[lnk_id]
        if lnk.rt.torsions:
            all_tors["link"][lnk_id] = [x.label for x in lnk.rt.torsions]
    for k in all_tors:
        for kk in all_tors[k]:
            all_tors[k][kk] = set(all_tors[k][kk])

    def monomer_rule_applies(r, mon):
        # Whether rule r targets monomer mon
        if "tors_name" not in r:
            return False
        if "residue" in r and fnmatch.fnmatch(mon, r["residue"]):
            return True
        # Only use the group criterion when the rule has one; evaluating
        # read_group("") for link/residue-only rules would make them match
        # every monomer whose group is unset.
        return "group" in r and mon in groups.get(gemmi.ChemComp.read_group(r["group"]), [])

    def link_rule_applies(r, lnk):
        # Whether rule r targets link lnk
        return "tors_name" in r and "link" in r and fnmatch.fnmatch(lnk, r["link"])

    def select(labels, applies, target):
        # Apply include rules, then exclude rules, to the labels of one target
        use_tors = []
        for r in include_rules:
            if applies(r, target):
                use_tors.extend(x for x in labels if fnmatch.fnmatch(x, r["tors_name"]))
        for r in exclude_rules:
            if applies(r, target):
                use_tors = [x for x in use_tors if not fnmatch.fnmatch(x, r["tors_name"])]
        return sorted(use_tors)

    # Apply include/exclude rule
    for mon in all_tors["mon"]:
        use_tors = select(all_tors["mon"][mon], monomer_rule_applies, mon)
        if use_tors:
            ret["monomer"][mon] = use_tors
    for lnk in all_tors["link"]:
        use_tors = select(all_tors["link"][lnk], link_rule_applies, lnk)
        if use_tors:
            ret["link"][lnk] = use_tors

    return ret
# select_restrained_torsions()
302
+
303
def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_error=True, check_hydrogen=False,
                     use_cispeps=False, add_metal_restraints=True, params=None):
    """Create a gemmi topology for st and report problems found on the way.

    Parameters
    ----------
    st : structure (first model is used)
    monlib : monomer library
    h_change : hydrogen handling passed to gemmi.prepare_topology
    ignore_unknown_links, use_cispeps : passed to gemmi.prepare_topology
    raise_error : raise RuntimeError for unknown monomers/atoms, link related
        atom problems, or NaN positions
    check_hydrogen : also take hydrogen atoms into account when collecting
        atoms missing from the dictionaries
    add_metal_restraints : set up metal coordination restraints with
        MetalCoordination; the affected connections are removed from
        st.connections while the topology is built and put back afterwards
    params : optional dict; parsed "exte" keywords from the metal restraints
        are appended to params["exte"]

    Returns
    -------
    (topo, keywords) where keywords is the list of Refmac keywords for metal
    restraints (empty when add_metal_restraints is False).

    Raises
    ------
    RuntimeError
        for duplicated atoms, and (when raise_error is set) for the problems
        listed above.
    """
    # Check duplicated atoms
    bad = []
    for chain in st[0]:
        bad_res = []
        for res in chain:
            n_uniq = len({(a.name, a.altloc) for a in res})
            if n_uniq != len(res):
                bad_res.append(str(res.seqid))
        if bad_res:
            bad.append(" chain {}: {}".format(chain.name, " ".join(bad_res)))
    if bad:
        raise RuntimeError("Following residues have duplicated atoms. Check your model.\n{}".format("\n".join(bad)))

    con_bak = []
    try:
        if add_metal_restraints:
            metalc = MetalCoordination(monlib)
            keywords, todel = metalc.setup_restraints(st)
            for i in sorted(todel, reverse=True):
                # temporarily remove connection not to put a bond restraint
                con = st.connections.pop(i)
                con_bak.append((i, con))
                # flag non-hydrogen
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                cra2.atom.calc_flag = gemmi.CalcFlag.NoHydrogen
            if params:
                parsed = refmac_keywords.parse_keywords(keywords).get("exte")
                if parsed:
                    params["exte"] = params.get("exte", []) + parsed
        else:
            keywords = []
        # these checks can be done after sorting links
        logger.writeln("Creating restraints..")
        sio = io.StringIO()
        topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
                                      ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
        for l in sio.getvalue().splitlines():
            logger.writeln(" " + l)
        unknown_cc = set()
        link_related = set()
        nan_hydr = set()

        def extra_defined(res1, res2): # TODO should check alt
            for link in topo.extras:
                res12 = (link.res1, link.res2)
                if link.link_id and (res12 == (res1, res2) or res12 == (res2, res1)):
                    return True
            return False

        # collect info
        info = {}
        for cinfo in topo.chain_infos:
            toadd = info.setdefault(cinfo.chain_ref.name, {})
            if cinfo.polymer:
                gaps = []
                for rinfo in cinfo.res_infos:
                    if (rinfo.prev and rinfo.prev[0].link_id in ("gap", "") and
                        not extra_defined(rinfo.prev[0].res1, rinfo.prev[0].res2)):
                        gaps.append((rinfo.prev[0].res1, rinfo.prev[0].res2))
                toadd["polymer"] = (str(cinfo.polymer_type).replace("PolymerType.", ""),
                                    cinfo.res_infos[0].res.seqid,
                                    cinfo.res_infos[-1].res.seqid,
                                    len(cinfo.res_infos), gaps)
            else:
                l = toadd.setdefault("nonpolymer", [])
                for ri in cinfo.res_infos:
                    l.append(ri.res.name)
        logger.writeln("\nChain info:")
        for chain in info:
            logger.writeln(" chain {}".format(chain))
            if "polymer" in info[chain]:
                logger.writeln(" {}: {}..{} ({} residues)".format(*info[chain]["polymer"][:-1]))
                for gap in info[chain]["polymer"][-1]:
                    logger.writeln(" gap between {} and {}".format(*gap))
            if "nonpolymer" in info[chain]:
                n_res = len(info[chain]["nonpolymer"])
                uniq = set(info[chain]["nonpolymer"])
                logger.writeln(" ligands: {} ({} residues)".format(" ".join(uniq), n_res))
        logger.writeln("")

        for cinfo in topo.chain_infos:
            for rinfo in cinfo.res_infos:
                cc_org = monlib.monomers[rinfo.res.name] if rinfo.res.name in monlib.monomers else None
                for ia in reversed(range(len(rinfo.res))):
                    atom = rinfo.res[ia]
                    atom_str = "{}/{} {}/{}".format(cinfo.chain_ref.name, rinfo.res.name, rinfo.res.seqid, atom.name)
                    cc = rinfo.get_final_chemcomp(atom.altloc)
                    if not cc.find_atom(atom.name):
                        # warning message should have already been given by gemmi
                        if cc_org and cc_org.find_atom(atom.name):
                            # atom exists in the unmodified monomer, so a link/modification removed it
                            if check_hydrogen or not atom.is_hydrogen():
                                link_related.add(rinfo.res.name)
                        else:
                            if check_hydrogen or not atom.is_hydrogen():
                                unknown_cc.add(rinfo.res.name)

                    if atom.is_hydrogen() and atom.calc_flag == gemmi.CalcFlag.Dummy:
                        logger.writeln(" Warning: hydrogen {} could not be added - Check dictionary".format(atom_str))
                        unknown_cc.add(rinfo.res.name)
                    elif any(numpy.isnan(atom.pos.tolist())): # TODO add NaN test before prepare_toplogy
                        logger.writeln(" Warning: {} position NaN!".format(atom_str))
                        nan_hydr.add(rinfo.res.name)

        if raise_error and (unknown_cc or link_related):
            msgs = []
            if unknown_cc: msgs.append("restraint cif file(s) for {}".format(",".join(unknown_cc)))
            if link_related: msgs.append("proper link cif file(s) for {} or check your model".format(",".join(link_related)))
            raise RuntimeError("Provide {}".format(" and ".join(msgs)))
        if raise_error and nan_hydr:
            raise RuntimeError("Some hydrogen positions became NaN. The geometry of your model may be of low quality. Consider not adding hydrogen")
        if not use_cispeps:
            topo.set_cispeps_in_structure(st)
    finally:
        # Put the temporarily removed metal connections back at their original
        # positions, also when an error is raised, so that st is not left
        # without them. Ascending index order restores the original layout.
        for i, con in sorted(con_bak, key=lambda x: x[0]):
            st.connections.insert(i, con)
    return topo, keywords
# prepare_topology()
420
+
421
def check_monlib_support_nucleus_distances(monlib, resnames):
    """Check that the dictionaries of resnames provide nucleus X-H distances.

    Logs an error for every monomer that is not loaded and a warning listing
    monomers having a bond to hydrogen without a nucleus distance.

    Returns True only if every monomer is loaded and complete.
    """
    all_ok = True
    missing_nucleus = []
    for resn in resnames:
        if resn not in monlib.monomers:
            logger.error("ERROR: monomer information of {} not loaded".format(resn))
            all_ok = False
            continue

        mon = monlib.monomers[resn]
        for bond in mon.rt.bonds:
            involves_h = (mon.get_atom(bond.id1.atom).is_hydrogen(),
                          mon.get_atom(bond.id2.atom).is_hydrogen())
            # a value that differs from itself is NaN, i.e. not defined
            if any(involves_h) and bond.value_nucleus != bond.value_nucleus:
                missing_nucleus.append(resn)
                all_ok = False
                break

    if missing_nucleus:
        logger.writeln("WARNING: nucleus distance is not found for: {}".format(" ".join(missing_nucleus)))
        logger.writeln(" default scale ({}) is used for nucleus distances.".format(default_proton_scale))
    return all_ok
# check_monlib_support_nucleus_distances()
445
+
446
def remove_duplicated_links(connections):
    """Remove connections that join the same pair of atoms, in place.

    Two connections are duplicates when their partners (in either order) have
    the same chain name, sequence number, insertion code, atom name and altloc.
    Of each group of duplicates, the first connection with a non-blank link_id
    is kept, or the first connection if none has one.
    """
    # ignore p.res_id.name?
    def partner_key(p):
        return (p.chain_name, p.res_id.seqid.num, p.res_id.seqid.icode, p.atom_name, p.altloc)

    # order-independent atom pair -> indices of connections
    by_pair = {}
    for idx, con in enumerate(connections):
        pair = tuple(sorted([partner_key(con.partner1), partner_key(con.partner2)]))
        by_pair.setdefault(pair, []).append(idx)

    todel = []
    for indices in by_pair.values():
        if len(indices) <= 1:
            continue
        with_id = [c for c in indices if connections[c].link_id.strip()]
        if len({connections[c].link_id for c in with_id}) > 1:
            logger.writeln(" WARNING: duplicated links are found with different link_id")
        tokeep = with_id[0] if with_id else indices[0]
        todel.extend(c for c in indices if c != tokeep)

    # delete from the end so that remaining indices stay valid
    for i in sorted(todel, reverse=True):
        del connections[i]
    if todel:
        logger.writeln(" {} duplicated links were removed.".format(len(todel)))
# remove_duplicated_links()
471
+
472
def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_found=True, find_symmetry_related=True,
                       metal_margin=1.1, add_only_from=None):
    """
    Identify link ids for st.connections and find new links
    This is required for correctly recognizing link in gemmi.prepare_topology
    Note that it ignores segment IDs
    FIXME it assumes only one bond exists in a link. It may not be the case in future.

    Parameters
    ----------
    st : structure; st.connections is updated in place
    monlib : monomer library used to identify links
    bond_margin : a new link is accepted when the distance is at most the
        ideal bond length times this factor
    find_metal_links : also look for metal - O/N/S/B contacts
    add_found : add newly found links to st.connections
    find_symmetry_related : also consider contacts to symmetry mates
    metal_margin : like bond_margin, but for metal links
    add_only_from : optional container of link ids; when given, only new links
        with these ids are added (does not apply to metal links)
    """
    # NOTE: the docstring must be the first statement to be available as __doc__
    metalc = MetalCoordination(monlib)
    from servalcat.utils import model

    logger.writeln("Checking links defined in the model")
    remove_duplicated_links(st.connections)
    for con in st.connections:
        if con.type == gemmi.ConnectionType.Hydrog: continue
        if con.link_id == "gap": continue # TODO check residues?
        cra1, cra2 = st[0].find_cra(con.partner1, ignore_segment=True), st[0].find_cra(con.partner2, ignore_segment=True)
        if None in (cra1.atom, cra2.atom):
            logger.writeln(" WARNING: atom(s) not found for link: id= {} atom1= {} atom2= {}".format(con.link_id, con.partner1, con.partner2))
            continue
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        if con.asu != gemmi.Asu.Same: # XXX info from metadata may be wrong
            nimage = st.cell.find_nearest_image(cra1.atom.pos, cra2.atom.pos, con.asu)
            image_idx = nimage.sym_idx
            dist = nimage.dist()
        else:
            image_idx = 0
            con.asu = gemmi.Asu.Same
            dist = cra1.atom.pos.dist(cra2.atom.pos)
        con.reported_distance = dist
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, image_idx)
        if con.link_id:
            link = monlib.get_link(con.link_id)
            inv = False
            if link is None:
                logger.writeln(" WARNING: link {} not found in the library. Please provide link dictionary.".format(con.link_id))
                con.link_id = "" # let gemmi find proper link in prepare_topology()
                continue
            else:
                match, _, _ = monlib.test_link(link, cra1.residue.name, cra1.atom.name, cra2.residue.name, cra2.atom.name)
                # try again with the partners swapped
                if not match and monlib.test_link(link, cra2.residue.name, cra2.atom.name, cra1.residue.name, cra1.atom.name)[0]:
                    match = True
                    inv = True
                if not match:
                    logger.writeln(" WARNING: link id and atoms mismatch: id= {} {}".format(link.id, atoms_str))
                    continue
        else:
            link, inv, _, _ = monlib.match_link(cra1.residue, cra1.atom.name, cra1.atom.altloc,
                                                cra2.residue, cra2.atom.name, cra2.atom.altloc)
            if link:
                con.link_id = link.id
            elif con.type == gemmi.ConnectionType.MetalC:
                logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
                if cra2.atom.element.is_metal:
                    inv = True # make metal first
            else:
                ideal_dist = monlib.find_ideal_distance(cra1, cra2)
                logger.writeln(" Link unidentified (simple bond will be used): {} dist= {:.2f} ideal= {:.2f}".format(atoms_str,
                                                                                                                      dist,
                                                                                                                      ideal_dist))
                continue
        if link:
            logger.writeln(" Link confirmed: id= {} {} dist= {:.2f} ideal= {:.2f}".format(link.id,
                                                                                          atoms_str,
                                                                                          dist,
                                                                                          link.rt.bonds[0].value))
            if con.link_id == "disulf":
                con.type = gemmi.ConnectionType.Disulf
        if inv:
            # store the partners in the order expected by the link definition
            con.partner1 = model.cra_to_atomaddress(cra2)
            con.partner2 = model.cra_to_atomaddress(cra1)
    if len(st.connections) == 0:
        logger.writeln(" no links defined in the model")

    logger.writeln("Finding new links (will be added if marked by *)")
    ns = gemmi.NeighborSearch(st[0], st.cell, 5.).populate()
    cs = gemmi.ContactSearch(4.)
    cs.ignore = gemmi.ContactSearch.Ignore.AdjacentResidues # may miss polymer links not contiguous in a chain?
    results = cs.find_contacts(ns)
    onsb = set(gemmi.Element(x) for x in "ONSB")
    n_found = 0
    for r in results:
        if st.find_connection_by_cra(r.partner1, r.partner2, ignore_segment=True): continue
        link, inv, _, _ = monlib.match_link(r.partner1.residue, r.partner1.atom.name, r.partner1.atom.altloc,
                                            r.partner2.residue, r.partner2.atom.name, r.partner2.atom.altloc,
                                            (r.dist / 1.4)**2)
        if link is None and r.partner2.atom.element.is_metal:
            inv = True # make metal first
        if inv:
            cra1, cra2 = r.partner2, r.partner1
        else:
            cra1, cra2 = r.partner1, r.partner2
        im = st.cell.find_nearest_pbc_image(cra1.atom.pos, cra2.atom.pos, r.image_idx)
        #assert r.image_idx == im.sym_idx # should we check this?
        if not find_symmetry_related and not im.same_asu():
            continue
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, r.image_idx)
        if im.pbc_shift != (0,0,0):
            atoms_str += " ({},{},{})".format(*im.pbc_shift)
        if link:
            if r.dist > link.rt.bonds[0].value * bond_margin: continue
            will_be_added = add_found and (not add_only_from or link.id in add_only_from)
            logger.writeln(" {}New link found: id= {} {} dist= {:.2f} ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                            link.id,
                                                                                            atoms_str,
                                                                                            r.dist,
                                                                                            link.rt.bonds[0].value))
        elif find_metal_links:
            # link only metal - O/N/S/B
            if r.partner1.atom.element.is_metal == r.partner2.atom.element.is_metal: continue
            if not cra2.atom.element in onsb: continue
            max_ideal = metalc.find_max_dist(cra1, cra2)
            if r.dist > max_ideal * metal_margin: continue # tolerance should be smaller than that for other links
            will_be_added = add_found
            logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                           atoms_str,
                                                                                           r.dist, max_ideal))
        else:
            continue
        n_found += 1
        if not will_be_added: continue
        con = gemmi.Connection()
        con.name = "added{}".format(n_found)
        if link:
            con.link_id = link.id
            con.type = gemmi.ConnectionType.Disulf if link.id == "disulf" else gemmi.ConnectionType.Covale
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
        con.partner1 = model.cra_to_atomaddress(cra1)
        con.partner2 = model.cra_to_atomaddress(cra2)
        con.reported_distance = r.dist
        st.connections.append(con)
    if n_found == 0:
        logger.writeln(" no links found")
# find_and_fix_links()
609
+
610
def add_hydrogens(st, monlib, pos="elec"):
    """Re-add hydrogen atoms (except for water) to st.

    Parameters
    ----------
    st : structure, modified in place
    monlib : monomer library
    pos : "elec" to keep hydrogens at electron positions, or "nucl" to move
        them to nucleus positions
    """
    assert pos in ("elec", "nucl")
    # prepare_topology() returns (topology, keywords); only the topology is needed here
    topo, _ = prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAddButWater, ignore_unknown_links=True)

    if pos == "nucl":
        logger.writeln("Generating hydrogens at nucleus positions")
        resnames = st[0].get_all_residue_names()
        check_monlib_support_nucleus_distances(monlib, resnames)
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus, default_scale=default_proton_scale)
    else:
        logger.writeln("Generating hydrogens at electron positions")
# add_hydrogens()
622
+
623
def make_atom_spec(cra):
    """Return the atom selection string for cra (chain/residue/atom).

    The result has the form "chain C resi N ins I atom A", followed by
    " alt X" when the atom has an alternate location. A blank insertion code
    is written as ".".
    """
    seqid = cra.residue.seqid
    icode = seqid.icode
    if not icode.strip():
        icode = "."
    spec = "chain {} resi {} ins {} atom {}".format(cra.chain.name, seqid.num, icode, cra.atom.name)
    altloc = cra.atom.altloc
    if altloc == "\0":
        return spec
    return spec + " alt {}".format(altloc)
# make_atom_spec()
633
+
634
def dictionary_block_names(monlib, topo):
    """Return the set of lower-cased dictionary block names used by topo.

    Contains every monomer id of monlib, "link_<id>" for every link in the
    topology (polymer links and extras) and "mod_<id>" for every modification
    applied to a residue.
    """
    used = {name.lower() for name in monlib.monomers}
    for chain_info in topo.chain_infos:
        for res_info in chain_info.res_infos:
            # won't be included if the name starts with "auto-", but don't do such checks here
            used.update("link_" + link.link_id.lower() for link in res_info.prev)
            used.update("mod_" + mod.id.lower() for mod in res_info.mods)
    used.update("link_" + extra.link_id.lower() for extra in topo.extras)
    return used
# dictionary_block_names()
647
+
648
def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
    """Find pairs of NCS-related polymer chains in st[0].

    Every pair of chains of the same polymer type (peptide or nucleic acid) is
    sequence-aligned. A pair is accepted when at least min_nalign residues are
    aligned and the average local rms (calculated with rms_loc_nlen) does not
    exceed max_rms_loc. A table of the accepted pairs is logged.

    Returns an ext.NcsList holding the accepted pairs.
    """
    logger.writeln("Finding NCS..")
    accepted_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD,
                      gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    # polymer type -> list of (chain, polymer span)
    polymers = {}
    for chain in st[0]:
        rs = chain.get_polymer()
        p_type = rs.check_polymer_type()
        if p_type in accepted_types:
            polymers.setdefault(p_type, []).append((chain, rs))

    scoring = gemmi.AlignmentScoring("p") # AlignmentScoring::partial_model
    al_res = []
    ncslist = ext.NcsList()
    for pt, pols in polymers.items():
        #print(pt, [x[0].name for x in pols])
        for i in range(len(pols)-1):
            chain_i, pol_i = pols[i]
            q = [x.name for x in pol_i]
            for j in range(i+1, len(pols)):
                chain_j, pol_j = pols[j]
                al = gemmi.align_sequence_to_polymer(q, pol_j, pt, scoring)
                if 0: # debug
                    wrap_width = 100
                    logger.writeln(f"seq1: {chain_i.name} {pol_i[0].seqid}..{pol_i[-1].seqid}")
                    logger.writeln(f"seq2: {chain_j.name} {pol_j[0].seqid}..{pol_j[-1].seqid}")
                    logger.writeln(f"match_count: {al.match_count}")
                    s1 = gemmi.one_letter_code(q)
                    p_seq = gemmi.one_letter_code(pol_j.extract_sequence())
                    p1, p2 = al.add_gaps(s1, 1), al.add_gaps(p_seq, 2)
                    for k in range(0, len(p1), wrap_width):
                        logger.writeln(" seq. {}".format(p1[k:k+wrap_width]))
                        logger.writeln(" {}".format(al.match_string[k:k+wrap_width]))
                        logger.writeln(" model {}\n".format(p2[k:k+wrap_width]))
                if al.match_count < min_nalign:
                    continue
                su = gemmi.calculate_superposition(pol_i, pol_j, pt, gemmi.SupSelect.All)
                obj = ext.NcsList.Ncs(al, pol_i, pol_j)
                obj.calculate_local_rms(rms_loc_nlen)
                # skip pairs for which no local rms could be evaluated
                if len(obj.local_rms) == 0 or numpy.all(numpy.isnan(obj.local_rms)):
                    continue
                ave_local_rms = numpy.nanmean(obj.local_rms)
                if ave_local_rms > max_rms_loc:
                    continue
                ncslist.ncss.append(obj)
                chain_fmt = "{} ({}..{})"
                al_res.append({"chain_1": chain_fmt.format(chain_i.name, pol_i[0].seqid, pol_i[-1].seqid),
                               "chain_2": chain_fmt.format(chain_j.name, pol_j[0].seqid, pol_j[-1].seqid),
                               "aligned": al.match_count,
                               "identity": al.calculate_identity(1),
                               "rms": su.rmsd,
                               "ave(rmsloc)": ave_local_rms,
                               })
    ncslist.set_pairs()
    df = pandas.DataFrame(al_res)
    df.index += 1 # number the pairs from 1 in the printed table
    logger.writeln(df.to_string(float_format="%.2f"))
    return ncslist
# prepare_ncs_restraints()
702
+
703
class MetalCoordination:
    """Metal-ligand distance restraints based on a metal coordination database.

    The database is a JSON file whose "metal_coordination" entry maps
    metal element name -> ligand element name -> list of records with
    "median", "mad" and "coord" values.
    """

    def __init__(self, monlib, dbfile=None):
        """Load the database.

        monlib : monomer library; used for fallback ideal distances and, when
            dbfile is None, to locate metals.json in the library directory
        dbfile : optional path of the JSON database
        """
        self.monlib = monlib
        if dbfile is None:
            dbfile = os.path.join(monlib.path(), "metals.json")
        if os.path.exists(dbfile):
            # close the file deterministically; JSON is UTF-8 by specification
            with open(dbfile, encoding="utf-8") as f:
                self.metals = json.load(f)["metal_coordination"]
        else:
            self.metals = {}
            logger.writeln("WARNING: {} not found".format(dbfile))
    # __init__()

    def find_max_dist(self, cra_metal, cra_ligand):
        """Return the largest median distance known for the metal-ligand element pair.

        Falls back to monlib.find_ideal_distance() when the database has no
        entry for the pair.
        """
        vals = self.find_ideal_distances(cra_metal.atom.element, cra_ligand.atom.element)
        if len(vals) == 0:
            # if not found
            return self.monlib.find_ideal_distance(cra_metal, cra_ligand)
        return max(x["median"] for x in vals)
    # find_max_dist()

    def find_ideal_distances(self, el_metal, el_ligand):
        """Return the database records for the element pair, or [] if there are none."""
        if el_metal.name not in self.metals or el_ligand.name not in self.metals[el_metal.name]:
            return []
        return self.metals[el_metal.name][el_ligand.name]
    # find_ideal_distances

    def setup_restraints(self, st):
        """Create distance restraints for metal connections without a link id.

        For every such connection whose ligand element has database records
        with a positive "mad", one "exte dist" keyword per record is generated.

        Returns (keywords, indices) where keywords is a list of Refmac keyword
        strings and indices lists the positions in st.connections of the
        connections covered by them.
        """
        ret = [] # returns Refmac keywords
        lookup = {x.atom: x for x in st[0].all()}
        # metal element -> metal atom -> [(ligand atom, connection index, ener_lib ideal distance)]
        coords = {}
        todel = []
        for i, con in enumerate(st.connections):
            if con.link_id == "" and con.type == gemmi.ConnectionType.MetalC:
                cra1 = st[0].find_cra(con.partner1, ignore_segment=True)
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                if None in (cra1.atom, cra2.atom): continue
                ener_ideal = self.monlib.find_ideal_distance(cra1, cra2)
                coords.setdefault(cra1.atom.element, {}).setdefault(cra1.atom, []).append((cra2.atom, i, ener_ideal))
        if coords:
            logger.writeln("Metal coordinations detected")
        for metal in coords:
            logger.writeln(" Metal: {}".format(metal.name))
            ligand_els = {x[0].element for m in coords[metal] for x in coords[metal][m]}
            logger.writeln(" ideal distances")
            ideals = {}
            for el in ligand_els:
                logger.write(" {}: ".format(el.name))
                vals = self.find_ideal_distances(metal, el)
                if len(vals) == 0:
                    ener_ideals = {x[2] for m in coords[metal] for x in coords[metal][m] if x[0].element == el}
                    logger.write(" ".join("{:.2f}".format(x) for x in ener_ideals))
                    logger.writeln(" (from ener_lib)")
                else:
                    logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
                    # records without a positive mad cannot provide a sigma
                    ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
            logger.writeln("")
            for i, am in enumerate(coords[metal]):
                logger.writeln(" site {}: {}".format(i+1, lookup[am]))
                for j, (lig, con_idx, _) in enumerate(coords[metal][am]):
                    con = st.connections[con_idx]
                    logger.writeln(" ligand {}: {} dist= {:.2f}".format(j+1, lookup[lig],
                                                                         con.reported_distance))
                    specs = [make_atom_spec(x) for x in (lookup[am], lookup[lig])]
                    if lig.element not in ideals:
                        continue
                    # NOTE(review): the connection is reported even when all records
                    # were filtered out above (empty list) - confirm this is intended
                    todel.append(con_idx)
                    for ideal, sigma in ideals[lig.element]:
                        exte_str = "exte dist first {} seco {} ".format(*specs)
                        exte_str += "valu {:.4f} sigm {:.4f} type 1 ".format(ideal, sigma)
                        if con.asu == gemmi.Asu.Different:
                            exte_str += "symm y"
                        ret.append(exte_str)
            logger.writeln("")
        return ret, list(set(todel))
    # setup_restraints()