servalcat 0.4.60__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (44) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,605 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat import ext
11
+ import os
12
+ import io
13
+ import gemmi
14
+ import string
15
+ import random
16
+ import numpy
17
+ import pandas
18
+ import json
19
+
20
+ default_proton_scale = 1.13 # scale of X-proton distance to X-H(e) distance
21
+
22
def decide_new_mod_id(mod_id, mods):
    """Return a modification id derived from *mod_id* that is not in *mods*.

    Refmac only allows up to 8 characters, so the strategy is:

    1. If *mod_id* is shorter than 8 characters, append a single character
       from ``0-9a-z`` (preceded by ``-`` when there is room for it) and
       return the first candidate that is not in *mods*.
    2. Otherwise give up keeping the original name and pick ``"mod"``
       followed by four characters from ``0-9a-z``.

    Args:
        mod_id: the id that collides with an existing one.
        mods: any container supporting ``in`` that holds the ids in use.

    Returns:
        An id that is not contained in *mods*.

    Raises:
        RuntimeError: if every candidate id is already in use.
    """
    import itertools  # only needed for the exhaustive fallback below

    letters = string.digits + string.ascii_lowercase
    if len(mod_id) < 8:
        # a 7-character id has room for the suffix only, not the separator
        sep = "" if len(mod_id) == 7 else "-"
        for suffix in letters:
            new_id = "{}{}{}".format(mod_id, sep, suffix)
            if new_id not in mods:
                return new_id

    # give up keeping original name
    # Random picks first (cheap and almost always succeed) ...
    for _ in range(1000):
        new_id = "mod" + "".join(random.choice(letters) for _ in range(4))
        if new_id not in mods:
            return new_id

    # ... then a deterministic scan so that the search always terminates.
    for combo in itertools.product(letters, repeat=4):
        new_id = "mod" + "".join(combo)
        if new_id not in mods:
            return new_id

    raise RuntimeError("No unused modification id is available for {}".format(mod_id))
# decide_new_mod_id()
37
+
38
def rename_cif_modification_if_necessary(doc, known_ids):
    """Rename modifications in *doc* whose ids are already in *known_ids*.

    The document is edited in place in three passes:

    1. every ``_chem_mod.id`` found in *known_ids* gets a new id from
       ``decide_new_mod_id()``;
    2. the matching ``mod_<id>`` block is renamed and every ``*.mod_id``
       value inside it is rewritten;
    3. ``_chem_link.mod_id_1`` / ``_chem_link.mod_id_2`` values that refer
       to a renamed modification are rewritten.

    Args:
        doc: the CIF document to edit (iterated block by block).
        known_ids: container of modification ids already in use.

    Returns:
        dict mapping each original id to its new id (empty if nothing
        was renamed).
    """
    # FIXME Problematic if other file refers to modification that is renamed in this function - but how can we know?
    trans = {}
    for b in doc:
        for row in b.find("_chem_mod.", ["id"]):
            mod_id = row.str(0)
            if mod_id in known_ids:
                new_id = decide_new_mod_id(mod_id, known_ids)
                trans[mod_id] = new_id
                row[0] = new_id # modify id
                logger.writeln("INFO:: renaming modification id {} to {}".format(mod_id, new_id))

    # modify ids in mod_* blocks
    for mod_id in trans:
        b = doc.find_block("mod_{}".format(mod_id))
        if not b: # should raise error?
            logger.writeln("WARNING:: inconsistent mod description for {}".format(mod_id))
            continue
        b.name = "mod_{}".format(trans[mod_id]) # modify name
        for item in b:
            # An item is either a loop or a single tag/value pair (or
            # something else, which carries no mod_id tag to rewrite).
            if item.loop is not None:
                tags = item.loop.tags
            elif item.pair is not None:
                tags = [item.pair[0]]
            else:
                continue
            for tag in tags:
                if tag.endswith(".mod_id"):
                    for row in b.find(tag[:tag.rindex(".")+1], ["mod_id"]):
                        row[0] = trans[mod_id]

    # Update mod id in links
    if trans:
        for b in doc:
            for row in b.find("_chem_link.", ["mod_id_1", "mod_id_2"]):
                for i in range(2):
                    if row.str(i) in trans:
                        row[i] = trans[row.str(i)]

    return trans
# rename_cif_modification_if_necessary()
73
+
74
def load_monomer_library(st, monomer_dir=None, cif_files=None, stop_for_unknowns=False,
                         ignore_monomer_dir=False, update_old_atom_names=True):
    """Build a monomer library covering the residues of the first model of *st*.

    Monomers are read from *monomer_dir* (falling back to the CLIBD_MON
    environment variable) unless *ignore_monomer_dir* is set, and then
    every file in *cif_files* is merged in. Monomers and links defined in
    a cif file replace same-named entries already loaded; clashing
    modification ids are renamed.

    Args:
        st: structure whose residue names decide what is loaded.
        monomer_dir: monomer library directory, or None to use CLIBD_MON.
        cif_files: extra restraint dictionary files (None means none).
        stop_for_unknowns: raise instead of warn when a residue has no
            monomer description.
        ignore_monomer_dir: do not read any monomer directory.
        update_old_atom_names: call monlib.update_old_atom_names(st) and
            log what it returns.

    Returns:
        The monomer library, or None when *monomer_dir* is not a directory.

    Raises:
        RuntimeError: if a cif file with more than one atom lacks bond
            length values, or if unknown residues exist and
            *stop_for_unknowns* is true.
    """
    resnames = st[0].get_all_residue_names()
    use_monomer_dir = not ignore_monomer_dir

    if use_monomer_dir and monomer_dir is None:
        if "CLIBD_MON" in os.environ:
            monomer_dir = os.environ["CLIBD_MON"]
        else:
            logger.error("WARNING: CLIBD_MON is not set")

    if cif_files is None:
        cif_files = []

    if use_monomer_dir and monomer_dir:
        if not os.path.isdir(monomer_dir):
            logger.error("ERROR: not a directory: {}".format(monomer_dir))
            return

        logger.writeln("Reading monomers from {}".format(monomer_dir))
        monlib = gemmi.read_monomer_lib(monomer_dir, resnames, ignore_missing=True)
    else:
        monlib = gemmi.MonLib()

    for cif_path in cif_files:
        logger.writeln("Reading monomer: {}".format(cif_path))
        doc = gemmi.cif.read(cif_path)
        for block in doc:
            atom_ids = block.find_values("_chem_comp_atom.atom_id")
            if atom_ids:
                comp_name = block.name.replace("comp_", "")
                if comp_name in monlib.monomers:
                    logger.writeln("WARNING:: updating monomer {} using {}".format(comp_name, cif_path))
                    del monlib.monomers[comp_name]

                # Check if bond length values are included
                # This is to fail if cif file is e.g. from PDB website
                has_bond_values = b_values = block.find_values("_chem_comp_bond.value_dist")
                if len(atom_ids) > 1 and not has_bond_values:
                    raise RuntimeError("{} does not contain bond length value for {}. You need to generate restraints (e.g. using acedrg).".format(cif_path, comp_name))

            for link_row in block.find("_chem_link.", ["id"]):
                link_id = link_row.str(0)
                if link_id in monlib.links:
                    logger.writeln("WARNING:: updating link {} using {}".format(link_id, cif_path))
                    del monlib.links[link_id]

        # If modification id is duplicated, need to rename
        rename_cif_modification_if_necessary(doc, monlib.modifications)
        monlib.read_monomer_doc(doc)
        for block in doc:
            for comp_row in block.find("_chem_comp.", ["id", "group"]):
                comp_id = comp_row.str(0)
                if comp_id in monlib.monomers:
                    monlib.monomers[comp_id].set_group(comp_row.str(1))

    not_loaded = set(resnames).difference(monlib.monomers)
    if not_loaded:
        logger.writeln("WARNING: monomers not loaded: {}".format(" ".join(not_loaded)))

    summary = "Monomer library loaded: {} monomers, {} links, {} modifications"
    logger.writeln(summary.format(len(monlib.monomers),
                                  len(monlib.links),
                                  len(monlib.modifications)))
    logger.writeln(" loaded monomers: {}".format(" ".join(name for name in monlib.monomers)))
    logger.writeln("")

    logger.writeln("Checking if unknown atoms exist..")

    unknown_cc = set()
    for chain in st[0]:
        unknown_cc.update(res.name for res in chain if res.name not in monlib.monomers)
    if unknown_cc and stop_for_unknowns:
        raise RuntimeError("Provide restraint cif file(s) for {}".format(",".join(unknown_cc)))
    if unknown_cc:
        logger.writeln("WARNING: ad-hoc restraints will be generated for {}".format(",".join(unknown_cc)))
        logger.writeln(" it is strongly recommended to generate them using AceDRG.")

    if update_old_atom_names:
        logger.write(monlib.update_old_atom_names(st))

    return monlib
# load_monomer_library()
153
+
154
def prepare_topology(st, monlib, h_change, ignore_unknown_links=False, raise_error=True, check_hydrogen=False,
                     use_cispeps=False, add_metal_restraints=True):
    """Create the gemmi topology for *st* and report problems found on the way.

    Steps:
      1. fail if any residue holds duplicated (name, altloc) atoms;
      2. if *add_metal_restraints*, let MetalCoordination produce keyword
         restraints and temporarily remove the connections it replaces so
         that no bond restraint is generated for them;
      3. call gemmi.prepare_topology() and log its warnings;
      4. log a per-chain summary (polymer range, gaps, ligands);
      5. collect residues with atoms unknown to the dictionary, hydrogens
         that could not be placed, and atoms with NaN positions;
      6. put the temporarily removed connections back. This is done even
         when an exception is raised, so *st.connections* is never left
         with entries missing.

    Args:
        st: structure to process (its first model is used).
        monlib: monomer library.
        h_change: hydrogen handling passed to gemmi.prepare_topology().
        ignore_unknown_links: passed to gemmi.prepare_topology().
        raise_error: raise RuntimeError for the problems collected in step 5.
        check_hydrogen: also count hydrogen atoms as unknown / link-related.
        use_cispeps: passed to gemmi.prepare_topology(); when false,
            topo.set_cispeps_in_structure(st) is called afterwards.
        add_metal_restraints: see step 2.

    Returns:
        (topo, keywords) where keywords is the list returned by
        MetalCoordination.setup_restraints() (empty if not requested).

    Raises:
        RuntimeError: duplicated atoms; or, when *raise_error* is true,
            unknown/link-related residues or NaN positions.
    """
    # Check duplicated atoms
    bad = []
    for chain in st[0]:
        bad_res = []
        for res in chain:
            n_uniq = len({(a.name, a.altloc) for a in res})
            if n_uniq != len(res):
                bad_res.append(str(res.seqid))
        if bad_res:
            bad.append(" chain {}: {}".format(chain.name, " ".join(bad_res)))
    if bad:
        raise RuntimeError("Following residues have duplicated atoms. Check your model.\n{}".format("\n".join(bad)))

    keywords = []
    con_bak = []  # (original index, connection) of temporarily removed connections
    try:
        if add_metal_restraints:
            metalc = MetalCoordination(monlib)
            keywords, todel = metalc.setup_restraints(st)
            # pop from the end so that the remaining indices stay valid
            for i in sorted(todel, reverse=True):
                # temporarily remove connection not to put a bond restraint
                con = st.connections.pop(i)
                con_bak.append((i, con))
                # flag non-hydrogen
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                cra2.atom.calc_flag = gemmi.CalcFlag.NoHydrogen

        # these checks can be done after sorting links
        logger.writeln("Creating restraints..")
        sio = io.StringIO()
        topo = gemmi.prepare_topology(st, monlib, h_change=h_change, warnings=sio, reorder=False,
                                      ignore_unknown_links=ignore_unknown_links, use_cispeps=use_cispeps)
        for l in sio.getvalue().splitlines(): logger.writeln(" " + l)
        unknown_cc = set()
        link_related = set()
        nan_hydr = set()

        def extra_defined(res1, res2): # TODO should check alt
            for link in topo.extras:
                res12 = (link.res1, link.res2)
                if link.link_id and (res12 == (res1, res2) or res12 == (res2, res1)):
                    return True
            return False

        # collect info
        info = {}
        for cinfo in topo.chain_infos:
            toadd = info.setdefault(cinfo.chain_ref.name, {})
            if cinfo.polymer:
                gaps = []
                for rinfo in cinfo.res_infos:
                    if (rinfo.prev and rinfo.prev[0].link_id in ("gap", "") and
                        not extra_defined(rinfo.prev[0].res1, rinfo.prev[0].res2)):
                        gaps.append((rinfo.prev[0].res1, rinfo.prev[0].res2))
                toadd["polymer"] = (str(cinfo.polymer_type).replace("PolymerType.", ""),
                                    cinfo.res_infos[0].res.seqid,
                                    cinfo.res_infos[-1].res.seqid,
                                    len(cinfo.res_infos), gaps)
            else:
                l = toadd.setdefault("nonpolymer", [])
                for ri in cinfo.res_infos:
                    l.append(ri.res.name)
        logger.writeln("\nChain info:")
        for chain in info:
            logger.writeln(" chain {}".format(chain))
            if "polymer" in info[chain]:
                logger.writeln(" {}: {}..{} ({} residues)".format(*info[chain]["polymer"][:-1]))
                for gap in info[chain]["polymer"][-1]:
                    logger.writeln(" gap between {} and {}".format(*gap))
            if "nonpolymer" in info[chain]:
                n_res = len(info[chain]["nonpolymer"])
                uniq = set(info[chain]["nonpolymer"])
                logger.writeln(" ligands: {} ({} residues)".format(" ".join(uniq), n_res))
        logger.writeln("")

        for cinfo in topo.chain_infos:
            for rinfo in cinfo.res_infos:
                cc_org = monlib.monomers[rinfo.res.name] if rinfo.res.name in monlib.monomers else None
                for ia in reversed(range(len(rinfo.res))):
                    atom = rinfo.res[ia]
                    atom_str = "{}/{} {}/{}".format(cinfo.chain_ref.name, rinfo.res.name, rinfo.res.seqid, atom.name)
                    cc = rinfo.get_final_chemcomp(atom.altloc)
                    if not cc.find_atom(atom.name):
                        # warning message should have already been given by gemmi
                        if cc_org and cc_org.find_atom(atom.name):
                            # known to the original monomer but not to the final
                            # (post-link/modification) one
                            if check_hydrogen or not atom.is_hydrogen():
                                link_related.add(rinfo.res.name)
                        else:
                            if check_hydrogen or not atom.is_hydrogen():
                                unknown_cc.add(rinfo.res.name)

                    if atom.is_hydrogen() and atom.calc_flag == gemmi.CalcFlag.Dummy:
                        logger.writeln(" Warning: hydrogen {} could not be added - Check dictionary".format(atom_str))
                        unknown_cc.add(rinfo.res.name)
                    elif any(numpy.isnan(atom.pos.tolist())): # TODO add NaN test before prepare_toplogy
                        logger.writeln(" Warning: {} position NaN!".format(atom_str))
                        nan_hydr.add(rinfo.res.name)

        if raise_error and (unknown_cc or link_related):
            msgs = []
            if unknown_cc: msgs.append("restraint cif file(s) for {}".format(",".join(unknown_cc)))
            if link_related: msgs.append("proper link cif file(s) for {} or check your model".format(",".join(link_related)))
            raise RuntimeError("Provide {}".format(" and ".join(msgs)))
        if raise_error and nan_hydr:
            raise RuntimeError("Some hydrogen positions became NaN. The geometry of your model may be of low quality. Consider not adding hydrogen")
        if not use_cispeps:
            topo.set_cispeps_in_structure(st)
    finally:
        # Re-insert in ascending index order so every connection returns to
        # its original position, whether or not an error occurred above.
        for i, con in sorted(con_bak, key=lambda item: item[0]):
            st.connections.insert(i, con)
    return topo, keywords
# prepare_topology()
267
+
268
def check_monlib_support_nucleus_distances(monlib, resnames):
    """Tell whether *monlib* has nucleus distances for all of *resnames*.

    A monomer fails the check when it is not loaded, or when any bond
    involving a hydrogen atom has a NaN ``value_nucleus``. Missing monomers
    are reported through logger.error(); monomers without nucleus distances
    are listed in a warning together with the default scale.

    Returns:
        True if every residue name passed the check, False otherwise.
    """
    def lacks_nucleus_value(mon):
        # True as soon as one X-H bond has no (NaN) nucleus distance
        for bond in mon.rt.bonds:
            involves_h = (mon.get_atom(bond.id1.atom).is_hydrogen(),
                          mon.get_atom(bond.id2.atom).is_hydrogen())
            # NaN is the only value that is not equal to itself
            if any(involves_h) and bond.value_nucleus != bond.value_nucleus:
                return True
        return False

    monomer_missing = False
    nucl_not_found = []
    for resn in resnames:
        if resn not in monlib.monomers:
            logger.error("ERROR: monomer information of {} not loaded".format(resn))
            monomer_missing = True
        elif lacks_nucleus_value(monlib.monomers[resn]):
            nucl_not_found.append(resn)

    if nucl_not_found:
        logger.writeln("WARNING: nucleus distance is not found for: {}".format(" ".join(nucl_not_found)))
        logger.writeln(" default scale ({}) is used for nucleus distances.".format(default_proton_scale))
    return not (monomer_missing or nucl_not_found)
# check_monlib_support_nucleus_distances()
292
+
293
def remove_duplicated_links(connections):
    """Delete duplicated entries from *connections* in place.

    Two connections are duplicates when they join the same pair of atoms
    (chain, residue number, insertion code, atom name, altloc), in either
    order. From each group the first connection with a non-blank link_id
    is kept, or simply the first one if none has a link_id.
    """
    # ignore p.res_id.name?
    def atom_key(p):
        return (p.chain_name, p.res_id.seqid.num, p.res_id.seqid.icode, p.atom_name, p.altloc)

    # group connection indices by their order-independent atom pair
    groups = {}
    for idx, con in enumerate(connections):
        pair = tuple(sorted([atom_key(con.partner1), atom_key(con.partner2)]))
        groups.setdefault(pair, []).append(idx)

    todel = []
    for indices in groups.values():
        if len(indices) < 2:
            continue
        with_id = [c for c in indices if connections[c].link_id.strip()]
        if len({connections[c].link_id for c in with_id}) > 1:
            logger.writeln(" WARNING: duplicated links are found with different link_id")
        tokeep = with_id[0] if with_id else indices[0]
        todel.extend(c for c in indices if c != tokeep)

    # delete from the end so that earlier indices stay valid
    for idx in sorted(todel, reverse=True):
        del connections[idx]
    if todel:
        logger.writeln(" {} duplicated links were removed.".format(len(todel)))
# remove_duplicated_links()
318
+
319
def find_and_fix_links(st, monlib, bond_margin=1.3, find_metal_links=True, add_found=True, find_symmetry_related=True,
                       add_only_from=None):
    """
    Identify link ids for st.connections and find new links
    This is required for correctly recognizing link in gemmi.prepare_topology
    Note that it ignores segment IDs
    FIXME it assumes only one bond exists in a link. It may not be the case in future.

    Args:
        st: structure; st.connections is modified in place.
        monlib: monomer library used to look up and match links.
        bond_margin: a newly found link is accepted only if the distance
            does not exceed (ideal bond length * bond_margin).
        find_metal_links: also accept metal - O/N/S/B contacts that do not
            match any link in the library.
        add_found: append newly found links to st.connections.
        find_symmetry_related: if false, skip contacts that are not within
            the same asymmetric unit.
        add_only_from: if given (and non-empty), only new links whose id is
            in this container are added.
    """
    from servalcat.utils import model

    metalc = MetalCoordination(monlib)

    logger.writeln("Checking links defined in the model")
    remove_duplicated_links(st.connections)
    for con in st.connections:
        if con.type == gemmi.ConnectionType.Hydrog: continue
        if con.link_id == "gap": continue # TODO check residues?
        cra1, cra2 = st[0].find_cra(con.partner1, ignore_segment=True), st[0].find_cra(con.partner2, ignore_segment=True)
        if None in (cra1.atom, cra2.atom):
            logger.writeln(" WARNING: atom(s) not found for link: id= {} atom1= {} atom2= {}".format(con.link_id, con.partner1, con.partner2))
            continue
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        if con.asu != gemmi.Asu.Same: # XXX info from metadata may be wrong
            nimage = st.cell.find_nearest_image(cra1.atom.pos, cra2.atom.pos, con.asu)
            image_idx = nimage.sym_idx
            dist = nimage.dist()
        else:
            image_idx = 0
            con.asu = gemmi.Asu.Same
            dist = cra1.atom.pos.dist(cra2.atom.pos)
        con.reported_distance = dist
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, image_idx)
        if con.link_id:
            link = monlib.get_link(con.link_id)
            inv = False
            if link is None:
                logger.writeln(" WARNING: link {} not found in the library. Please provide link dictionary.".format(con.link_id))
                con.link_id = "" # let gemmi find proper link in prepare_topology()
                continue
            else:
                # try the link as written, then with the two partners swapped
                match, _, _ = monlib.test_link(link, cra1.residue.name, cra1.atom.name, cra2.residue.name, cra2.atom.name)
                if not match and monlib.test_link(link, cra2.residue.name, cra2.atom.name, cra1.residue.name, cra1.atom.name)[0]:
                    match = True
                    inv = True
                if not match:
                    logger.writeln(" WARNING: link id and atoms mismatch: id= {} {}".format(link.id, atoms_str))
                    continue
        else:
            link, inv, _, _ = monlib.match_link(cra1.residue, cra1.atom.name, cra1.atom.altloc,
                                                cra2.residue, cra2.atom.name, cra2.atom.altloc)
            if link:
                con.link_id = link.id
            elif find_metal_links and con.type == gemmi.ConnectionType.MetalC:
                logger.writeln(" Metal link will be added: {} dist= {:.2f}".format(atoms_str, dist))
                if cra2.atom.element.is_metal:
                    inv = True # make metal first
            else:
                ideal_dist = monlib.find_ideal_distance(cra1, cra2)
                logger.writeln(" Link unidentified (simple bond will be used): {} dist= {:.2f} ideal= {:.2f}".format(atoms_str,
                                                                                                                     dist,
                                                                                                                     ideal_dist))
                continue
        if link:
            logger.writeln(" Link confirmed: id= {} {} dist= {:.2f} ideal= {:.2f}".format(link.id,
                                                                                          atoms_str,
                                                                                          dist,
                                                                                          link.rt.bonds[0].value))
            if con.link_id == "disulf":
                con.type = gemmi.ConnectionType.Disulf
        if inv:
            con.partner1 = model.cra_to_atomaddress(cra2)
            con.partner2 = model.cra_to_atomaddress(cra1)
    if len(st.connections) == 0:
        logger.writeln(" no links defined in the model")

    logger.writeln("Finding new links (will be added if marked by *)")
    ns = gemmi.NeighborSearch(st[0], st.cell, 5.).populate()
    cs = gemmi.ContactSearch(4.)
    cs.ignore = gemmi.ContactSearch.Ignore.AdjacentResidues # may miss polymer links not contiguous in a chain?
    results = cs.find_contacts(ns)
    onsb = set(gemmi.Element(x) for x in "ONSB")
    n_found = 0
    for r in results:
        if st.find_connection_by_cra(r.partner1, r.partner2, ignore_segment=True): continue
        link, inv, _, _ = monlib.match_link(r.partner1.residue, r.partner1.atom.name, r.partner1.atom.altloc,
                                            r.partner2.residue, r.partner2.atom.name, r.partner2.atom.altloc,
                                            (r.dist / 1.4)**2)
        if link is None and r.partner2.atom.element.is_metal:
            inv = True # make metal first
        if inv:
            cra1, cra2 = r.partner2, r.partner1
        else:
            cra1, cra2 = r.partner1, r.partner2
        im = st.cell.find_nearest_pbc_image(cra1.atom.pos, cra2.atom.pos, r.image_idx)
        #assert r.image_idx == im.sym_idx # should we check this?
        if not find_symmetry_related and not im.same_asu():
            continue
        atoms_str = "atom1= {} atom2= {} image= {}".format(cra1, cra2, r.image_idx)
        if im.pbc_shift != (0,0,0):
            atoms_str += " ({},{},{})".format(*im.pbc_shift)
        if link:
            if r.dist > link.rt.bonds[0].value * bond_margin: continue
            will_be_added = add_found and (not add_only_from or link.id in add_only_from)
            logger.writeln(" {}New link found: id= {} {} dist= {:.2f} ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                            link.id,
                                                                                            atoms_str,
                                                                                            r.dist,
                                                                                            link.rt.bonds[0].value))
        elif find_metal_links:
            # link only metal - O/N/S/B
            if r.partner1.atom.element.is_metal == r.partner2.atom.element.is_metal: continue
            if not cra2.atom.element in onsb: continue
            max_ideal = metalc.find_max_dist(cra1, cra2)
            if r.dist > max_ideal * 1.1: continue # tolerance should be smaller than that for other links
            will_be_added = add_found
            logger.writeln(" {}Metal link found: {} dist= {:.2f} max_ideal= {:.2f}".format("*" if will_be_added else " ",
                                                                                           atoms_str,
                                                                                           r.dist, max_ideal))
        else:
            # Neither a library link nor an allowed metal contact: not a
            # link. Without this, will_be_added would be unset (or stale
            # from a previous contact) below.
            continue
        n_found += 1
        if not will_be_added: continue
        con = gemmi.Connection()
        con.name = "added{}".format(n_found)
        if link:
            con.link_id = link.id
            con.type = gemmi.ConnectionType.Disulf if link.id == "disulf" else gemmi.ConnectionType.Covale
        if cra1.atom.element.is_metal or cra2.atom.element.is_metal:
            con.type = gemmi.ConnectionType.MetalC
        con.asu = gemmi.Asu.Same if im.same_asu() else gemmi.Asu.Different
        con.partner1 = model.cra_to_atomaddress(cra1)
        con.partner2 = model.cra_to_atomaddress(cra2)
        con.reported_distance = r.dist
        st.connections.append(con)
    if n_found == 0:
        logger.writeln(" no links found")
# find_and_fix_links()
454
+
455
def add_hydrogens(st, monlib, pos="elec"):
    """(Re)add hydrogen atoms to *st*, except for waters.

    Hydrogens are generated by prepare_topology() with
    gemmi.HydrogenChange.ReAddButWater. With ``pos="nucl"`` the X-H
    distances are then adjusted to nucleus positions, using
    default_proton_scale where the dictionary has no nucleus distance.

    Args:
        st: structure to modify in place.
        monlib: monomer library.
        pos: "elec" (electron positions) or "nucl" (nucleus positions).
    """
    assert pos in ("elec", "nucl")
    # prepare_topology() returns (topo, keywords); only topo is needed here
    topo, _ = prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAddButWater, ignore_unknown_links=True)

    if pos == "nucl":
        logger.writeln("Generating hydrogens at nucleus positions")
        resnames = st[0].get_all_residue_names()
        check_monlib_support_nucleus_distances(monlib, resnames)
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus, default_scale=default_proton_scale)
    else:
        logger.writeln("Generating hydrogens at electron positions")
# add_hydrogens()
467
+
468
def make_atom_spec(cra):
    """Format *cra* (chain/residue/atom) as an atom specification string.

    The result reads ``chain C resi N ins I atom A``, where a blank
    insertion code is written as ``.``; `` alt X`` is appended when the
    atom has an altloc other than the NUL character.
    """
    seqid = cra.residue.seqid
    icode = seqid.icode if seqid.icode.strip() else "."
    spec = "chain {} resi {} ins {} atom {}".format(cra.chain.name, seqid.num, icode, cra.atom.name)
    if cra.atom.altloc == "\0":
        return spec
    return spec + " alt {}".format(cra.atom.altloc)
# make_atom_spec()
478
+
479
def prepare_ncs_restraints(st, rms_loc_nlen=5, min_nalign=10, max_rms_loc=2.0):
    """Find NCS-related polymer chains in the first model of *st*.

    Chains of the same polymer type (peptide or nucleic acid) are compared
    pairwise. A pair is accepted when the sequence alignment has at least
    *min_nalign* matches and the mean local rms does not exceed
    *max_rms_loc*. Accepted pairs are logged as a table.

    Args:
        st: structure to analyse.
        rms_loc_nlen: value passed to Ncs.calculate_local_rms().
        min_nalign: minimum alignment match count.
        max_rms_loc: maximum allowed mean of the local rms values.

    Returns:
        ext.NcsList holding the accepted pairs (set_pairs() already called).
    """
    logger.writeln("Finding NCS..")
    accepted_types = (gemmi.PolymerType.PeptideL, gemmi.PolymerType.PeptideD,
                      gemmi.PolymerType.Dna, gemmi.PolymerType.Rna, gemmi.PolymerType.DnaRnaHybrid)
    polymers = {}
    for chain in st[0]:
        rs = chain.get_polymer()
        p_type = rs.check_polymer_type()
        if p_type in accepted_types:
            polymers.setdefault(p_type, []).append((chain, rs))

    scoring = gemmi.AlignmentScoring()
    scoring.match = 0
    scoring.mismatch = -1
    scoring.gapo = 0
    scoring.gape = -1

    al_res = []
    ncslist = ext.NcsList()
    for pt, pols in polymers.items():
        for i, (chain_i, pol_i) in enumerate(pols[:-1]):
            q = [x.name for x in pol_i]
            for chain_j, pol_j in pols[i+1:]:
                al = gemmi.align_sequence_to_polymer(q, pol_j, pt, scoring)
                if al.match_count < min_nalign: continue
                obj = ext.NcsList.Ncs(al, pol_i, pol_j)
                obj.calculate_local_rms(rms_loc_nlen)
                if len(obj.local_rms) == 0: continue
                ave_local_rms = numpy.mean(obj.local_rms)
                if ave_local_rms > max_rms_loc: continue
                # The superposition is only reported in the table, so it is
                # computed for accepted pairs only.
                su = gemmi.calculate_superposition(pol_i, pol_j, pt, gemmi.SupSelect.All)
                ncslist.ncss.append(obj)
                al_res.append({"chain_1": "{} ({}..{})".format(chain_i.name, pol_i[0].seqid, pol_i[-1].seqid),
                               "chain_2": "{} ({}..{})".format(chain_j.name, pol_j[0].seqid, pol_j[-1].seqid),
                               "aligned": al.match_count,
                               "identity": al.calculate_identity(1),
                               "rms": su.rmsd,
                               "ave(rmsloc)": ave_local_rms,
                               })
    ncslist.set_pairs()
    if al_res:
        df = pandas.DataFrame(al_res)
        df.index += 1  # number the pairs from 1 in the printed table
        logger.writeln(df.to_string(float_format="%.2f"))
    else:
        # avoid printing pandas' "Empty DataFrame" representation
        logger.writeln(" no NCS found")
    return ncslist
# prepare_ncs_restraints()
525
+
526
class MetalCoordination:
    """Metal-ligand ideal distances and the restraints derived from them.

    The distance table is read from a JSON file (``metals.json`` in the
    monomer library directory unless *dbfile* is given) under the key
    ``"metal_coordination"``. It is indexed as
    ``metals[metal element name][ligand element name]`` and holds a list
    of entries from which the keys "median", "mad" and "coord" are read.
    """
    def __init__(self, monlib, dbfile=None):
        """Load the table; an absent file gives an empty table and a warning."""
        self.monlib = monlib
        if dbfile is None:
            dbfile = os.path.join(monlib.path(), "metals.json")
        if os.path.exists(dbfile):
            # context manager so that the file handle is closed promptly
            with open(dbfile) as ifs:
                self.metals = json.load(ifs)["metal_coordination"]
        else:
            self.metals = {}
            logger.writeln("WARNING: {} not found".format(dbfile))
    # __init__()

    def find_max_dist(self, cra_metal, cra_ligand):
        """Return the largest "median" distance tabulated for the element pair.

        Falls back to monlib.find_ideal_distance() when the table has no
        entry for the pair.
        """
        vals = self.find_ideal_distances(cra_metal.atom.element, cra_ligand.atom.element)
        if len(vals) == 0:
            # if not found
            return self.monlib.find_ideal_distance(cra_metal, cra_ligand)
        return max(x["median"] for x in vals)
    # find_max_dist()

    def find_ideal_distances(self, el_metal, el_ligand):
        """Return the table entries for the element pair, or [] if absent."""
        if el_metal.name not in self.metals or el_ligand.name not in self.metals[el_metal.name]:
            return []
        return self.metals[el_metal.name][el_ligand.name]
    # find_ideal_distances

    def setup_restraints(self, st):
        """Build distance restraints for metal connections without link id.

        Every connection of type MetalC with an empty link_id is grouped by
        the element and atom of its first partner. For ligands whose
        element has tabulated distances, one "exte dist" keyword is
        produced per table entry with a positive "mad".

        Returns:
            (keywords, indices): the Refmac keyword strings, and the
            indices into st.connections of the connections for which the
            table had an entry (the caller removes these so that no plain
            bond restraint is generated for them).
        """
        ret = [] # returns Refmac keywords
        lookup = {x.atom: x for x in st[0].all()}
        coords = {}  # metal element -> metal atom -> [(ligand atom, connection index, ener_lib ideal)]
        todel = []
        for i, con in enumerate(st.connections):
            if con.link_id == "" and con.type == gemmi.ConnectionType.MetalC:
                cra1 = st[0].find_cra(con.partner1, ignore_segment=True)
                cra2 = st[0].find_cra(con.partner2, ignore_segment=True)
                if None in (cra1.atom, cra2.atom): continue
                ener_ideal = self.monlib.find_ideal_distance(cra1, cra2)
                coords.setdefault(cra1.atom.element, {}).setdefault(cra1.atom, []).append((cra2.atom, i, ener_ideal))
        if coords:
            logger.writeln("Metal coordinations detected")
        for metal in coords:
            logger.writeln(" Metal: {}".format(metal.name))
            ligand_els = {x[0].element for m in coords[metal] for x in coords[metal][m]}
            logger.writeln(" ideal distances")
            ideals = {}  # ligand element -> [(ideal distance, sigma)]
            for el in ligand_els:
                logger.write(" {}: ".format(el.name))
                vals = self.find_ideal_distances(metal, el)
                if len(vals) == 0:
                    # nothing tabulated: only report the ener_lib values
                    ener_ideals = {x[2] for m in coords[metal] for x in coords[metal][m] if x[0].element == el}
                    logger.write(" ".join("{:.2f}".format(x) for x in ener_ideals))
                    logger.writeln(" (from ener_lib)")
                else:
                    logger.writeln(" ".join("{:.4f} ({} coord)".format(x["median"], x["coord"]) for x in vals))
                    ideals[el] = [(x["median"], x["mad"]) for x in vals if x["mad"] > 0]
            logger.writeln("")
            for i, am in enumerate(coords[metal]):
                logger.writeln(" site {}: {}".format(i+1, lookup[am]))
                for j, (lig, con_idx, _) in enumerate(coords[metal][am]):
                    con = st.connections[con_idx]
                    logger.writeln(" ligand {}: {} dist= {:.2f}".format(j+1, lookup[lig],
                                                                       con.reported_distance))
                    specs = [make_atom_spec(x) for x in (lookup[am], lookup[lig])]
                    if lig.element not in ideals:
                        continue
                    todel.append(con_idx)
                    for ideal, sigma in ideals[lig.element]:
                        exte_str = "exte dist first {} seco {} ".format(*specs)
                        exte_str += "valu {:.4f} sigm {:.4f} type 1 ".format(ideal, sigma)
                        if con.asu == gemmi.Asu.Different:
                            exte_str += "symm y"
                        ret.append(exte_str)
            logger.writeln("")
        return ret, list(set(todel))
    # setup_restraints()