servalcat 0.4.99__cp39-cp39-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-39-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,782 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import gemmi
10
+ import numpy
11
+ import pandas
12
+ import scipy.sparse
13
+ import os
14
+ import time
15
+ import itertools
16
+ import string
17
+
18
# Module-level gemmi form-factor table setup; runs once when this module is imported.
gemmi.IT92_normalize()
gemmi.IT92_set_ignore_charge(False)
gemmi.Element("X").it92.set_coefs(gemmi.Element("O").it92.get_coefs()) # treat X (unknown) as O

# Conversion factors between isotropic U and B values: B = 8 * pi^2 * U.
u_to_b = 8 * numpy.pi**2
b_to_u = 1. / u_to_b
24
+
25
+ from servalcat.utils import logger
26
+ from servalcat.utils import restraints
27
+ from servalcat.utils import maps
28
+
29
def shake_structure(st, sigma, copy=True):
    """Displace every atom by Gaussian noise.

    sigma is the requested overall rmsd; each Cartesian component is drawn
    from a normal distribution with standard deviation sigma/sqrt(3).
    With copy=True a clone of st is shaken and returned; otherwise st itself
    is modified in place and returned.
    """
    print("Randomizing structure with rmsd of {}".format(sigma))
    target = st.clone() if copy else st

    per_axis_sigma = sigma / numpy.sqrt(3)
    for model in target:
        for cra in model.all():
            dx, dy, dz = numpy.random.normal(0, per_axis_sigma, 3)
            cra.atom.pos += gemmi.Position(dx, dy, dz)

    return target
# shake_structure()
44
+
45
def setup_entities(st, clear=False, overwrite_entity_type=False, force_subchain_names=False):
    """Rebuild the entity bookkeeping of a structure in place.

    clear: drop existing entities and overwrite entity ids first.
    overwrite_entity_type: forwarded to st.add_entity_types().
    force_subchain_names: forwarded to st.assign_subchains().
    """
    if clear:
        st.entities.clear()
        st.add_entity_ids(overwrite=True) # clear entity_id so that ensure_entities() will work properly

    # The remaining steps always run, in this order.
    pipeline = (
        (st.add_entity_types, (overwrite_entity_type,)),
        (st.assign_subchains, (force_subchain_names,)),
        (st.ensure_entities, ()),
        (st.add_entity_ids, ()),
        (st.deduplicate_entities, ()),
    )
    for step, args in pipeline:
        step(*args)
# setup_entities()
55
+
56
def determine_blur_for_dencalc(st, grid):
    """Return the B value to add so that the smallest B in st[0] reaches the
    value required for the given grid spacing.

    The result is (grid^2 * 8 * pi^2 / 1.1) minus the first element of
    st[0].calculate_b_aniso_range(); it may be negative.
    """
    smallest_b = st[0].calculate_b_aniso_range()[0]
    required_b = grid**2*8*numpy.pi**2/1.1 # Refmac's way
    return required_b - smallest_b
# determine_blur_for_dencalc()
62
+
63
def remove_charge(sts):
    """Set the charge of every atom in the first model of each structure to 0.

    A single warning is logged if at least one atom had a nonzero charge.
    """
    had_charge = False
    for st in sts:
        for cra in st[0].all():
            atom = cra.atom
            had_charge = had_charge or atom.charge != 0
            atom.charge = 0
    if had_charge:
        logger.writeln("Warning: all atomic charges were set to zero.")
# remove_charge()
72
+
73
def check_atomsf(sts, source, mott_bethe=True):
    """Log the scattering-factor coefficients that apply to the given structures.

    sts: iterable of structures; only the first model of each is inspected.
    source: "xray", "electron" or "neutron".
    mott_bethe: only honoured for source == "electron".

    For xray / Mott-Bethe the IT92 table is used per (element, charge); when no
    exact entry exists a warning is logged and the neutral element is shown.
    Otherwise the per-element c4322 (electron) or neutron92 table is shown.
    """
    assert source in ("xray", "electron", "neutron")
    if source != "electron":
        mott_bethe = False
    uses_it92 = source == "xray" or mott_bethe

    logger.writeln("Atomic scattering factors for {}".format("electron (Mott-Bethe)" if mott_bethe else source))
    if not uses_it92:
        logger.writeln(" Note that charges will be ignored")

    el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}

    if uses_it92:
        already_listed = set()
        ordered = sorted(el_charges, key=lambda ec: (ec[0].atomic_number, ec[1]))
        for el, charge in ordered:
            coefs = gemmi.IT92_get_exact(el, charge)
            if not coefs:
                logger.writeln(" Warning: no scattering factor found for {}{:+}".format(el.name, charge))
                # fall back to the neutral atom
                coefs = el.it92
                charge = 0
            key = (el, charge)
            if key in already_listed:
                continue
            if charge == 0:
                label = el.name
            else:
                label = "{}{:+}".format(el.name, charge)
            logger.writeln(" {} {}".format(label, tuple(coefs.get_coefs())))
            already_listed.add(key)
    else:
        elems = {ec[0] for ec in el_charges}
        for el in sorted(elems, key=lambda e: e.atomic_number):
            if source == "electron":
                coefs = el.c4322
            else:
                coefs = el.neutron92
            logger.writeln(" {} {}".format(el.name, tuple(coefs.get_coefs())))
    logger.writeln("")
# check_atomsf()
99
+
100
def calc_sum_ab(st):
    """Return the occupancy-weighted sum over atoms of st[0] of sum_i(a_i * b_i),
    where a and b are the element's it92 coefficient arrays.
    """
    per_element = {}  # element -> sum(a*b), computed once per element
    total = 0.
    for cra in st[0].all():
        atom = cra.atom
        elem = atom.element
        if elem not in per_element:
            coefs = elem.it92
            per_element[elem] = sum(a * b for a, b in zip(coefs.a, coefs.b))
        total += per_element[elem] * atom.occ
    return total
# calc_sum_ab()
110
+
111
def calc_fc_fft(st, d_min, source, mott_bethe=True, monlib=None, blur=None, cutoff=1e-5, rate=1.5,
                omit_proton=False, omit_h_electron=False, miller_array=None):
    """Calculate structure factors by putting the model density on a grid and Fourier transforming it.

    Parameters
    ----------
    st : structure; only st[0] contributes. When st[0] has hydrogens a clone is
        made before anything is changed, so the caller's object is left untouched.
    d_min : resolution limit given to the density calculator and to the output.
    source : "xray", "electron" or "neutron".
    mott_bethe : use the Mott-Bethe formula; forced to False unless source is "electron".
    monlib : monomer library; when given (with mott_bethe, hydrogens present and
        omit_proton not set) a topology is prepared and hydrogen positions are
        shifted to electron-cloud / nucleus distances.
    blur : B value added in the density calculation and removed afterwards;
        None means determine_blur_for_dencalc(st, d_min/2/rate).
    cutoff, rate : forwarded to the density calculator.
    omit_proton, omit_h_electron : leave out the hydrogen nucleus / electron term;
        both require mott_bethe (assert). Requesting both removes hydrogens altogether.
    miller_array : if None an asu-data object from grid.prepare_asu_data() is
        returned; otherwise the values from grid.get_value_by_hkl() for these indices.
    """
    assert source in ("xray", "electron", "neutron")
    if source != "electron": mott_bethe = False
    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    #blur = max(0, blur) # negative blur may cause non-positive definite in case of anisotropic Bs
    logger.writeln("Setting blur= {:.2f} in density calculation (unblurred later)".format(blur))
    topo = None
    if st[0].has_hydrogen():
        # work on a copy: hydrogens may be removed or moved below
        st = st.clone()
        if source == "neutron":
            # nothing happens if not st.has_d_fraction
            st.store_deuterium_as_fraction(False)
        if omit_proton or omit_h_electron:
            assert mott_bethe
            if omit_proton and omit_h_electron:
                logger.writeln("omit_proton and omit_h_electron requested. removing hydrogens")
                st.remove_hydrogens()
                omit_proton = omit_h_electron = False
        if mott_bethe and not omit_proton and monlib is not None:
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)
            # Shift electron positions
            topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    elif omit_proton or omit_h_electron:
        logger.writeln("WARNING: omit_proton/h_electron requested, but no hydrogen exists!")
        omit_proton = omit_h_electron = False

    # Mott-Bethe starts from the X-ray calculator
    if source == "xray" or mott_bethe:
        dc = gemmi.DensityCalculatorX()
    elif source == "electron":
        dc = gemmi.DensityCalculatorE()
    elif source == "neutron":
        dc = gemmi.DensityCalculatorN()
    else:
        raise RuntimeError("unknown source")

    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate
    dc.grid.setup_from(st)

    t_start = time.time()
    if mott_bethe:
        # method_str is only used for the log message
        if omit_proton:
            method_str = "proton-omit Fc"
        elif omit_h_electron:
            if topo is None:
                method_str = "hydrogen electron-omit Fc"
            else:
                method_str = "hydrogen electron-omit, proton-shifted Fc"
        elif topo is not None:
            method_str = "proton-shifted Fc"
        else:
            method_str = "Fc"

        logger.writeln("Calculating {} using Mott-Bethe formula".format(method_str))

        dc.initialize_grid()
        dc.addends.subtract_z(except_hydrogen=True)

        if omit_h_electron:
            # electron term is taken from a hydrogen-free copy
            st2 = st.clone()
            st2.remove_hydrogens()
            dc.add_model_density_to_grid(st2[0])
        else:
            dc.add_model_density_to_grid(st[0])

        # Subtract hydrogen Z
        if not omit_proton and st[0].has_hydrogen():
            if topo is not None:
                # Shift proton positions
                topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                               default_scale=restraints.default_proton_scale)
            for cra in st[0].all():
                if cra.atom.is_hydrogen():
                    dc.add_c_contribution_to_grid(cra.atom, -1)

        dc.grid.symmetrize_sum()
        # value handed to get_value_by_hkl() as mott_bethe_000
        sum_ab = calc_sum_ab(st) * len(st.find_spacegroup().operations())
        mb_000 = sum_ab * gemmi.mott_bethe_const() / 4
    else:
        logger.writeln("Calculating Fc")
        dc.put_model_density_on_grid(st[0])
        mb_000 = 0

    logger.writeln(" done. Fc calculation time: {:.1f} s".format(time.time() - t_start))
    grid = gemmi.transform_map_to_f_phi(dc.grid)

    if miller_array is None:
        return grid.prepare_asu_data(dmin=d_min, mott_bethe=mott_bethe, unblur=dc.blur)
    else:
        return grid.get_value_by_hkl(miller_array, mott_bethe=mott_bethe, unblur=dc.blur,
                                     mott_bethe_000=mb_000)
# calc_fc_fft()
208
+
209
def calc_fc_direct(st, d_min, source, mott_bethe, monlib=None, miller_array=None):
    """Calculate structure factors by direct summation, one reflection at a time.

    Parameters
    ----------
    st : structure; st[0], st.cell and st.spacegroup_hm are used.
    d_min : resolution limit used to generate Miller indices when miller_array is None.
    source : "xray" or "electron".
    mott_bethe : use the Mott-Bethe formula; forced to False unless source is "electron".
    monlib : monomer library; with Mott-Bethe and hydrogens present, a topology is
        prepared on a clone of st so hydrogens can be moved to nucleus positions.
    miller_array : reflections to compute; if given a numpy array of values is
        returned, otherwise a gemmi.ComplexAsuData for the generated indices.
    """
    assert source in ("xray", "electron")
    if source != "electron": mott_bethe = False

    miller_array_given = miller_array is not None
    unit_cell = st.cell
    spacegroup = gemmi.SpaceGroup(st.spacegroup_hm)
    if not miller_array_given: miller_array = gemmi.make_miller_array(unit_cell, spacegroup, d_min)
    topo = None

    # Mott-Bethe starts from the X-ray calculator
    if source == "xray" or mott_bethe:
        calc = gemmi.StructureFactorCalculatorX(st.cell)
    else:
        calc = gemmi.StructureFactorCalculatorE(st.cell)


    if source == "electron" and mott_bethe:
        if monlib is not None and st[0].has_hydrogen():
            # clone so that shifting hydrogens does not touch the caller's model
            st = st.clone()
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)

        calc.addends.clear()
        calc.addends.subtract_z(except_hydrogen=True)

    vals = []
    for hkl in miller_array:
        sf = calc.calculate_sf_from_model(st[0], hkl) # attention: traverse cell.images
        if mott_bethe: sf *= calc.mott_bethe_factor()
        vals.append(sf)

    if topo is not None:
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                       default_scale=restraints.default_proton_scale)

        # add the hydrogen-only term computed at the shifted positions
        # NOTE(review): nesting of this loop under `if topo is not None` is
        # reconstructed from a de-indented listing -- confirm against the real file.
        for i, hkl in enumerate(miller_array):
            sf = calc.calculate_mb_z(st[0], hkl, only_h=True)
            if mott_bethe: sf *= calc.mott_bethe_factor()
            vals[i] += sf

    if miller_array_given:
        return numpy.array(vals)
    else:
        asu = gemmi.ComplexAsuData(unit_cell, spacegroup,
                                   miller_array, vals)
        return asu
# calc_fc_direct()
257
+
258
def get_em_expected_hydrogen(st, d_min, monlib, weights=None, blur=None, cutoff=1e-5, rate=1.5, optimize=False):
    """Return a clone of st in which each hydrogen is moved to the peak of its
    calculated Mott-Bethe map.

    Parameters
    ----------
    st : structure whose first model must contain hydrogens (assert).
    d_min : resolution of the calculated map.
    monlib : monomer library used to prepare the topology.
    weights : optional (s_list, w_list) pair; structure factors are multiplied
        by numpy.interp(1/d, s_list, w_list).
    blur : B value for the density calculation; None means
        determine_blur_for_dencalc(st, d_min/2/rate). Clamped to >= 0.
    cutoff, rate : forwarded to the density calculator.
    optimize : refine the grid-point peak with maps.optimize_peak().
    """
    # Very crude implementation to find peak from calculated map
    assert st[0].has_hydrogen()
    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    blur = max(0, blur)
    logger.writeln("Setting blur= {:.2f} in density calculation".format(blur))

    st = st.clone()
    topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
    resnames = st[0].get_all_residue_names()
    restraints.check_monlib_support_nucleus_distances(monlib, resnames)

    # snapshot the model with hydrogens at electron-cloud, then at nucleus positions
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    st_e = st.clone()
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus)
    st_n = st.clone()

    dc = gemmi.DensityCalculatorX()
    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate

    # Decide box_size
    max_r = max([dc.estimate_radius(cra.atom) for cra in st[0].all()])
    logger.writeln("max_r= {:.2f}".format(max_r))
    box_size = max_r*2 + 1 # padding
    logger.writeln("box_size= {:.2f}".format(box_size))
    # mode_all is a hard-coded debug switch; with False each hydrogen is handled
    # on its own in a small P1 box
    mode_all = False #True
    if mode_all:
        dc.grid.setup_from(st)
    else:
        dc.grid.unit_cell = gemmi.UnitCell(box_size, box_size, box_size, 90, 90, 90)
        dc.grid.spacegroup = gemmi.SpaceGroup("P1")
        cbox = gemmi.Position(box_size/2, box_size/2, box_size/2)

    if mode_all: dc.initialize_grid()

    if weights is not None:
        w_s, w_w = weights # s_list and w_list
    else:
        w_s, w_w = None, None

    # index-based loops so the same atom can be addressed in st, st_n and st_e
    for ichain in range(len(st[0])):
        chain = st[0][ichain]
        for ires in range(len(chain)):
            residue = chain[ires]
            for iatom in range(len(residue)):
                atom = residue[iatom]
                if not atom.is_hydrogen(): continue
                h_n = st_n[0][ichain][ires][iatom]
                h_e = st_e[0][ichain][ires][iatom]
                if not mode_all:
                    dc.initialize_grid()
                    h_n.occ = 1.
                    h_e.occ = 1.
                    # put the nucleus at the box centre, keep the electron offset
                    n_pos = gemmi.Position(h_n.pos)
                    h_n.pos = cbox
                    h_e.pos = cbox + h_e.pos - n_pos
                dc.add_atom_density_to_grid(h_e)
                dc.add_c_contribution_to_grid(h_n, -1)
                if not mode_all:
                    grid = gemmi.transform_map_to_f_phi(dc.grid)
                    asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
                    if w_s is not None:
                        asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)

                    denmap = asu_data.transform_f_phi_to_map(exact_size=(int(box_size*10), int(box_size*10), int(box_size*10)))
                    m = numpy.unravel_index(numpy.argmax(denmap), denmap.shape)
                    peakpos = denmap.get_position(m[0], m[1], m[2])
                    if optimize: peakpos = maps.optimize_peak(denmap, peakpos)
                    # translate the peak back from box coordinates to the model frame
                    atom.pos = peakpos - cbox + n_pos

    if mode_all:
        grid = gemmi.transform_map_to_f_phi(dc.grid)
        asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
        if w_s is not None:
            asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)
        denmap = asu_data.transform_f_phi_to_map(sample_rate=3)
        ccp4 = gemmi.Ccp4Map()
        ccp4.grid = denmap
        ccp4.update_ccp4_header(2, True) # float, update stats
        ccp4.write_ccp4_map("debug.ccp4")

    return st

# get_em_expected_hydrogen()
345
+
346
def translate_into_box(st, origin=None, apply_shift=True):
    """Compute the unit-cell translation that moves the centre of mass of st[0]
    (relative to origin) into the unit cell, and optionally apply it.

    origin: gemmi.Position, defaults to (0,0,0).
    apply_shift: when True every model of st is translated in place.
    Returns the Cartesian shift vector.
    """
    if origin is None:
        origin = gemmi.Position(0,0,0)

    # apply unit cell translations to put model into a box (unit cell)
    orth = st.cell.orth.mat.array
    frac_t = st.cell.frac.mat.array.transpose()
    center = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())

    shift = 0
    for axis in range(3):
        n_cells = numpy.floor(1-numpy.dot(center, frac_t[:,axis]))
        shift = shift + orth[:,axis]*n_cells

    tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
    if apply_shift:
        for model in st:
            model.transform_pos_and_adp(tr)
    return shift
# translate_into_box()
359
+
360
def box_from_model(model, padding):
    """Return an orthogonal gemmi.UnitCell whose edges are the coordinate
    extent of the model along x, y, z plus padding.
    """
    coords = numpy.array([cra.atom.pos.tolist() for cra in model.all()])
    upper = numpy.max(coords, axis=0)
    lower = numpy.min(coords, axis=0)
    edge_a, edge_b, edge_c = upper - lower + padding
    return gemmi.UnitCell(edge_a, edge_b, edge_c, 90, 90, 90)
# box_from_model()
366
+
367
def cra_to_indices(cra, model):
    """Locate cra in model and return (chain_index, residue_index, atom_index).

    Components that cannot be found stay None. If several entries compare
    equal, the last one wins.
    """
    chain_idx = residue_idx = atom_idx = None
    for ic, chain in enumerate(model):
        if cra.chain == chain:
            chain_idx = ic
            for ir, res in enumerate(chain):
                if cra.residue == res:
                    residue_idx = ir
                    for ia, atom in enumerate(res):
                        if cra.atom == atom:
                            atom_idx = ia

    return (chain_idx, residue_idx, atom_idx)
# cra_to_indices()
383
+
384
def cra_to_atomaddress(cra):
    """Build a gemmi.AtomAddress (including the residue segment) for a CRA."""
    chain, residue, atom = cra.chain, cra.residue, cra.atom
    address = gemmi.AtomAddress(chain.name,
                                residue.seqid, residue.name,
                                atom.name, atom.altloc)
    address.res_id.segment = residue.segment
    return address
# cra_to_atomaddress()
391
+
392
def check_occupancies(st, raise_error=False):
    """Log atoms of st[0] whose occupancy is outside [0, 1 + 1e-6].

    Raises RuntimeError when such atoms exist and raise_error is True.
    """
    offenders = [cra for cra in st[0].all()
                 if not 0 <= cra.atom.occ <= 1 + 1e-6]
    if not offenders:
        return

    logger.writeln("Bad occupancies:")
    for cra in offenders:
        logger.writeln(f" {cra} occ= {cra.atom.occ:.4f}")
    if raise_error:
        raise RuntimeError("Please check your model and fix bad occupancies")
# check_occupancies()
404
+
405
def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
    """Find atoms of st[0] that contact their own symmetry image and optionally
    correct them in place.

    Parameters
    ----------
    st : structure; st.cell.images is indexed, so cell images are expected to be set up.
    special_pos_threshold : contacts are searched within twice this distance.
    fix_occ : if occupancy * multiplicity exceeds 1.001, divide the occupancy by the multiplicity.
    fix_pos : move the atom by the average displacement to its images.
    fix_adp : replace a nonzero anisotropic ADP by the average over its images.

    Returns a list of (atom, images, mat_total, mat_total_aniso) tuples, where
    images are the image indices reported by the contact search, mat_total is
    the 3x3 average of identity and image matrices, and mat_total_aniso is the
    pseudo-inverse of the corresponding 6x6 average.
    """
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(special_pos_threshold * 2)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    cs.special_pos_cutoff_sq = 0
    results = cs.find_contacts(ns)
    found = {}  # atom -> list of image indices
    cra = {}    # atom -> CRA, kept for log output
    for r in results:
        # only self-contacts are of interest
        if r.partner1.atom != r.partner2.atom: continue
        found.setdefault(r.partner1.atom, []).append(r.image_idx)
        cra[r.partner1.atom] = r.partner1

    if found: logger.writeln("Atoms on special position detected.")
    tostr = lambda x: ", ".join("{:.3e}".format(v) for v in x)
    ret = []
    for atom in found:
        images = found[atom]
        n_images = len(images) + 1  # +1 for the atom itself
        sum_occ = atom.occ * n_images
        logger.writeln(" {} multiplicity= {} images= {} occupancies_total= {:.2f}".format(cra[atom], n_images, images, sum_occ))
        if sum_occ > 1.001 and fix_occ:
            new_occ = atom.occ / n_images
            logger.writeln(" correcting occupancy= {:.2f}".format(new_occ))
            atom.occ = new_occ
        if fix_pos:
            fpos = gemmi.Fractional(st.cell.frac.apply(atom.pos))
            # image_idx is 1-based with respect to st.cell.images
            fdiff = sum([(st.cell.images[i-1].apply(fpos) - fpos).wrap_to_zero() for i in images], gemmi.Fractional(0,0,0)) / n_images
            diff = st.cell.orth.apply(fdiff)
            atom.pos += gemmi.Position(diff)
            logger.writeln(" correcting position= {}".format(tostr(atom.pos.tolist())))
            logger.writeln(" pos_viol= {}".format(tostr(diff.tolist())))
        if fix_adp and atom.aniso.nonzero():
            aniso_bak = atom.aniso.elements_pdb()
            # average in fractional space, then convert back
            fani = atom.aniso.transformed_by(st.cell.frac.mat)
            fani_avg = sum([fani.transformed_by(st.cell.images[i-1].mat) for i in images], fani).scaled(1/n_images)
            atom.aniso = fani_avg.transformed_by(st.cell.orth.mat)
            diff = numpy.array(atom.aniso.elements_pdb()) - aniso_bak
            logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
            logger.writeln(" aniso_viol= {}".format(tostr(diff)))

        # image operators expressed in Cartesian space
        mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
        mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
        mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
        mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
        ret.append((atom, images, mat_total, mat_total_aniso))

    return ret
# find_special_positions()
454
+
455
def expand_ncs(st, special_pos_threshold=0.01, howtoname=gemmi.HowToNameCopiedChain.Short):
    """Expand the NCS operators of st in place; does nothing when st.ncs is empty.

    Special positions are reported (and corrected with find_special_positions'
    defaults) before expansion; copies are merged with merge_dist=1e-4.
    """
    # TODO modify st.connections for atoms at special positions
    if len(st.ncs) > 0:
        find_special_positions(st, special_pos_threshold) # just to show info, a bit waste of cpu time..
        logger.writeln("Expanding symmetry..")
        st.expand_ncs(howtoname, merge_dist=1e-4)
# expand_ncs()
462
+
463
def prepare_assembly(name, chains, ops, is_helical=False):
    """Create a gemmi.Assembly with one generator built from NCS-like operators.

    name: assembly name.
    chains: chain names assigned to the generator.
    ops: operators exposing .tr; an identity operator is prepended when none
        of them is the identity.
    is_helical: selects the operator type string and the assembly special_kind.
    """
    if is_helical:
        op_type = "helical symmetry operation"
        kind = gemmi.AssemblySpecialKind.RepresentativeHelical
    else:
        op_type = "point symmetry operation"
        kind = gemmi.AssemblySpecialKind.CompletePoint

    assembly = gemmi.Assembly(name)
    gen = gemmi.Assembly.Gen()
    if not any([nop.tr.is_identity() for nop in ops]):
        gen.operators.append(gemmi.Assembly.Operator()) # add identity
    for nop in ops:
        oper = gemmi.Assembly.Operator()
        oper.transform = nop.tr
        if not nop.tr.is_identity():
            oper.type = op_type
        gen.operators.append(oper)
    gen.chains = chains
    assembly.generators.append(gen)
    assembly.special_kind = kind
    return assembly
# prepare_assembly()
485
+
486
def filter_contacting_ncs(st, cutoff=5.):
    """Keep only the NCS operators whose copies contact the model in st[0].

    st.ncs is replaced in place; nothing happens when it is empty.
    cutoff: contact distance in Angstrom.

    Image indices reported by the contact search are 1-based with respect to
    the NCS operators. An index of 0 does not correspond to any NCS operator
    and would otherwise select st.ncs[-1] through negative indexing, so it is
    skipped. Indices are processed in ascending order so that the resulting
    operator order is deterministic.
    """
    if len(st.ncs) == 0: return
    logger.writeln("Filtering out non-contacting NCS copies with cutoff={:.2f} A".format(cutoff))
    st.setup_cell_images()
    ns = gemmi.NeighborSearch(st[0], st.cell, cutoff*2).populate() # This is considered crystallographic cell if not 1 1 1. Undesirable result may be seen.
    cs = gemmi.ContactSearch(cutoff)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    results = cs.find_contacts(ns)
    indices = {r.image_idx for r in results}
    logger.writeln(" contacting copies: {}".format(indices))
    # XXX the i-1 mapping is only valid as long as the identity operator is not in st.ncs
    ops = [st.ncs[i-1] for i in sorted(indices) if i > 0]
    st.ncs.clear()
    st.ncs.extend(ops)
# filter_contacting_ncs()
500
+
501
def check_symmetry_related_model_duplication(st, distance_cutoff=0.5, max_allowed_ratio=0.5):
    """Return True when too many atoms of st[0] overlap with symmetry copies,
    i.e. when the model probably contains more than the asymmetric unit.

    distance_cutoff: contact distance used to detect overlapping atoms.
    max_allowed_ratio: fraction of contacting atoms above which True is returned.
    A model without atoms returns False instead of raising ZeroDivisionError.
    """
    logger.writeln("Checking if model in asu is given.")
    n_atoms = st[0].count_atom_sites()
    st.setup_cell_images()
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(distance_cutoff)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    results = cs.find_contacts(ns)
    contacting = {a for r in results for a in (r.partner1.atom, r.partner2.atom)}
    n_contacting_atoms = len(contacting)
    logger.writeln(" N_atoms= {} N_contacting_atoms= {}".format(n_atoms, n_contacting_atoms))
    if n_atoms == 0:
        # empty model: there is nothing that could be duplicated
        return False
    return n_contacting_atoms / n_atoms > max_allowed_ratio # return True if too many contacts
# check_symmetry_related_model_duplication()
513
+
514
def adp_analysis(st, ignore_zero_occ=True):
    """Log min / quartiles / max of B values per chain for every model of st.

    ignore_zero_occ: leave zero-occupancy atoms out of the statistics.
    The model header line is only written when st has more than one model.
    """
    logger.writeln("= ADP analysis =")
    if ignore_zero_occ:
        logger.writeln("(zero-occupancy atoms are ignored)")

    # (the former unused local `all_B`, which shadowed the module-level
    # function of the same name, has been removed)
    for i, mol in enumerate(st):
        if len(st) > 1: logger.writeln("Model {}:".format(i))
        logger.writeln(" min Q1 med Q3 max")
        stats = adp_stats_per_chain(mol, ignore_zero_occ)
        for chain, natoms, qs in stats:
            # "*" is the all-chains summary row produced by adp_stats_per_chain()
            logger.writeln(("Chain {:3s}".format(chain) if chain!="*" else "All ") + " {:5.1f} {:5.1f} {:5.1f} {:5.1f} {:5.1f}".format(*qs))
        logger.writeln("")
# adp_analysis()
528
+
529
def adp_stats_per_chain(model, ignore_zero_occ=True):
    """Return B-value statistics per chain name.

    Each entry is (chain_name, n_atoms, quantiles) with quantiles at
    0, 0.25, 0.5, 0.75 and 1. Chains sharing a name are pooled and reported
    once, in order of first appearance. When more than one chain name has
    atoms, a final ("*", n_atoms, quantiles) row summarises all of them.

    Chains without any counted atom (empty, or only zero-occupancy atoms while
    ignore_zero_occ is True) are skipped instead of raising KeyError.
    """
    probs = [0,0.25,0.5,0.75,1]
    bs = {}
    for cra in model.all():
        if not ignore_zero_occ or cra.atom.occ > 0:
            bs.setdefault(cra.chain.name, []).append(cra.atom.b_iso)

    ret = []
    reported = set()
    for chain in model:
        name = chain.name
        if name in reported or name not in bs:
            continue
        reported.add(name)
        ret.append((name, len(bs[name]), numpy.quantile(bs[name], probs)))

    if len(bs) > 1:
        all_bs = list(itertools.chain.from_iterable(bs.values()))
        ret.append(("*", len(all_bs), numpy.quantile(all_bs, probs)))

    return ret
# adp_stats_per_chain()
548
+
549
def reset_adp(model, bfactor=None, adp_mode="iso"):
    """Reset ADPs of all atoms in model, in place.

    bfactor: when given, every b_iso is set to it.
    adp_mode: "iso" zeroes the anisotropic tensor; "fix" does so only when
        bfactor is given; "aniso" sets an isotropic-equivalent tensor from
        b_iso for atoms without one (or for all atoms when bfactor is given).
    """
    override_b = bfactor is not None
    zero_aniso = adp_mode == "iso" or (adp_mode == "fix" and override_b)
    fill_aniso = not zero_aniso and adp_mode == "aniso"

    for cra in model.all():
        atom = cra.atom
        if override_b:
            atom.b_iso = bfactor
        if zero_aniso:
            atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
        elif fill_aniso and (override_b or not atom.aniso.nonzero()):
            u = atom.b_iso * b_to_u
            atom.aniso = gemmi.SMat33f(u, u, u, 0, 0, 0)
# reset_adp()
560
+
561
def shift_b(model, b):
    """Add b to every b_iso in model and the equivalent U to the diagonal of
    every nonzero anisotropic tensor, in place.
    """
    delta_u = b * b_to_u
    for cra in model.all():
        atom = cra.atom
        atom.b_iso += b
        if not atom.aniso.nonzero():
            continue
        atom.aniso.u11 += delta_u
        atom.aniso.u22 += delta_u
        atom.aniso.u33 += delta_u
# shift_b()
570
+
571
def all_chain_ids(st):
    """Return the chain names of all models, in order (duplicates kept)."""
    names = []
    for model in st:
        names.extend(chain.name for chain in model)
    return names
# all_chain_ids()
574
+
575
def all_B(st, ignore_zero_occ=True):
    """Return a list of b_iso of all atoms in all models.

    ignore_zero_occ: skip atoms whose occupancy is not positive.
    """
    return [cra.atom.b_iso
            for mol in st
            for cra in mol.all()
            if not ignore_zero_occ or cra.atom.occ > 0]
# all_B()
584
+
585
def mat33_as66(m):
    """Return the 6x6 matrix R' equivalent to the 3x3 transformation m.

    If m acts on a symmetric 3x3 matrix U as m U m^T, then R' acts on the
    6-vector (u11, u22, u33, u12, u13, u23) and gives the same result.
    m may be any 3x3 nested sequence indexable as m[i][j].
    """
    # element order of the 6-vector
    pairs = ((0,0), (1,1), (2,2), (0,1), (0,2), (1,2))
    out = numpy.zeros((6,6))
    for row, (i, j) in enumerate(pairs):
        for col, (p, q) in enumerate(pairs):
            if p == q:
                out[row, col] = m[i][p] * m[j][p]
            else:
                # off-diagonal elements of U appear twice in the product
                out[row, col] = m[i][p] * m[j][q] + m[i][q] * m[j][p]
    return out
597
def adp_constraints(ops, cell, tr0=True):
    """Return the directions in 6-dimensional ADP space that are left free by
    the given symmetry operators.

    ops: symmetry operations, converted with cell.op_as_transform().
    cell: unit cell providing op_as_transform().
    tr0: add a penalty block on the (u11, u22, u33) sub-space
        (presumably to exclude the trace direction -- TODO confirm).

    Returns an (n, 6) array whose rows are the free directions; (0, 6) if none.
    """
    # think about f = (b-Rb)^T (b-Rb) = b^T b - b^T R b -b^T R^T b + b^T R^T R b
    # d^2f/db db^T = 2I - 2(R+R^T) + 2(R^T R)
    # eigenvectors of this second derivative matrix corresponding to 0-valued eigenvalues are directions to refine
    hessian = numpy.zeros((6,6))
    if tr0:
        hessian[:3,:3] += numpy.ones((3,3)) * 2
    for op in ops:
        r = mat33_as66(cell.op_as_transform(op).mat.tolist())
        hessian += 2 * numpy.identity(6) - 2 * (r + r.T) + 2 * numpy.dot(r.T, r)

    evals, evecs = numpy.linalg.eigh(hessian)
    free_dirs = [evecs[:, k] for k in range(6) if numpy.isclose(evals[k], 0)]
    if len(free_dirs) == 0:
        return numpy.empty((0, 6))

    stacked = numpy.vstack(free_dirs)
    # flush numerical noise to exact zeros
    return numpy.where(numpy.abs(stacked) < 1e-9, 0, stacked)
# adp_constraints()
620
+
621
def to_dataframe(st):
    """Flatten all atoms of all models of st into a pandas.DataFrame, one row
    per atom, with the columns listed in `columns` below.
    """
    columns = ("model", "chain", "resn", "subchain", "segment", "seqnum", "icode", "altloc",
               "u11", "u22", "u33", "u12", "u13", "u23",
               "b_iso", "charge", "elem", "atom", "occ",
               "x", "y", "z", "tlsgroup")
    data = {key: [] for key in columns}

    for m in st:
        for cra in m.all():
            c, r, a = cra.chain, cra.residue, cra.atom
            # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
            row = {
                "model": m.num,
                "chain": c.name,
                "resn": r.name,
                "subchain": r.subchain,
                "segment": r.segment,
                "seqnum": r.seqid.num,
                "icode": r.seqid.icode,
                "altloc": a.altloc,
                "u11": a.aniso.u11,
                "u22": a.aniso.u22,
                "u33": a.aniso.u33,
                "u12": a.aniso.u12,
                "u13": a.aniso.u13,
                "u23": a.aniso.u23,
                "b_iso": a.b_iso,
                "charge": a.charge,
                "elem": a.element.name,
                "atom": a.name,
                "occ": a.occ,
                "x": a.pos.x,
                "y": a.pos.y,
                "z": a.pos.z,
                "tlsgroup": a.tls_group_id,
            }
            for key in columns:
                data[key].append(row[key])

    return pandas.DataFrame(data=data)
# to_dataframe()
659
+
660
def from_dataframe(df, st=None): # Slow!
    """Build a gemmi.Structure from a DataFrame with the columns written by to_dataframe().

    st: optional template; it is cloned and all its models are removed, so
        everything except the models is inherited. The input is not modified.
    Rows are grouped by model, then chain, then
    (seqnum, icode, resn, segment, subchain); pandas' groupby ordering applies.
    """
    if st is None:
        st = gemmi.Structure()
    else:
        st = st.clone()
        while len(st) > 0:
            del st[0]

    aniso_keys = ("u11", "u22", "u33", "u12", "u13", "u23")
    residue_keys = ["seqnum","icode","resn","segment","subchain"]

    for model_num, model_rows in df.groupby("model"):
        st.add_model(gemmi.Model(model_num))
        model = st[-1]
        for chain_name, chain_rows in model_rows.groupby("chain"):
            model.add_chain(gemmi.Chain(chain_name))
            chain = model[-1]
            for rkey, residue_rows in chain_rows.groupby(residue_keys):
                seqnum, icode, resn, segment, subchain = rkey
                chain.add_residue(gemmi.Residue())
                res = chain[-1]
                res.seqid.num = seqnum
                res.seqid.icode = icode
                res.name = resn
                res.segment = segment
                res.subchain = subchain
                for _, row in residue_rows.iterrows():
                    res.add_atom(gemmi.Atom())
                    atom = res[-1]
                    atom.altloc = row["altloc"]
                    atom.name = row["atom"]
                    for key in aniso_keys:
                        setattr(atom.aniso, key, row[key])
                    atom.b_iso = row["b_iso"]
                    atom.charge = row["charge"]
                    atom.element = gemmi.Element(row["elem"])
                    atom.occ = row["occ"]
                    atom.pos.x = row["x"]
                    atom.pos.y = row["y"]
                    atom.pos.z = row["z"]
                    atom.tls_group_id = row["tlsgroup"]

    return st
# from_dataframe()
704
+
705
def st_from_positions(positions, bs=None, qs=None):
    """Create a one-model, one-chain ("A") structure with one HOH oxygen per position.

    positions: sequence of gemmi.Position (len() is used when bs or qs is None).
    bs: B values, default 0 for every atom.
    qs: occupancies, default 1 for every atom.
    Residues are numbered from 0 in input order.
    """
    if bs is None:
        bs = [0.] * len(positions)
    if qs is None:
        qs = [1.] * len(positions)

    st = gemmi.Structure()
    st.add_model(gemmi.Model(1))
    st[0].add_chain(gemmi.Chain("A"))
    chain = st[0][0]

    for seqnum, (pos, b, q) in enumerate(zip(positions, bs, qs)):
        chain.add_residue(gemmi.Residue())
        water = chain[-1]
        water.seqid.num = seqnum
        water.name = "HOH"
        water.add_atom(gemmi.Atom())
        oxygen = water[-1]
        oxygen.name = "O"
        oxygen.element = gemmi.Element("O")
        oxygen.pos = pos
        oxygen.b_iso = b
        oxygen.occ = q

    return st
# st_from_positions()
727
+
728
def invert_model(st):
    """Mirror the first model of st along x about the centre of the unit cell, in place.

    Positions and ADPs of st[0] are transformed; other models are not touched.
    """
    # invert x-axis
    orth = st.cell.orth.mat.array
    half_diagonal = numpy.sum(orth,axis=1) / 2
    center = gemmi.Vec3(*half_diagonal)
    mirror_x = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
    # choose the translation so that the cell centre stays where it is
    translation = mirror_x.multiply(-center) + center
    st[0].transform_pos_and_adp(gemmi.Transform(mirror_x, translation))

    # invert peptides
    # NOTE(review): no code follows the comment above in the original either.
# invert_model()
740
+
741
def cx_to_mx(ss): #SmallStructure to Structure
    """Convert a gemmi SmallStructure into a Structure with a single model,
    chain "A" and one residue named "00" (seqid 1) holding every site as an atom.

    Anisotropic U values are transformed with the orthogonalisation matrix
    scaled by the reciprocal cell lengths; b_iso is u_iso * u_to_b. Charges are
    not transferred.
    """
    st = gemmi.Structure()
    st.spacegroup_hm = ss.spacegroup.xhm()
    st.cell = ss.cell
    st.add_model(gemmi.Model(1))
    st[-1].add_chain(gemmi.Chain("A"))
    st[-1][-1].add_residue(gemmi.Residue())
    residue = st[-1][-1][-1]
    residue.seqid.num = 1
    residue.name = "00"

    ruc = ss.cell.reciprocal()
    cif2cart = ss.cell.orth.mat.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))

    def as_smat33f(x):
        return gemmi.SMat33f(x.u11, x.u22, x.u33, x.u12, x.u13, x.u23)

    for site in ss.sites:
        residue.add_atom(gemmi.Atom())
        atom = residue[-1]
        atom.name = site.label
        atom.aniso = as_smat33f(site.aniso.transformed_by(cif2cart))
        atom.b_iso = site.u_iso * u_to_b
        #atom.charge = ?
        atom.element = site.element
        atom.occ = site.occ
        atom.pos = site.orth(ss.cell)

    return st
# cx_to_mx()
768
+
769
def fix_deuterium_residues(st):
    """Rename DOD residues of the first model, and DOD partners in
    st.connections, to HOH in place; a warning with the residue count is logged.
    """
    # we do not have DOD. will not change ND4->NH4 and SPW->SPK, as hydrogen atom names are different
    old_name, new_name = "DOD", "HOH"

    heavy_waters = [res for chain in st[0] for res in chain if res.name == old_name]
    for res in heavy_waters:
        res.name = new_name

    for con in st.connections:
        for partner in (con.partner1, con.partner2):
            if partner.res_id.name == old_name:
                partner.res_id.name = new_name

    n_changed = len(heavy_waters)
    if n_changed > 0:
        logger.writeln("Warning: {} DOD residues have been renamed to HOH".format(n_changed))