servalcat 0.4.60__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (44) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,774 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ from servalcat.utils import logger
10
+ from servalcat.utils import restraints
11
+ from servalcat.utils import maps
12
+ import gemmi
13
+ import numpy
14
+ import pandas
15
+ import scipy.sparse
16
+ import os
17
+ import time
18
+ import itertools
19
+ import string
20
+
21
# Module-level side effects executed once at import time.
# NOTE(review): IT92_normalize() presumably rescales gemmi's IT92 X-ray form-factor table - confirm in gemmi docs.
gemmi.IT92_normalize()
gemmi.Element("X").it92.set_coefs(gemmi.Element("O").it92.get_coefs()) # treat X (unknown) as O

# Conversion factors between isotropic B and U values: B = 8 * pi^2 * U
u_to_b = 8 * numpy.pi**2
b_to_u = 1. / u_to_b
26
+
27
def shake_structure(st, sigma, copy=True):
    """Randomly displace every atom of every model.

    Each Cartesian coordinate receives Gaussian noise with standard
    deviation sigma/sqrt(3). The perturbed structure is returned; it is a
    clone of st when copy is true, otherwise st itself is modified in place.
    """
    print("Randomizing structure with rmsd of {}".format(sigma))
    target = st.clone() if copy else st

    per_axis_sigma = sigma / numpy.sqrt(3)
    for model in target:
        for cra in model.all():
            dx, dy, dz = numpy.random.normal(0, per_axis_sigma, 3)
            cra.atom.pos += gemmi.Position(dx, dy, dz)

    return target
# shake_structure()
42
+
43
def setup_entities(st, clear=False, overwrite_entity_type=False, force_subchain_names=False):
    """(Re)build entity information of st in place.

    clear: if true, existing entities are dropped and entity ids are overwritten first.
    overwrite_entity_type: passed to st.add_entity_types().
    force_subchain_names: passed to st.assign_subchains().
    Returns None. The calls below are order-dependent.
    """
    if clear:
        st.entities.clear()
        st.add_entity_ids(overwrite=True) # clear entity_id so that ensure_entities() will work properly
    st.add_entity_types(overwrite_entity_type)
    st.assign_subchains(force_subchain_names)
    st.ensure_entities()
    st.add_entity_ids()
    st.deduplicate_entities()
# setup_entities()
53
+
54
def minimum_b(m):
    """Return the smallest B value found in model m.

    Considers b_iso of every atom and, for atoms with a non-zero anisotropic
    tensor, its smallest eigenvalue converted from U to B.
    """
    iso_min = min(cra.atom.b_iso for cra in m.all())
    aniso_eig_mins = [min(cra.atom.aniso.calculate_eigenvalues())
                      for cra in m.all() if cra.atom.aniso.nonzero()]
    if not aniso_eig_mins:
        return iso_min
    return min(iso_min, min(aniso_eig_mins) * u_to_b)
# minimum_b()
60
+
61
def determine_blur_for_dencalc(st, grid):
    """Return the B value to add so that the minimum B of st[0] reaches
    the value required for the given grid spacing.

    The result is negative when the model's minimum B already exceeds the requirement.
    """
    lowest_b = minimum_b(st[0])
    required_b = grid**2*8*numpy.pi**2/1.1 # Refmac's way
    return required_b - lowest_b
# determine_blur_for_dencalc()
67
+
68
def remove_charge(sts):
    """Set the charge of every atom in the first model of each structure to zero.

    A warning is logged once if any atom carried a non-zero charge.
    """
    had_charge = False
    for st in sts:
        for cra in st[0].all():
            atom = cra.atom
            had_charge = had_charge or atom.charge != 0
            atom.charge = 0
    if had_charge:
        logger.writeln("Warning: all atomic charges were set to zero.")
# remove_charge()
77
+
78
def check_atomsf(sts, source, mott_bethe=True):
    """Log the scattering-factor coefficients used for the elements in sts.

    source must be "xray", "electron" or "neutron"; mott_bethe is only
    honoured for "electron". For X-ray and Mott-Bethe the IT92 table is
    used per (element, charge), falling back to the neutral element when no
    exact entry exists. Otherwise charges are ignored and c4322 (electron)
    or neutron92 coefficients are listed per element.
    """
    assert source in ("xray", "electron", "neutron")
    mott_bethe = mott_bethe and source == "electron"
    use_it92 = source == "xray" or mott_bethe
    title = "electron (Mott-Bethe)" if mott_bethe else source
    logger.writeln("Atomic scattering factors for {}".format(title))
    if not use_it92:
        logger.writeln(" Note that charges will be ignored")

    el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
    if use_it92:
        shown = set()
        ordered = sorted(el_charges, key=lambda ec: (ec[0].atomic_number, ec[1]))
        for el, charge in ordered:
            sf = gemmi.IT92_get_exact(el, charge)
            if not sf:
                logger.writeln(" Warning: no scattering factor found for {}{:+}".format(el.name, charge))
                # fall back to the neutral element
                sf, charge = el.it92, 0
            key = (el, charge)
            if key in shown:
                continue
            if charge == 0:
                label = el.name
            else:
                label = "{}{:+}".format(el.name, charge)
            logger.writeln(" {} {}".format(label, tuple(sf.get_coefs())))
            shown.add(key)
    else:
        elems = {ec[0] for ec in el_charges}
        for el in sorted(elems, key=lambda e: e.atomic_number):
            if source == "electron":
                sf = el.c4322
            else:
                sf = el.neutron92
            logger.writeln(" {} {}".format(el.name, tuple(sf.get_coefs())))
    logger.writeln("")
# check_atomsf()
104
+
105
def calc_sum_ab(st):
    """Return the occupancy-weighted sum over atoms of st[0] of sum_i(a_i * b_i),
    where a and b are the IT92 coefficients of the atom's element.
    """
    per_element = {}  # element -> sum(a*b), computed once per element
    total = 0.
    for cra in st[0].all():
        atom = cra.atom
        el = atom.element
        ab = per_element.get(el)
        if ab is None:
            coef = el.it92
            ab = sum(a * b for a, b in zip(coef.a, coef.b))
            per_element[el] = ab
        total += ab * atom.occ
    return total
# calc_sum_ab()
115
+
116
def calc_fc_fft(st, d_min, source, mott_bethe=True, monlib=None, blur=None, cutoff=1e-5, rate=1.5,
                omit_proton=False, omit_h_electron=False, miller_array=None):
    """Calculate Fc of the first model of st by putting density on a grid and Fourier-transforming it.

    st: structure; it is cloned before being modified when it contains hydrogens.
    d_min: resolution limit given to the density calculator and to prepare_asu_data().
    source: "xray", "electron" or "neutron".
    mott_bethe: use the Mott-Bethe formula; forced to False unless source is "electron".
    monlib: monomer library; when given (with Mott-Bethe and hydrogens present) a topology
        is prepared and hydrogen positions are shifted.
    blur: B value added in density calculation; if None it is determined by
        determine_blur_for_dencalc(). It is passed as unblur= when values are extracted.
    cutoff, rate: passed to the density calculator.
    omit_proton, omit_h_electron: require Mott-Bethe (asserted) when hydrogens exist.
        If both are set, hydrogens are removed instead. Ignored with a warning if
        the model has no hydrogen.
    miller_array: if None, asu data from grid.prepare_asu_data() is returned; otherwise
        the result of grid.get_value_by_hkl() for these indices.
    """
    assert source in ("xray", "electron", "neutron")
    if source != "electron": mott_bethe = False
    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    #blur = max(0, blur) # negative blur may cause non-positive definite in case of anisotropic Bs
    logger.writeln("Setting blur= {:.2f} in density calculation (unblurred later)".format(blur))
    topo = None
    if st[0].has_hydrogen():
        # work on a copy because hydrogens may be removed or moved below
        st = st.clone()
        if source == "neutron":
            # nothing happens if not st.has_d_fraction
            st.store_deuterium_as_fraction(False)
        if omit_proton or omit_h_electron:
            assert mott_bethe
            if omit_proton and omit_h_electron:
                # omitting both parts of hydrogen is the same as having no hydrogen
                logger.writeln("omit_proton and omit_h_electron requested. removing hydrogens")
                st.remove_hydrogens()
                omit_proton = omit_h_electron = False
        if mott_bethe and not omit_proton and monlib is not None:
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)
            # Shift electron positions
            topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    elif omit_proton or omit_h_electron:
        logger.writeln("WARNING: omit_proton/h_electron requested, but no hydrogen exists!")
        omit_proton = omit_h_electron = False

    # Mott-Bethe starts from X-ray form factors
    if source == "xray" or mott_bethe:
        dc = gemmi.DensityCalculatorX()
    elif source == "electron":
        dc = gemmi.DensityCalculatorE()
    elif source == "neutron":
        dc = gemmi.DensityCalculatorN()
    else:
        raise RuntimeError("unknown source")

    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate
    dc.set_grid_cell_and_spacegroup(st)

    t_start = time.time()
    if mott_bethe:
        # method_str is only used for the log message
        if omit_proton:
            method_str = "proton-omit Fc"
        elif omit_h_electron:
            if topo is None:
                method_str = "hydrogen electron-omit Fc"
            else:
                method_str = "hydrogen electron-omit, proton-shifted Fc"
        elif topo is not None:
            method_str = "proton-shifted Fc"
        else:
            method_str = "Fc"

        logger.writeln("Calculating {} using Mott-Bethe formula".format(method_str))

        dc.initialize_grid()
        # Z of hydrogen is excluded here; it is handled separately below
        dc.addends.subtract_z(except_hydrogen=True)

        if omit_h_electron:
            # density is calculated from a hydrogen-free copy; st keeps hydrogens for the proton term
            st2 = st.clone()
            st2.remove_hydrogens()
            dc.add_model_density_to_grid(st2[0])
        else:
            dc.add_model_density_to_grid(st[0])

        # Subtract hydrogen Z
        if not omit_proton and st[0].has_hydrogen():
            if topo is not None:
                # Shift proton positions
                topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                               default_scale=restraints.default_proton_scale)
            for cra in st[0].all():
                if cra.atom.is_hydrogen():
                    dc.add_c_contribution_to_grid(cra.atom, -1)

        dc.grid.symmetrize_sum()
        # value passed as mott_bethe_000 to get_value_by_hkl() below
        sum_ab = calc_sum_ab(st) * len(st.find_spacegroup().operations())
        mb_000 = sum_ab * gemmi.mott_bethe_const() / 4
    else:
        logger.writeln("Calculating Fc")
        dc.put_model_density_on_grid(st[0])
        mb_000 = 0

    logger.writeln(" done. Fc calculation time: {:.1f} s".format(time.time() - t_start))
    grid = gemmi.transform_map_to_f_phi(dc.grid)

    if miller_array is None:
        # note: mb_000 is not used in this branch
        return grid.prepare_asu_data(dmin=d_min, mott_bethe=mott_bethe, unblur=dc.blur)
    else:
        return grid.get_value_by_hkl(miller_array, mott_bethe=mott_bethe, unblur=dc.blur,
                                     mott_bethe_000=mb_000)
# calc_fc_fft()
213
+
214
def calc_fc_direct(st, d_min, source, mott_bethe, monlib=None, miller_array=None):
    """Calculate Fc of the first model of st by direct summation, one reflection at a time.

    st: structure; cloned before hydrogen positions are shifted.
    d_min: resolution limit used to generate Miller indices when miller_array is None.
    source: "xray" or "electron".
    mott_bethe: use the Mott-Bethe formula; forced to False unless source is "electron".
    monlib: monomer library; when given (with Mott-Bethe and hydrogens present) a topology
        is prepared so that hydrogens can be moved to nucleus positions.
    miller_array: Miller indices to calculate. If given, a numpy array of values is
        returned; otherwise a gemmi.ComplexAsuData for the generated indices.
    """
    assert source in ("xray", "electron")
    if source != "electron": mott_bethe = False

    miller_array_given = miller_array is not None
    unit_cell = st.cell
    spacegroup = gemmi.SpaceGroup(st.spacegroup_hm)
    if not miller_array_given: miller_array = gemmi.make_miller_array(unit_cell, spacegroup, d_min)
    topo = None

    # Mott-Bethe starts from X-ray form factors
    if source == "xray" or mott_bethe:
        calc = gemmi.StructureFactorCalculatorX(st.cell)
    else:
        calc = gemmi.StructureFactorCalculatorE(st.cell)


    if source == "electron" and mott_bethe:
        if monlib is not None and st[0].has_hydrogen():
            st = st.clone()
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)

        calc.addends.clear()
        # Z of hydrogen is excluded here; it is added in the second pass below (only if topo exists)
        calc.addends.subtract_z(except_hydrogen=True)

    vals = []
    for hkl in miller_array:
        sf = calc.calculate_sf_from_model(st[0], hkl) # attention: traverse cell.images
        if mott_bethe: sf *= calc.mott_bethe_factor()
        vals.append(sf)

    if topo is not None:
        # move hydrogens to nucleus positions, then add the hydrogen-only Z term
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                       default_scale=restraints.default_proton_scale)

        for i, hkl in enumerate(miller_array):
            sf = calc.calculate_mb_z(st[0], hkl, only_h=True)
            if mott_bethe: sf *= calc.mott_bethe_factor()
            vals[i] += sf

    if miller_array_given:
        return numpy.array(vals)
    else:
        asu = gemmi.ComplexAsuData(unit_cell, spacegroup,
                                   miller_array, vals)
        return asu
# calc_fc_direct()
262
+
263
def get_em_expected_hydrogen(st, d_min, monlib, weights=None, blur=None, cutoff=1e-5, rate=1.5, optimize=False):
    """Return a copy of st whose hydrogen positions are replaced by the peak positions
    of a map calculated (with the Mott-Bethe formula) for each hydrogen alone.

    st: structure containing hydrogens (asserted); not modified, a clone is returned.
    d_min: resolution limit for density calculation and asu data.
    monlib: monomer library used to prepare the topology.
    weights: optional pair (s_list, w_list); Fourier coefficients are multiplied by
        w interpolated at 1/d.
    blur: B value for density calculation; determined automatically if None and clipped at 0.
    cutoff, rate: passed to the density calculator.
    optimize: if true, the peak position is refined with maps.optimize_peak().
    """
    # Very crude implementation to find peak from calculated map
    assert st[0].has_hydrogen()
    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    blur = max(0, blur)
    logger.writeln("Setting blur= {:.2f} in density calculation".format(blur))

    st = st.clone()
    topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
    resnames = st[0].get_all_residue_names()
    restraints.check_monlib_support_nucleus_distances(monlib, resnames)

    # st_e: hydrogens at electron-cloud positions; st_n (and st itself): at nucleus positions
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    st_e = st.clone()
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus)
    st_n = st.clone()

    dc = gemmi.DensityCalculatorX()
    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate

    # Decide box_size
    max_r = max([dc.estimate_radius(cra.atom) for cra in st[0].all()])
    logger.writeln("max_r= {:.2f}".format(max_r))
    box_size = max_r*2 + 1 # padding
    logger.writeln("box_size= {:.2f}".format(box_size))
    # mode_all is a hard-coded debug switch; with False each hydrogen is treated in its own small P1 box
    mode_all = False #True
    if mode_all:
        dc.set_grid_cell_and_spacegroup(st)
    else:
        dc.grid.unit_cell = gemmi.UnitCell(box_size, box_size, box_size, 90, 90, 90)
        dc.grid.spacegroup = gemmi.SpaceGroup("P1")
        cbox = gemmi.Position(box_size/2, box_size/2, box_size/2)

    if mode_all: dc.initialize_grid()

    if weights is not None:
        w_s, w_w = weights # s_list and w_list
    else:
        w_s, w_w = None, None

    # index-based loops because the same (chain, residue, atom) indices address st, st_n and st_e
    for ichain in range(len(st[0])):
        chain = st[0][ichain]
        for ires in range(len(chain)):
            residue = chain[ires]
            for iatom in range(len(residue)):
                atom = residue[iatom]
                if not atom.is_hydrogen(): continue
                h_n = st_n[0][ichain][ires][iatom]
                h_e = st_e[0][ichain][ires][iatom]
                if not mode_all:
                    dc.initialize_grid()
                    h_n.occ = 1.
                    h_e.occ = 1.
                    # put the nucleus at the box centre and keep the electron-nucleus offset
                    n_pos = gemmi.Position(h_n.pos)
                    h_n.pos = cbox
                    h_e.pos = cbox + h_e.pos - n_pos
                dc.add_atom_density_to_grid(h_e)
                dc.add_c_contribution_to_grid(h_n, -1)
                if not mode_all:
                    grid = gemmi.transform_map_to_f_phi(dc.grid)
                    asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
                    if w_s is not None:
                        asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)

                    denmap = asu_data.transform_f_phi_to_map(exact_size=(int(box_size*10), int(box_size*10), int(box_size*10)))
                    m = numpy.unravel_index(numpy.argmax(denmap), denmap.shape)
                    peakpos = denmap.get_position(m[0], m[1], m[2])
                    if optimize: peakpos = maps.optimize_peak(denmap, peakpos)
                    # translate the peak from box coordinates back to model coordinates
                    atom.pos = peakpos - cbox + n_pos

    if mode_all:
        # debug path: writes the map of all hydrogens to "debug.ccp4" in the current directory
        grid = gemmi.transform_map_to_f_phi(dc.grid)
        asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
        if w_s is not None:
            asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)
        denmap = asu_data.transform_f_phi_to_map(sample_rate=3)
        ccp4 = gemmi.Ccp4Map()
        ccp4.grid = denmap
        ccp4.update_ccp4_header(2, True) # float, update stats
        ccp4.write_ccp4_map("debug.ccp4")

    return st

# get_em_expected_hydrogen()
350
+
351
def translate_into_box(st, origin=None, apply_shift=True):
    """Compute the unit-cell translation that brings the centre of mass of st[0]
    into the cell whose corner is at origin (default 0,0,0).

    The Cartesian shift is returned as a numpy array. When apply_shift is
    true the translation is also applied to every model of st.
    """
    if origin is None:
        origin = gemmi.Position(0, 0, 0)

    # apply unit cell translations to put model into a box (unit cell)
    cell = st.cell
    orth = numpy.array(cell.orthogonalization_matrix)
    frac_t = numpy.array(cell.fractionalization_matrix).transpose()
    center = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())
    shift = 0
    for axis in range(3):
        n_cells = numpy.floor(1 - numpy.dot(center, frac_t[:, axis]))
        shift = shift + orth[:, axis] * n_cells
    tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
    if apply_shift:
        for model in st:
            model.transform_pos_and_adp(tr)
    return shift
# translate_into_box()
364
+
365
def box_from_model(model, padding):
    """Return an orthogonal gemmi.UnitCell whose edges are the coordinate
    extents of the model's atoms plus padding.
    """
    coords = numpy.array([cra.atom.pos.tolist() for cra in model.all()])
    lower = numpy.min(coords, axis=0)
    upper = numpy.max(coords, axis=0)
    edges = upper - lower + padding
    return gemmi.UnitCell(edges[0], edges[1], edges[2], 90, 90, 90)
# box_from_model()
371
+
372
def cra_to_indices(cra, model):
    """Return (chain index, residue index, atom index) of cra within model.

    An element stays None when no match is found at that level. The search
    does not stop at the first match, so the last matching index is kept.
    """
    i_chain = i_res = i_atom = None
    for ic, chain in enumerate(model):
        if cra.chain != chain:
            continue
        i_chain = ic
        for ir, res in enumerate(chain):
            if cra.residue != res:
                continue
            i_res = ir
            for ia, atom in enumerate(res):
                if cra.atom == atom:
                    i_atom = ia

    return (i_chain, i_res, i_atom)
# cra_to_indices()
388
+
389
def cra_to_atomaddress(cra):
    """Build a gemmi.AtomAddress (including the residue segment) that refers to cra."""
    chain, res, atom = cra.chain, cra.residue, cra.atom
    address = gemmi.AtomAddress(chain.name, res.seqid, res.name, atom.name, atom.altloc)
    address.res_id.segment = res.segment
    return address
# cra_to_atomaddress()
396
+
397
def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
    """Detect atoms of st[0] lying on special positions and optionally correct them in place.

    An atom is regarded as being on a special position when a contact between the atom
    and its own symmetry image is found within special_pos_threshold * 2.

    fix_occ: divide the occupancy by the multiplicity when the summed occupancy exceeds 1.
    fix_pos: move the atom to the average of its own and its images' positions.
    fix_adp: replace a non-zero anisotropic tensor by the average over the images.

    Returns a list of tuples (atom, images, mat_total, mat_total_aniso), where images is
    the list of image indices, mat_total is the 3x3 average of the identity and the
    image operators in Cartesian space, and mat_total_aniso is the pseudo-inverse of
    the corresponding 6x6 average built with mat33_as66().
    """
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(special_pos_threshold * 2)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    cs.special_pos_cutoff_sq = 0
    results = cs.find_contacts(ns)
    found = {}  # atom -> list of image indices
    cra = {}    # atom -> its CRA (used for logging only)
    for r in results:
        # keep only contacts of an atom with its own symmetry image
        if r.partner1.atom != r.partner2.atom: continue
        found.setdefault(r.partner1.atom, []).append(r.image_idx)
        cra[r.partner1.atom] = r.partner1

    if found: logger.writeln("Atoms on special position detected.")
    tostr = lambda x: ", ".join("{:.3e}".format(v) for v in x)
    ret = []
    for atom in found:
        images = found[atom]
        n_images = len(images) + 1  # +1 for the atom itself (identity)
        sum_occ = atom.occ * n_images
        logger.writeln(" {} multiplicity= {} images= {} occupancies_total= {:.2f}".format(cra[atom], n_images, images, sum_occ))
        if sum_occ > 1.001 and fix_occ:
            new_occ = atom.occ / n_images
            logger.writeln(" correcting occupancy= {:.2f}".format(new_occ))
            atom.occ = new_occ
        if fix_pos:
            # image_idx is 1-based with respect to st.cell.images, hence i-1
            fpos = gemmi.Fractional(st.cell.frac.apply(atom.pos))
            fdiff = sum([(st.cell.images[i-1].apply(fpos) - fpos).wrap_to_zero() for i in images], gemmi.Fractional(0,0,0)) / n_images
            diff = st.cell.orth.apply(fdiff)
            atom.pos += gemmi.Position(diff)
            logger.writeln(" correcting position= {}".format(tostr(atom.pos.tolist())))
            logger.writeln(" pos_viol= {}".format(tostr(diff.tolist())))
        if fix_adp and atom.aniso.nonzero():
            aniso_bak = atom.aniso.elements_pdb()
            # average the tensor over identity and images in fractional space, then convert back
            fani = atom.aniso.transformed_by(st.cell.frac.mat)
            fani_avg = sum([fani.transformed_by(st.cell.images[i-1].mat) for i in images], fani).scaled(1/n_images)
            atom.aniso = fani_avg.transformed_by(st.cell.orth.mat)
            diff = numpy.array(atom.aniso.elements_pdb()) - aniso_bak
            logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
            logger.writeln(" aniso_viol= {}".format(tostr(diff)))

        # image operators expressed in Cartesian space: orth * op * frac
        mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat for i in images]
        mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
        mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
        mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
        ret.append((atom, images, mat_total, mat_total_aniso))

    return ret
# find_special_positions()
446
+
447
def expand_ncs(st, special_pos_threshold=0.01, howtoname=gemmi.HowToNameCopiedChain.Short):
    """Expand the NCS operators of st in place; does nothing when st has none.

    find_special_positions() is run first with its default fix_* options.
    """
    # TODO modify st.connections for atoms at special positions
    if len(st.ncs) > 0:
        find_special_positions(st, special_pos_threshold) # just to show info, a bit waste of cpu time..
        logger.writeln("Expanding symmetry..")
        st.expand_ncs(howtoname, merge_dist=1e-4)
# expand_ncs()
454
+
455
def prepare_assembly(name, chains, ops, is_helical=False):
    """Create a gemmi.Assembly named name with one generator that applies ops to chains.

    An identity operator is prepended when ops contains none. Non-identity
    operators are labelled as helical or point symmetry operations according
    to is_helical, which also selects the assembly's special_kind.
    """
    if is_helical:
        op_type = "helical symmetry operation"
        kind = gemmi.AssemblySpecialKind.RepresentativeHelical
    else:
        op_type = "point symmetry operation"
        kind = gemmi.AssemblySpecialKind.CompletePoint

    assembly = gemmi.Assembly(name)
    gen = gemmi.Assembly.Gen()
    has_identity = any([nop.tr.is_identity() for nop in ops])
    if not has_identity:
        gen.operators.append(gemmi.Assembly.Operator()) # add identity
    for nop in ops:
        oper = gemmi.Assembly.Operator()
        oper.transform = nop.tr
        if not nop.tr.is_identity():
            oper.type = op_type
        gen.operators.append(oper)
    gen.chains = chains
    assembly.generators.append(gen)
    assembly.special_kind = kind
    return assembly
# prepare_assembly()
477
+
478
def filter_contacting_ncs(st, cutoff=5.):
    """Keep only those NCS operators of st whose copies are in contact
    (within cutoff) with the first model; st.ncs is replaced in place.
    """
    if len(st.ncs) == 0:
        return
    logger.writeln("Filtering out non-contacting NCS copies with cutoff={:.2f} A".format(cutoff))
    st.setup_cell_images()
    # This is considered crystallographic cell if not 1 1 1. Undesirable result may be seen.
    search = gemmi.NeighborSearch(st[0], st.cell, cutoff*2).populate()
    contact_search = gemmi.ContactSearch(cutoff)
    contact_search.ignore = gemmi.ContactSearch.Ignore.SameAsu
    indices = {r.image_idx for r in contact_search.find_contacts(search)}
    logger.writeln(" contacting copies: {}".format(indices))
    # XXX is this correct? maybe yes as long as identity operator is not there
    kept_ops = [st.ncs[idx-1] for idx in indices]
    st.ncs.clear()
    st.ncs.extend(kept_ops)
# filter_contacting_ncs()
492
+
493
def check_symmetry_related_model_duplication(st, distance_cutoff=0.5, max_allowed_ratio=0.5):
    """Check whether the model seems to contain symmetry-related duplicates.

    Atoms of st[0] that are within distance_cutoff of an atom of another
    symmetry copy are counted. Returns True when the fraction of such atoms
    exceeds max_allowed_ratio, i.e. when there are too many contacts.
    Returns False for a model without atoms (nothing can be duplicated).
    Note that st.setup_cell_images() is called as a side effect.
    """
    logger.writeln("Checking if model in asu is given.")
    n_atoms = st[0].count_atom_sites()
    st.setup_cell_images()
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(distance_cutoff)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    results = cs.find_contacts(ns)
    contacting_atoms = {atom for r in results for atom in (r.partner1.atom, r.partner2.atom)}
    n_contacting_atoms = len(contacting_atoms)
    logger.writeln(" N_atoms= {} N_contacting_atoms= {}".format(n_atoms, n_contacting_atoms))
    if n_atoms == 0:
        # avoid ZeroDivisionError for an empty model
        return False
    return n_contacting_atoms / n_atoms > max_allowed_ratio # return True if too many contacts
# check_symmetry_related_model_duplication()
505
+
506
def adp_analysis(st, ignore_zero_occ=True):
    """Log a table of B-value quantiles (min, Q1, median, Q3, max) per chain.

    One table is written for each model of st using adp_stats_per_chain().
    ignore_zero_occ: exclude atoms with zero occupancy from the statistics.
    """
    logger.writeln("= ADP analysis =")
    if ignore_zero_occ:
        logger.writeln("(zero-occupancy atoms are ignored)")

    # Row labels are 9 characters wide ("Chain " + 3-character name), followed by
    # five right-aligned 5-character columns; header and "All" row use the same widths.
    label_fmt = "{:9s}"
    header = label_fmt.format("") + "".join(" {:>5s}".format(x) for x in ("min", "Q1", "med", "Q3", "max"))
    values_fmt = " {:5.1f} {:5.1f} {:5.1f} {:5.1f} {:5.1f}"
    for i, mol in enumerate(st):
        if len(st) > 1: logger.writeln("Model {}:".format(i))
        logger.writeln(header)
        for chain, _, qs in adp_stats_per_chain(mol, ignore_zero_occ):
            label = "Chain {:3s}".format(chain) if chain != "*" else label_fmt.format("All")
            logger.writeln(label + values_fmt.format(*qs))
    logger.writeln("")
# adp_analysis()
520
+
521
def adp_stats_per_chain(model, ignore_zero_occ=True):
    """Return B-value quantiles for each chain of model.

    ignore_zero_occ: if true, atoms with occupancy <= 0 are not counted.

    Returns a list of (chain name, number of atoms, quantiles) tuples in the
    order chains appear in the model, where quantiles is a numpy array of
    [min, Q1, median, Q3, max] of b_iso. Chains sharing a name are pooled and
    reported once. Chains without any counted atom are skipped. When more
    than one chain name is present, a final entry named "*" gives the
    statistics over all counted atoms.
    """
    quantile_points = [0, 0.25, 0.5, 0.75, 1]
    bs = {}  # chain name -> list of b_iso
    for cra in model.all():
        if not ignore_zero_occ or cra.atom.occ > 0:
            bs.setdefault(cra.chain.name, []).append(cra.atom.b_iso)

    ret = []
    seen = set()
    for chain in model:
        name = chain.name
        # skip repeated names, and chains with nothing to report
        # (e.g. empty or all atoms having zero occupancy)
        if name in seen or name not in bs: continue
        seen.add(name)
        ret.append((name, len(bs[name]), numpy.quantile(bs[name], quantile_points)))

    if len(bs) > 1:
        all_bs = list(itertools.chain.from_iterable(bs.values()))
        ret.append(("*", len(all_bs), numpy.quantile(all_bs, quantile_points)))

    return ret
# adp_stats_per_chain()
540
+
541
def reset_adp(model, bfactor=None, is_aniso=False):
    """Reset atomic displacement parameters of all atoms in model, in place.

    bfactor: if not None, b_iso of every atom is set to this value.
    is_aniso: if false, anisotropic tensors are zeroed. If true, atoms that
        have no anisotropic tensor (or all atoms, when bfactor is given) get
        an isotropic tensor equivalent to their b_iso.
    """
    override = bfactor is not None
    for cra in model.all():
        atom = cra.atom
        if override:
            atom.b_iso = bfactor
        if is_aniso:
            if override or not atom.aniso.nonzero():
                u_iso = atom.b_iso * b_to_u
                atom.aniso = gemmi.SMat33f(u_iso, u_iso, u_iso, 0, 0, 0)
        else:
            atom.aniso = gemmi.SMat33f(0, 0, 0, 0, 0, 0)
# reset_adp()
552
+
553
def shift_b(model, b):
    """Add b to b_iso of every atom in model, in place.

    For atoms with a non-zero anisotropic tensor the equivalent U shift is
    added to its diagonal elements.
    """
    u_shift = b * b_to_u
    for cra in model.all():
        atom = cra.atom
        atom.b_iso += b
        if not atom.aniso.nonzero():
            continue
        aniso = atom.aniso
        for key in ("u11", "u22", "u33"):
            setattr(aniso, key, getattr(aniso, key) + u_shift)
# shift_b()
562
+
563
def all_chain_ids(st):
    """Return the names of all chains of all models in st, in order (duplicates kept)."""
    names = []
    for model in st:
        names.extend(chain.name for chain in model)
    return names
# all_chain_ids()
566
+
567
def all_B(st, ignore_zero_occ=True):
    """Return a list of b_iso of all atoms in all models of st.

    Atoms with occupancy <= 0 are skipped when ignore_zero_occ is true.
    """
    return [cra.atom.b_iso
            for mol in st
            for cra in mol.all()
            if not ignore_zero_occ or cra.atom.occ > 0]
# all_B()
576
+
577
def mat33_as66(m):
    """Return the 6x6 matrix R' equivalent to the transformation R U R^T.

    m is a 3x3 matrix R (indexable as m[i][j]) acting on a symmetric 3x3
    matrix U. The result acts on the 6-element vector
    (U11, U22, U33, U12, U13, U23) and yields the same elements of R U R^T.
    """
    pairs = ((0,0), (1,1), (2,2), (0,1), (0,2), (1,2))
    r = numpy.zeros((6,6))
    for row, (i, j) in enumerate(pairs):
        for col, (k, l) in enumerate(pairs):
            if k == l:
                # diagonal element of U appears once
                r[row, col] = m[i][k] * m[j][k]
            else:
                # off-diagonal element of U appears twice (U_kl and U_lk)
                r[row, col] = m[i][k] * m[j][l] + m[i][l] * m[j][k]
    return r
589
def adp_constraints(ops, cell, tr0=True):
    """Return the directions in 6-element ADP space that are allowed to vary under ops.

    think about f = (b-Rb)^T (b-Rb) = b^T b - b^T R b -b^T R^T b + b^T R^T R b
    d^2f/db db^T = 2I - 2(R+R^T) + 2(R^T R)
    eigenvectors of this second derivative matrix corresponding to 0-valued
    eigenvalues are directions to refine.

    ops: symmetry operations, converted with cell.op_as_transform().
    tr0: if true, a term on the three diagonal elements is added to the matrix
        (presumably to constrain the trace to zero - confirm with callers).
    Returns an array of shape (n, 6), one direction per row (n may be 0);
    components smaller than 1e-9 in magnitude are set to zero.
    """
    hessian = numpy.zeros((6,6))
    if tr0:
        hessian[:3,:3] += numpy.ones((3,3)) * 2
    for op in ops:
        r = mat33_as66(cell.op_as_transform(op).mat.tolist())
        hessian += 2 * numpy.identity(6) - 2 * (r + r.T) + 2 * numpy.dot(r.T, r)

    evals, evecs = numpy.linalg.eigh(hessian)
    is_zero = numpy.isclose(evals, 0)
    if not is_zero.any():
        return numpy.empty((0, 6))

    # eigenvectors are the columns of evecs; return them as rows
    directions = evecs[:, is_zero].T
    return numpy.where(numpy.abs(directions) < 1e-9, 0, directions)
# adp_constraints()
612
+
613
def to_dataframe(st):
    """Convert all atoms of all models in st to a pandas.DataFrame, one row per atom.

    Columns: model, chain, resn, subchain, segment, seqnum, icode, altloc,
    u11..u23, b_iso, charge, elem, atom, occ, x, y, z, tlsgroup.
    """
    keys = ("model", "chain", "resn", "subchain", "segment", "seqnum", "icode", "altloc",
            "u11", "u22", "u33", "u12", "u13", "u23",
            "b_iso", "charge", "elem", "atom", "occ",
            "x", "y", "z", "tlsgroup")
    columns = {k: [] for k in keys}

    for m in st:
        for cra in m.all():
            c, r, a = cra.chain, cra.residue, cra.atom
            # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
            # values in the same order as keys
            record = (m.name, c.name, r.name, r.subchain, r.segment,
                      r.seqid.num, r.seqid.icode, a.altloc,
                      a.aniso.u11, a.aniso.u22, a.aniso.u33,
                      a.aniso.u12, a.aniso.u13, a.aniso.u23,
                      a.b_iso, a.charge, a.element.name, a.name, a.occ,
                      a.pos.x, a.pos.y, a.pos.z, a.tls_group_id)
            for key, value in zip(keys, record):
                columns[key].append(value)

    return pandas.DataFrame(data=columns)
# to_dataframe()
651
+
652
def from_dataframe(df, st=None): # Slow!
    """Build a gemmi.Structure from a DataFrame with the columns written by to_dataframe().

    st: optional template; it is cloned and the clone's models are removed
        before the models from df are added. A new empty Structure is used
        when st is None.
    Models, chains and residues are created in the iteration order of
    pandas groupby() with its default options.
    """
    if st is None:
        st = gemmi.Structure()
    else:
        st = st.clone()
        while len(st) > 0:
            del st[0]

    residue_keys = ["seqnum","icode","resn","segment","subchain"]
    aniso_keys = ("u11", "u22", "u33", "u12", "u13", "u23")
    for m_name, dm in df.groupby("model"):
        st.add_model(gemmi.Model(m_name))
        model = st[-1]
        for c_name, dc in dm.groupby("chain"):
            model.add_chain(gemmi.Chain(c_name))
            chain = model[-1]
            for rkey, dr in dc.groupby(residue_keys):
                seqnum, icode, resn, segment, subchain = rkey
                chain.add_residue(gemmi.Residue())
                res = chain[-1]
                res.seqid.num = seqnum
                res.seqid.icode = icode
                res.name = resn
                res.segment = segment
                res.subchain = subchain
                for _, row in dr.iterrows():
                    res.add_atom(gemmi.Atom())
                    atom = res[-1]
                    atom.altloc = row["altloc"]
                    atom.name = row["atom"]
                    for key in aniso_keys:
                        setattr(atom.aniso, key, row[key])
                    atom.b_iso = row["b_iso"]
                    atom.charge = row["charge"]
                    atom.element = gemmi.Element(row["elem"])
                    atom.occ = row["occ"]
                    atom.pos.x = row["x"]
                    atom.pos.y = row["y"]
                    atom.pos.z = row["z"]
                    atom.tls_group_id = row["tlsgroup"]

    return st
# from_dataframe()
696
+
697
def st_from_positions(positions, bs=None, qs=None):
    """Create a Structure with one chain "A" holding an "O" atom in a HOH residue per position.

    bs and qs give b_iso and occupancy per position (default 0 and 1).
    Residues are numbered from 0 in the order of positions.
    """
    if bs is None:
        bs = (0. for _ in range(len(positions)))
    if qs is None:
        qs = (1. for _ in range(len(positions)))

    st = gemmi.Structure()
    st.add_model(gemmi.Model("1"))
    st[0].add_chain(gemmi.Chain("A"))
    chain = st[0][0]
    seqnum = 0
    for pos, b, q in zip(positions, bs, qs):
        chain.add_residue(gemmi.Residue())
        water = chain[-1]
        water.seqid.num = seqnum
        water.name = "HOH"
        water.add_atom(gemmi.Atom())
        oxygen = water[-1]
        oxygen.name = "O"
        oxygen.element = gemmi.Element("O")
        oxygen.pos = pos
        oxygen.b_iso = b
        oxygen.occ = q
        seqnum += 1

    return st
# st_from_positions()
719
+
720
def invert_model(st):
    """Mirror the first model of st in place by flipping the x axis about
    half the row sums of the orthogonalization matrix.

    Only st[0] is transformed (positions and ADPs).
    """
    # invert x-axis
    orth = numpy.array(st.cell.orthogonalization_matrix.tolist())
    half_sum = orth.sum(axis=1) / 2
    center = gemmi.Vec3(*half_sum)
    flip_x = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
    # x' = M (x - c) + c  =  M x + (M(-c) + c)
    translation = flip_x.multiply(-center) + center
    st[0].transform_pos_and_adp(gemmi.Transform(flip_x, translation))

    # invert peptides
# invert_model()
732
+
733
def cx_to_mx(ss): #SmallStructure to Structure
    """Convert a gemmi SmallStructure to a Structure with a single model,
    chain "A" and one residue named "00" (seqid 1) containing all sites as atoms.
    """
    st = gemmi.Structure()
    st.spacegroup_hm = ss.spacegroup_hm
    st.cell = ss.cell
    st.add_model(gemmi.Model("1"))
    st[-1].add_chain(gemmi.Chain("A"))
    st[-1][-1].add_residue(gemmi.Residue())
    res = st[-1][-1][-1]
    res.seqid.num = 1
    res.name = "00"

    # matrix used to transform site ADPs before storing them on the atoms
    ruc = ss.cell.reciprocal()
    cif2cart = ss.cell.orthogonalization_matrix.multiply_by_diagonal(gemmi.Vec3(ruc.a, ruc.b, ruc.c))

    for site in ss.sites:
        res.add_atom(gemmi.Atom())
        a = res[-1]
        a.name = site.label
        u_cart = site.aniso.transformed_by(cif2cart)
        a.aniso = gemmi.SMat33f(u_cart.u11, u_cart.u22, u_cart.u33,
                                u_cart.u12, u_cart.u13, u_cart.u23)
        a.b_iso = site.u_iso * u_to_b
        #a.charge = ?
        a.element = site.element
        a.occ = site.occ
        a.pos = site.orth(ss.cell)

    return st
# cx_to_mx()
760
+
761
def fix_deuterium_residues(st):
    """Rename DOD residues of the first model to HOH, in place.

    Residue names in st.connections are updated as well. A warning with the
    number of renamed residues is logged when any residue was changed.
    """
    # we do not have DOD. will not change ND4->NH4 and SPW->SPK, as hydrogen atom names are different
    heavy_waters = [res for chain in st[0] for res in chain if res.name == "DOD"]
    for res in heavy_waters:
        res.name = "HOH"
    for con in st.connections:
        for partner in (con.partner1, con.partner2):
            if partner.res_id.name == "DOD":
                partner.res_id.name = "HOH"
    n_changed = len(heavy_waters)
    if n_changed > 0:
        logger.writeln("Warning: {} DOD residues have been renamed to HOH".format(n_changed))