servalcat 0.4.131__cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-314t-x86_64-linux-gnu.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +1162 -0
  7. servalcat/refine/refine_geom.py +245 -0
  8. servalcat/refine/refine_spa.py +400 -0
  9. servalcat/refine/refine_xtal.py +339 -0
  10. servalcat/refine/spa.py +151 -0
  11. servalcat/refine/xtal.py +312 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +191 -0
  14. servalcat/refmac/refmac_keywords.py +660 -0
  15. servalcat/refmac/refmac_wrapper.py +423 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +488 -0
  18. servalcat/spa/fsc.py +391 -0
  19. servalcat/spa/localcc.py +197 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +979 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1629 -0
  27. servalcat/utils/fileio.py +836 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +811 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +933 -0
  33. servalcat/utils/refmac.py +759 -0
  34. servalcat/utils/restraints.py +888 -0
  35. servalcat/utils/symmetry.py +298 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +262 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1954 -0
  40. servalcat/xtal/twin.py +316 -0
  41. servalcat-0.4.131.dist-info/METADATA +60 -0
  42. servalcat-0.4.131.dist-info/RECORD +45 -0
  43. servalcat-0.4.131.dist-info/WHEEL +6 -0
  44. servalcat-0.4.131.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.131.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,933 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import gemmi
10
+ import numpy
11
+ import pandas
12
+ import scipy.sparse
13
+ import os
14
+ import time
15
+ import itertools
16
+ import string
17
+ from servalcat import ext
18
+
19
# Module-level setup, executed once when this module is imported.
# It reconfigures gemmi's global IT92 (X-ray form factor) table and mirrors
# the change in the servalcat C++ extension.
gemmi.IT92_normalize()
gemmi.IT92_set_ignore_charge(False)
gemmi.Element("X").it92.set_coefs(gemmi.Element("O").it92.get_coefs()) # treat X (unknown) as O
ext.IT92_normalize_etc(gemmi.Element("O")) # the same changes to gemmi in servalcat c++ code
# Conversion factors between isotropic B and U values (B = 8 * pi^2 * U).
u_to_b = 8 * numpy.pi**2
b_to_u = 1. / u_to_b
25
+
26
+ from servalcat.utils import logger
27
+ from servalcat.utils import restraints
28
+ from servalcat.utils import maps
29
+
30
def shake_structure(st, sigma, copy=True):
    """Add random Gaussian displacements to every atom position.

    Each Cartesian component is drawn independently with standard deviation
    ``sigma / sqrt(3)``, so ``sigma`` is the expected rms displacement of an
    atom in 3D.

    Parameters
    ----------
    st : structure, iterable over models that provide ``all()``
    sigma : float
        Target rms displacement.
    copy : bool
        When true the input is left untouched and ``st.clone()`` is shaken;
        otherwise ``st`` is modified in place.

    Returns
    -------
    The shaken structure (the clone, or ``st`` itself when ``copy`` is false).
    """
    print("Randomizing structure with rmsd of {}".format(sigma))
    target = st.clone() if copy else st

    # convert the 3D rms displacement into a per-axis standard deviation
    per_axis_sigma = sigma / numpy.sqrt(3)
    for model in target:
        for cra in model.all():
            noise = numpy.random.normal(0, per_axis_sigma, 3)
            cra.atom.pos += gemmi.Position(*noise)

    return target
# shake_structure()
45
+
46
def setup_entities(st, clear=False, overwrite_entity_type=False, force_subchain_names=False, fix_sequences=None):
    """(Re)build entity, subchain and label_seq bookkeeping of a structure.

    Parameters
    ----------
    st : structure, modified in place
    clear : bool
        Drop the existing entities and overwrite entity ids first.
    overwrite_entity_type : bool
        Passed to ``st.add_entity_types()``.
    force_subchain_names : bool
        Passed to ``st.assign_subchains()``.
    fix_sequences : bool or None
        When truthy, polymer entities without ``full_sequence`` get the
        residue names of their first subchain (taken from model 0), and
        label_seq ids of model 0 are reset and reassigned.

    NOTE(review): indentation is not available in the reviewed source; the
    label_seq reset is assumed to belong to the ``fix_sequences`` branch.
    """
    if clear:
        st.entities.clear()
        # clear entity_id so that ensure_entities() will work properly
        st.add_entity_ids(overwrite=True)
    st.add_entity_types(overwrite_entity_type)
    st.assign_subchains(force_subchain_names)
    st.ensure_entities()
    st.add_entity_ids()
    st.deduplicate_entities()

    # TODO Add an option to set user-given sequences (use assign_best_sequences)
    if not fix_sequences:
        return

    for entity in st.entities:
        if entity.full_sequence:
            continue
        if entity.entity_type == gemmi.EntityType.Polymer and entity.subchains:
            first_span = st[0].get_subchain(entity.subchains[0])
            entity.full_sequence = [residue.name for residue in first_span]

    # fix label_seq_id
    for chain in st[0]:
        for residue in chain:
            residue.label_seq = None
    st.assign_label_seq_id()
# setup_entities()
69
+
70
def determine_blur_for_dencalc(st, grid):
    """Return the B value to add to all atoms before density calculation.

    The required B for the given grid spacing is ``grid^2 * 8 * pi^2 / 1.1``;
    the returned blur is that value minus the first element of
    ``st[0].calculate_b_aniso_range()``. The result may be negative.

    Parameters
    ----------
    st : structure (only model 0 is inspected)
    grid : float
        Grid spacing.
    """
    smallest_b = st[0].calculate_b_aniso_range()[0]
    required_b = grid**2*8*numpy.pi**2/1.1 # Refmac's way
    return required_b - smallest_b
# determine_blur_for_dencalc()
76
+
77
def remove_charge(sts):
    """Set the charge of every atom in model 0 of each structure to zero.

    A single warning is logged if any atom had a non-zero charge.

    Parameters
    ----------
    sts : iterable of structures, modified in place
    """
    had_charge = False
    for st in sts:
        for cra in st[0].all():
            atom = cra.atom
            if atom.charge != 0:
                had_charge = True
            atom.charge = 0
    if had_charge:
        logger.writeln("Warning: all atomic charges were set to zero.")
# remove_charge()
86
+
87
class CustomCoefUtil:
    """Bookkeeping for custom (per-atom) scattering-factor coefficients.

    Atoms are identified by ``cra2key()``; each atom maps to a ``scat_id``,
    and each ``scat_id`` maps to a list of ten coefficients (a1..a5, b1..b5).
    """
    def __init__(self):
        self.scat_lookup = {} # atom_key: scat_id
        self.elem_lookup = {} # scat_id: set of element_name; there should be a single element for each id though
        self.coeffs = {} # scat_id: coeffs

    def cra2key(self, cra):
        """Return the hashable key (chain name, seqid, atom name, altloc) of an atom."""
        return (cra.chain.name, cra.residue.seqid,
                cra.atom.name, cra.atom.altloc)

    def read_from_cif(self, st, cif_in):
        """Read scat_id assignments and coefficients from an mmCIF file.

        ``_atom_site.id`` / ``_atom_site.scat_id`` give the per-atom
        assignment (matched to ``st`` through the atom serial) and the
        ``_lmb_scat_coef`` category gives the coefficients. Previously read
        lookups are discarded.

        Raises
        ------
        KeyError
            If the file refers to an atom serial not present in ``st[0]``.
        """
        doc = gemmi.cif.read(cif_in)
        # gemmi reads structure from the first block
        block = doc[0]

        # _atom_site.id is read as atom.serial
        # it is mmcif writer's responsibility to ensure unique serials
        serial2cra = {x.atom.serial: x for x in st[0].all()}
        self.scat_lookup = {}
        # reset together with scat_lookup so that a re-read does not keep
        # elements of a previously read file
        self.elem_lookup = {}
        for r in block.find("_atom_site.", ["id", "scat_id"]):
            atom_id = gemmi.cif.as_int(r[0])
            scat_id = gemmi.cif.as_int(r[1])
            cra = serial2cra[atom_id]
            self.scat_lookup[self.cra2key(cra)] = scat_id
            self.elem_lookup.setdefault(scat_id, set()).add(cra.atom.element.name)

        # read coeffs
        self.coeffs = {gemmi.cif.as_int(r[0]): [gemmi.cif.as_number(r[i]) for i in range(1, 11)]
                       for r in block.find("_lmb_scat_coef.", ["scat_id",
                                                               "coef_a1", "coef_a2", "coef_a3", "coef_a4", "coef_a5",
                                                               "coef_b1", "coef_b2", "coef_b3", "coef_b4", "coef_b5"])}
    # read_from_cif()

    def set_coeffs(self, st):
        """Register the coefficients of every atom of ``st[0]`` in gemmi and ext.

        The table passed to ``set_custom_form_factors`` is indexed by atom
        serial.

        Raises
        ------
        RuntimeError
            If an atom has no scat_id assignment.
        """
        #logger.writeln("debug: using c4322")
        max_serial = max(cra.atom.serial for cra in st[0].all())
        pp = [[0.]*10 for _ in range(max_serial+1)]
        for cra in st[0].all():
            scat_id = self.scat_lookup.get(self.cra2key(cra))
            if scat_id is None:
                raise RuntimeError(f"scat_id unknown {cra}")
            pp[cra.atom.serial] = self.coeffs[scat_id]
            #pp[cra.atom.serial] = cra.atom.element.c4322.get_coefs() # test
        gemmi.set_custom_form_factors(pp)
        ext.set_custom_form_factors(pp)
    # set_coeffs()

    def show_info(self):
        """Log a table of coefficients with atom counts and elements per scat_id."""
        logger.writeln("Custom atomic scattering factors will be used")
        df = pandas.DataFrame([[k]+v for k, v in self.coeffs.items()],
                              columns=["scat_id"] +[f"{k}{i+1}" for k in ("a", "b") for i in range(5)])
        # count atoms per scat_id in a single pass
        n_atoms = {}
        for scat_id in self.scat_lookup.values():
            n_atoms[scat_id] = n_atoms.get(scat_id, 0) + 1
        df["count"] = [n_atoms.get(i, 0) for i in df["scat_id"]]
        # a scat_id may have coefficients but no atoms; sort for stable output
        df["elem"] = [" ".join(sorted(self.elem_lookup.get(i, ()))) for i in df["scat_id"]]
        logger.writeln(df.to_string(index=False))
        logger.writeln("")
    # show_info()
# class CustomCoefUtil
144
+
145
def check_atomsf(sts, source, mott_bethe=True, wavelength=None):
    """Log the scattering-factor coefficients that apply to the given models.

    Parameters
    ----------
    sts : iterable of structures (model 0 of each is inspected)
    source : str
        One of "xray", "electron", "neutron".
    mott_bethe : bool
        Only honoured for source "electron".
    wavelength : float or None
        Only allowed for source "xray"; when given, f' and f'' are taken
        from ``gemmi.cromer_liberman`` for every element present.

    Returns
    -------
    (addends, addends2) : two ``gemmi.Addends`` holding f' and f''
        respectively, or ``(None, None)`` when no wavelength applies.
    """
    assert source in ("xray", "electron", "neutron")
    if source != "electron":
        mott_bethe = False
    if wavelength is not None:
        assert source == "xray"

    shown_source = "xray (use Mott-Bethe to convert to electrons)" if mott_bethe else source
    logger.writeln("Atomic scattering factors for {}".format(shown_source))
    uses_it92 = source == "xray" or mott_bethe
    if not uses_it92:
        logger.writeln(" Note that charges will be ignored")

    el_charges = {(cra.atom.element, cra.atom.charge) for st in sts for cra in st[0].all()}
    elems = {pair[0] for pair in el_charges}

    addends, addends2 = None, None
    if wavelength is not None and source == "xray":
        addends = gemmi.Addends()
        addends2 = gemmi.Addends()
        logger.writeln(f"f' and f'' for lambda= {wavelength} A will be used.")
        for el in elems:
            fp, fpp = gemmi.cromer_liberman(el.atomic_number, gemmi.hc / wavelength)
            addends.set(el, fp)
            addends2.set(el, fpp)

    table = {}
    if uses_it92:
        shown = set()
        ordered = sorted(el_charges, key=lambda pair: (pair[0].atomic_number, pair[1]))
        for el, charge in ordered:
            sf = gemmi.IT92_get_exact(el, charge)
            if not sf:
                # fall back to the neutral atom when no ion entry exists
                logger.writeln(" Warning: no scattering factor found for {}{:+}".format(el.name, charge))
                sf = el.it92
                charge = 0
            if (el, charge) in shown:
                continue
            label = el.name if charge == 0 else "{}{:+}".format(el.name, charge)
            shown.add((el, charge))
            row = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(sf, k))}
            row["c"] = sf.c
            table[label] = row
            if addends and addends2:
                row["f'"] = addends.get(el)
                row["f''"] = addends2.get(el)
    else:
        for el in sorted(elems, key=lambda e: e.atomic_number):
            if source == "electron":
                table[el.name] = {f"{k}{i+1}": x for k in ("a", "b") for i, x in enumerate(getattr(el.c4322, k))}
            else:
                table[el.name] = {"a": el.neutron92.get_coefs()[0]}

    with logger.with_prefix(" "):
        logger.writeln(pandas.DataFrame(table).T.to_string())
    logger.writeln("")
    return addends, addends2
# check_atomsf()
191
+
192
def calc_sum_ab(st):
    """Return the occupancy-weighted sum over atoms of sum_i(a_i * b_i).

    ``a`` and ``b`` are the IT92 coefficients of each atom's element; only
    model 0 is used. The per-element sum is computed once and reused.
    """
    per_element = {}
    total = 0.
    for cra in st[0].all():
        element = cra.atom.element
        try:
            element_sum = per_element[element]
        except KeyError:
            coefs = element.it92
            element_sum = sum(a * b for a, b in zip(coefs.a, coefs.b))
            per_element[element] = element_sum
        total += element_sum * cra.atom.occ
    return total
# calc_sum_ab()
202
+
203
def calc_fc_fft(st, d_min, source, mott_bethe=True, monlib=None, blur=None, cutoff=1e-5, rate=1.5,
                omit_proton=False, omit_h_electron=False, miller_array=None, addends=None):
    """Calculate structure factors of model 0 via FFT of a model density grid.

    Parameters
    ----------
    st : structure; it is cloned (not modified) when it contains hydrogens
    d_min : float
        Resolution limit passed to the density calculator.
    source : str
        One of "xray", "electron", "neutron", "custom".
    mott_bethe : bool
        Only honoured for source "electron"; X-ray form factors are then
        used and converted through the Mott-Bethe formula.
    monlib : monomer library or None
        Needed to shift hydrogen positions (electron cloud / nucleus).
    blur : float or None
        B value added in density calculation; determined by
        ``determine_blur_for_dencalc`` when None.
    cutoff, rate : float
        Passed to the density calculator.
    omit_proton, omit_h_electron : bool
        Omit the hydrogen nucleus / electron contribution (Mott-Bethe only).
        Requesting both removes hydrogens altogether.
    miller_array : array or None
        When given, values are returned for these indices only.
    addends : gemmi.Addends or None
        Used only in the non-Mott-Bethe branch.

    Returns
    -------
    ``grid.prepare_asu_data(...)`` when ``miller_array`` is None, otherwise
    ``grid.get_value_by_hkl(miller_array, ...)``.
    """
    assert source in ("xray", "electron", "neutron", "custom")
    if source != "electron": mott_bethe = False
    topo = None
    if st[0].has_hydrogen():
        # work on a copy because hydrogens may be shifted or removed below
        st = st.clone()
        if source == "neutron":
            # nothing happens if not st.has_d_fraction
            st.store_deuterium_as_fraction(False)
        if omit_proton or omit_h_electron:
            assert mott_bethe
            if omit_proton and omit_h_electron:
                logger.writeln("omit_proton and omit_h_electron requested. removing hydrogens")
                st.remove_hydrogens()
                omit_proton = omit_h_electron = False
        if mott_bethe and not omit_proton and monlib is not None:
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)
            # Shift electron positions
            topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    elif omit_proton or omit_h_electron:
        logger.writeln("WARNING: omit_proton/h_electron requested, but no hydrogen exists!")
        omit_proton = omit_h_electron = False

    # for printing
    method_str = ""
    if mott_bethe:
        if omit_proton:
            method_str += "proton-omit "
        elif omit_h_electron:
            if topo is None:
                method_str += "hydrogen electron-omit "
            else:
                method_str += "hydrogen electron-omit, proton-shifted "
        elif topo is not None:
            method_str += "proton-shifted "
    method_str += f"Fc with {source} scattering factors"
    if mott_bethe:
        method_str += " through Mott-Bethe formula from X-ray sf"
    logger.writeln(f"Calculating {method_str}..")

    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    #blur = max(0, blur) # negative blur may cause non-positive definite in case of anisotropic Bs
    logger.writeln(" Setting blur= {:.2f} in density calculation (unblurred later)".format(blur))

    # pick the density calculator matching the form-factor table in use
    if source == "xray" or mott_bethe:
        dc = gemmi.DensityCalculatorX()
    elif source == "electron":
        dc = gemmi.DensityCalculatorE()
    elif source == "neutron":
        dc = gemmi.DensityCalculatorN()
    elif source == "custom":
        dc = gemmi.DensityCalculatorC()
    else:
        raise RuntimeError("unknown source")

    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate
    dc.grid.setup_from(st)

    t_start = time.time()
    if mott_bethe:
        dc.initialize_grid()
        dc.addends.subtract_z(except_hydrogen=True)

        if omit_h_electron:
            # density from a hydrogen-free copy; st itself keeps its hydrogens
            st2 = st.clone()
            st2.remove_hydrogens()
            dc.add_model_density_to_grid(st2[0])
        else:
            dc.add_model_density_to_grid(st[0])

        # Subtract hydrogen Z
        if not omit_proton and st[0].has_hydrogen():
            if topo is not None:
                # Shift proton positions
                topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                               default_scale=restraints.default_proton_scale)
            for cra in st[0].all():
                if cra.atom.is_hydrogen():
                    dc.add_c_contribution_to_grid(cra.atom, -1)

        dc.grid.symmetrize_sum()
        # value handed to get_value_by_hkl() as mott_bethe_000
        sum_ab = calc_sum_ab(st) * len(st.find_spacegroup().operations())
        mb_000 = sum_ab * gemmi.mott_bethe_const() / 4
    else:
        if addends is not None: dc.addends = addends
        dc.put_model_density_on_grid(st[0])
        mb_000 = 0

    logger.writeln(" done. Fc calculation time: {:.1f} s".format(time.time() - t_start))
    grid = gemmi.transform_map_to_f_phi(dc.grid)

    if miller_array is None:
        return grid.prepare_asu_data(dmin=d_min, mott_bethe=mott_bethe, unblur=dc.blur)
    else:
        # TODO remove this with gemmi 0.7.5
        if not miller_array.flags.writeable:
            miller_array = miller_array.copy()
        return grid.get_value_by_hkl(miller_array, mott_bethe=mott_bethe, unblur=dc.blur,
                                     mott_bethe_000=mb_000)
# calc_fc_fft()
309
+
310
def calc_fcpp_fft(st, d_min, addends2, blur=None, cutoff=1e-5, rate=1.5, miller_array=None): # f'' contribution
    """Calculate the f'' contribution to structure factors by FFT.

    Parameters
    ----------
    st : structure (model 0 is used)
    d_min : float
        Resolution limit.
    addends2 : gemmi.Addends
        Per-element f'' values assigned to the calculator's addends.
    blur : float or None
        Determined by ``determine_blur_for_dencalc`` when None.
    cutoff, rate : float
        Passed to the density calculator.
    miller_array : array or None
        When given, values are returned for these indices only.

    Returns
    -------
    ``prepare_asu_data(...)`` result when ``miller_array`` is None, otherwise
    the values from ``get_value_by_hkl(...)``.
    """
    if blur is None:
        blur = determine_blur_for_dencalc(st, d_min/2/rate)

    calc = gemmi.DensityCalculatorZ()
    calc.d_min = d_min
    calc.blur = blur
    calc.cutoff = cutoff
    calc.rate = rate
    calc.grid.setup_from(st)
    calc.addends = addends2
    calc.put_model_density_on_grid(st[0])
    fphi_grid = gemmi.transform_map_to_f_phi(calc.grid)

    if miller_array is None:
        return fphi_grid.prepare_asu_data(dmin=d_min, unblur=calc.blur)

    # TODO remove this with gemmi 0.7.5
    if not miller_array.flags.writeable:
        miller_array = miller_array.copy()
    return fphi_grid.get_value_by_hkl(miller_array, unblur=calc.blur)
    #return (fc + numpy.vstack([fpp * 1j, -fpp * 1j])).T # shape of (nref, 2); F+ and F-*
# calc_fcpp_fft()
330
+
331
def calc_fc_direct(st, d_min, source, mott_bethe, monlib=None, miller_array=None):
    """Calculate structure factors of model 0 by direct summation.

    Parameters
    ----------
    st : structure; cloned when hydrogens have to be shifted
    d_min : float
        Used to generate Miller indices when ``miller_array`` is None.
    source : str
        "xray" or "electron".
    mott_bethe : bool
        Only honoured for source "electron".
    monlib : monomer library or None
        Needed to move hydrogens to nucleus positions.
    miller_array : iterable of hkl or None

    Returns
    -------
    ``numpy.ndarray`` of values when ``miller_array`` was given, otherwise a
    ``gemmi.ComplexAsuData``.
    """
    assert source in ("xray", "electron")
    if source != "electron":
        mott_bethe = False

    hkl_supplied = miller_array is not None
    unit_cell = st.cell
    spacegroup = gemmi.SpaceGroup(st.spacegroup_hm)
    if not hkl_supplied:
        miller_array = gemmi.make_miller_array(unit_cell, spacegroup, d_min)
    topo = None

    use_xray_table = source == "xray" or mott_bethe
    if use_xray_table:
        calc = gemmi.StructureFactorCalculatorX(st.cell)
    else:
        calc = gemmi.StructureFactorCalculatorE(st.cell)

    if source == "electron" and mott_bethe:
        if monlib is not None and st[0].has_hydrogen():
            st = st.clone()
            topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
            resnames = st[0].get_all_residue_names()
            restraints.check_monlib_support_nucleus_distances(monlib, resnames)

        calc.addends.clear()
        calc.addends.subtract_z(except_hydrogen=True)

    vals = []
    for hkl in miller_array:
        # attention: traverse cell.images
        value = calc.calculate_sf_from_model(st[0], hkl)
        if mott_bethe:
            value *= calc.mott_bethe_factor()
        vals.append(value)

    if topo is not None:
        # move hydrogens to nucleus positions, then add their Z term
        topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus,
                                       default_scale=restraints.default_proton_scale)

        for idx, hkl in enumerate(miller_array):
            h_term = calc.calculate_mb_z(st[0], hkl, only_h=True)
            if mott_bethe:
                h_term *= calc.mott_bethe_factor()
            vals[idx] += h_term

    if hkl_supplied:
        return numpy.array(vals)
    return gemmi.ComplexAsuData(unit_cell, spacegroup,
                                miller_array, vals)
# calc_fc_direct()
379
+
380
def get_em_expected_hydrogen(st, d_min, monlib, weights=None, blur=None, cutoff=1e-5, rate=1.5, optimize=False):
    """Move hydrogens to the peak of their calculated Mott-Bethe map.

    For every hydrogen, the density of the electron-cloud-positioned atom
    minus the nucleus contribution is put in a small P1 box, converted to a
    map, and the hydrogen is placed at the map maximum.

    Parameters
    ----------
    st : structure containing hydrogens; a modified clone is returned
    d_min : float
        Resolution limit.
    monlib : monomer library used to build the topology
    weights : (s_list, w_list) or None
        When given, map coefficients are multiplied by ``w`` interpolated
        at ``1/d``.
    blur : float or None
        Determined by ``determine_blur_for_dencalc`` when None; negative
        values are clipped to 0.
    cutoff, rate : float
        Passed to the density calculator.
    optimize : bool
        Refine the peak position with ``maps.optimize_peak``.

    Returns
    -------
    Clone of ``st`` with updated hydrogen positions.
    """
    # Very crude implementation to find peak from calculated map
    assert st[0].has_hydrogen()
    if blur is None: blur = determine_blur_for_dencalc(st, d_min/2/rate)
    blur = max(0, blur)
    logger.writeln("Setting blur= {:.2f} in density calculation".format(blur))

    st = st.clone()
    topo = gemmi.prepare_topology(st, monlib, warnings=logger, ignore_unknown_links=True)
    resnames = st[0].get_all_residue_names()
    restraints.check_monlib_support_nucleus_distances(monlib, resnames)

    # snapshots of the same model with hydrogens at electron-cloud (st_e)
    # and nucleus (st_n) positions; atoms are matched by index below
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.ElectronCloud)
    st_e = st.clone()
    topo.adjust_hydrogen_distances(gemmi.Restraints.DistanceOf.Nucleus)
    st_n = st.clone()

    dc = gemmi.DensityCalculatorX()
    dc.d_min = d_min
    dc.blur = blur
    dc.cutoff = cutoff
    dc.rate = rate

    # Decide box_size
    max_r = max([dc.estimate_radius(cra.atom) for cra in st[0].all()])
    logger.writeln("max_r= {:.2f}".format(max_r))
    box_size = max_r*2 + 1 # padding
    logger.writeln("box_size= {:.2f}".format(box_size))
    # mode_all=True is a debugging path that puts all hydrogens in the real
    # cell and writes debug.ccp4 instead of moving atoms
    mode_all = False #True
    if mode_all:
        dc.grid.setup_from(st)
    else:
        dc.grid.unit_cell = gemmi.UnitCell(box_size, box_size, box_size, 90, 90, 90)
        dc.grid.spacegroup = gemmi.SpaceGroup("P1")
        cbox = gemmi.Position(box_size/2, box_size/2, box_size/2)

    if mode_all: dc.initialize_grid()

    if weights is not None:
        w_s, w_w = weights # s_list and w_list
    else:
        w_s, w_w = None, None

    # index-based loops so that the same atom can be addressed in st, st_n and st_e
    for ichain in range(len(st[0])):
        chain = st[0][ichain]
        for ires in range(len(chain)):
            residue = chain[ires]
            for iatom in range(len(residue)):
                atom = residue[iatom]
                if not atom.is_hydrogen(): continue
                h_n = st_n[0][ichain][ires][iatom]
                h_e = st_e[0][ichain][ires][iatom]
                if not mode_all:
                    dc.initialize_grid()
                    h_n.occ = 1.
                    h_e.occ = 1.
                    # put the nucleus at the box centre, keeping the
                    # electron-nucleus offset
                    n_pos = gemmi.Position(h_n.pos)
                    h_n.pos = cbox
                    h_e.pos = cbox + h_e.pos - n_pos
                dc.add_atom_density_to_grid(h_e)
                dc.add_c_contribution_to_grid(h_n, -1)
                if not mode_all:
                    grid = gemmi.transform_map_to_f_phi(dc.grid)
                    asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
                    if w_s is not None:
                        asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)

                    denmap = asu_data.transform_f_phi_to_map(exact_size=(int(box_size*10), int(box_size*10), int(box_size*10)))
                    m = numpy.unravel_index(numpy.argmax(denmap), denmap.shape)
                    peakpos = denmap.get_position(m[0], m[1], m[2])
                    if optimize: peakpos = maps.optimize_peak(denmap, peakpos)
                    # translate the peak from box coordinates back to the model frame
                    atom.pos = peakpos - cbox + n_pos

    if mode_all:
        grid = gemmi.transform_map_to_f_phi(dc.grid)
        asu_data = grid.prepare_asu_data(dmin=d_min, mott_bethe=True, unblur=dc.blur)
        if w_s is not None:
            asu_data.value_array[:] *= numpy.interp(1./asu_data.make_d_array(), w_s, w_w)
        denmap = asu_data.transform_f_phi_to_map(sample_rate=3)
        ccp4 = gemmi.Ccp4Map()
        ccp4.grid = denmap
        ccp4.update_ccp4_header(2, True) # float, update stats
        ccp4.write_ccp4_map("debug.ccp4")

    return st

# get_em_expected_hydrogen()
467
+
468
def translate_into_box(st, origin=None, apply_shift=True):
    """Translate the model by whole unit-cell vectors into the cell box.

    The shift is computed from the centre of mass of model 0 relative to
    ``origin`` (default: 0,0,0).

    Parameters
    ----------
    st : structure
    origin : gemmi.Position or None
    apply_shift : bool
        When true, all models are transformed in place.

    Returns
    -------
    The applied (or would-be) shift vector in Cartesian coordinates.
    """
    if origin is None:
        origin = gemmi.Position(0,0,0)

    # apply unit cell translations to put model into a box (unit cell)
    orth = st.cell.orth.mat.array
    frac_t = st.cell.frac.mat.array.transpose()
    com = numpy.array((st[0].calculate_center_of_mass() - origin).tolist())

    shift = 0
    for axis in range(3):
        n_cells = numpy.floor(1-numpy.dot(com, frac_t[:,axis]))
        shift = shift + orth[:,axis]*n_cells

    tr = gemmi.Transform(gemmi.Mat33(), gemmi.Vec3(*shift))
    if apply_shift:
        for model in st:
            model.transform_pos_and_adp(tr)
    return shift
# translate_into_box()
481
+
482
def box_from_model(model, padding):
    """Return an orthogonal unit cell enclosing all atoms of ``model``.

    Each edge is the coordinate extent along that axis plus ``padding``;
    all angles are 90 degrees.
    """
    coords = numpy.array([cra.atom.pos.tolist() for cra in model.all()])
    # named "extent" to avoid shadowing the module-level ``ext`` import
    extent = numpy.max(coords, axis=0) - numpy.min(coords, axis=0) + padding
    return gemmi.UnitCell(extent[0], extent[1], extent[2], 90, 90, 90)
# box_from_model()
488
+
489
def cra_to_indices(cra, model):
    """Return (chain index, residue index, atom index) of ``cra`` in ``model``.

    Components that cannot be located are None. The whole model is scanned,
    so the last match wins if several entries compare equal.
    """
    chain_idx = residue_idx = atom_idx = None
    for ic, chain in enumerate(model):
        if cra.chain != chain:
            continue
        chain_idx = ic
        for ir, residue in enumerate(chain):
            if cra.residue != residue:
                continue
            residue_idx = ir
            for ia, atom in enumerate(residue):
                if cra.atom == atom:
                    atom_idx = ia

    return (chain_idx, residue_idx, atom_idx)
# cra_to_indices()
505
+
506
def cra_to_atomaddress(cra):
    """Build a ``gemmi.AtomAddress`` describing the given chain/residue/atom.

    The residue segment is copied onto the address as well.
    """
    residue = cra.residue
    atom = cra.atom
    address = gemmi.AtomAddress(cra.chain.name,
                                residue.seqid, residue.name,
                                atom.name, atom.altloc)
    address.res_id.segment = residue.segment
    return address
# cra_to_atomaddress()
513
+
514
def check_occupancies(st, raise_error=False):
    """Report atoms of model 0 whose occupancy is outside [0, 1].

    A tolerance of 1e-6 is allowed above 1.

    Parameters
    ----------
    st : structure
    raise_error : bool
        Raise after logging when any bad occupancy was found.

    Raises
    ------
    RuntimeError
        If ``raise_error`` is true and a bad occupancy exists.
    """
    offenders = [cra for cra in st[0].all()
                 if not 0 <= cra.atom.occ <= 1 + 1e-6]
    if not offenders:
        return

    logger.writeln("Bad occupancies:")
    for cra in offenders:
        logger.writeln(f" {cra} occ= {cra.atom.occ:.4f}")
    if raise_error:
        raise RuntimeError("Please check your model and fix bad occupancies")
# check_occupancies()
526
+
527
def find_special_positions(st, special_pos_threshold=0.2, fix_occ=True, fix_pos=True, fix_adp=True):
    """Detect atoms of model 0 lying on special positions and optionally fix them.

    An atom is considered to be on a special position when a contact search
    (cutoff ``2 * special_pos_threshold``) finds it in contact with its own
    symmetry image.

    Parameters
    ----------
    st : structure, modified in place by the fix_* options
    special_pos_threshold : float
    fix_occ : bool
        Divide the occupancy by the multiplicity when the summed occupancy
        exceeds 1.001.
    fix_pos : bool
        Move the atom to the average of itself and its images.
    fix_adp : bool
        Replace a non-zero anisotropic tensor with its symmetry average.

    Returns
    -------
    list of (atom, images, mat_total, mat_total_aniso) tuples, where
    ``mat_total`` is the 3x3 average of identity and the image operators in
    the orthogonal frame and ``mat_total_aniso`` is the pseudo-inverse of the
    corresponding 6x6 average built with ``mat33_as66``.
    """
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(special_pos_threshold * 2)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    cs.special_pos_cutoff_sq = 0
    results = cs.find_contacts(ns)
    found = {}  # atom -> list of image_idx where it contacts itself
    cra = {}    # atom -> CRA, only used for printing
    for r in results:
        # keep only self-contacts (atom vs its own symmetry image)
        if r.partner1.atom != r.partner2.atom: continue
        found.setdefault(r.partner1.atom, []).append(r.image_idx)
        cra[r.partner1.atom] = r.partner1

    if found: logger.writeln("Atoms on special position detected.")
    tostr = lambda x: ", ".join("{:.3e}".format(v) for v in x)
    ret = []
    for atom in found:
        images = found[atom]
        # +1 accounts for the identity operation
        n_images = len(images) + 1
        sum_occ = atom.occ * n_images
        logger.writeln(" {} multiplicity= {} images= {} occupancies_total= {:.2f}".format(cra[atom], n_images, images, sum_occ))
        if sum_occ > 1.001 and fix_occ:
            new_occ = atom.occ / n_images
            logger.writeln(" correcting occupancy= {:.2f}".format(new_occ))
            atom.occ = new_occ
        if fix_pos:
            fpos = gemmi.Fractional(st.cell.frac.apply(atom.pos))
            # image_idx is 1-based with respect to st.cell.images
            fdiff = sum([(st.cell.images[i-1].apply(fpos) - fpos).wrap_to_zero() for i in images], gemmi.Fractional(0,0,0)) / n_images
            diff = st.cell.orth.apply(fdiff)
            atom.pos += gemmi.Position(diff)
            logger.writeln(" correcting position= {}".format(tostr(atom.pos.tolist())))
            logger.writeln(" pos_viol= {}".format(tostr(diff.tolist())))
        if fix_adp and atom.aniso.nonzero():
            aniso_bak = atom.aniso.elements_pdb()
            # average the tensor over identity and all images in fractional space
            fani = atom.aniso.transformed_by(st.cell.frac.mat)
            fani_avg = sum([fani.transformed_by(st.cell.images[i-1].mat) for i in images], fani).scaled(1/n_images)
            atom.aniso = fani_avg.transformed_by(st.cell.orth.mat)
            diff = numpy.array(atom.aniso.elements_pdb()) - aniso_bak
            logger.writeln(" correcting aniso= {}".format(tostr(atom.aniso.elements_pdb())))
            logger.writeln(" aniso_viol= {}".format(tostr(diff)))

        # image operators expressed in the orthogonal frame
        mats = [st.cell.orth.combine(st.cell.images[i-1]).combine(st.cell.frac).mat.array for i in images]
        mat_total = (numpy.identity(3) + sum(numpy.array(m) for m in mats)) / n_images
        mat_total_aniso = (numpy.identity(6) + sum(mat33_as66(m.tolist()) for m in mats)) / n_images
        mat_total_aniso = numpy.linalg.pinv(mat_total_aniso)
        ret.append((atom, images, mat_total, mat_total_aniso))

    return ret
# find_special_positions()
576
+
577
def expand_ncs(st, special_pos_threshold=0.01, howtoname=gemmi.HowToNameCopiedChain.Short):
    """Expand the NCS operators of ``st`` in place.

    Does nothing when ``st.ncs`` is empty. Atoms on special positions are
    reported (not used otherwise) before the expansion.

    Parameters
    ----------
    st : structure, modified in place
    special_pos_threshold : float
        Passed to ``find_special_positions``.
    howtoname : gemmi.HowToNameCopiedChain
        Naming scheme for the copied chains.
    """
    # TODO modify st.connections for atoms at special positions
    n_ops = len(st.ncs)
    if n_ops == 0:
        return
    # just to show info, a bit waste of cpu time..
    find_special_positions(st, special_pos_threshold)
    logger.writeln("Expanding symmetry..")
    st.expand_ncs(howtoname, merge_dist=1e-4)
# expand_ncs()
584
+
585
def prepare_assembly(name, chains, ops, is_helical=False):
    """Create a ``gemmi.Assembly`` with one generator holding the operators.

    An identity operator is prepended when none of ``ops`` is the identity.

    Parameters
    ----------
    name : str
        Assembly name.
    chains : list of chain names assigned to the generator
    ops : iterable of objects with a ``tr`` transform
    is_helical : bool
        Selects the operator type string and the assembly special kind.

    Returns
    -------
    gemmi.Assembly
    """
    op_type = "helical symmetry operation" if is_helical else "point symmetry operation"

    assembly = gemmi.Assembly(name)
    generator = gemmi.Assembly.Gen()
    if not any(nop.tr.is_identity() for nop in ops):
        generator.operators.append(gemmi.Assembly.Operator()) # add identity
    for nop in ops:
        operator = gemmi.Assembly.Operator()
        operator.transform = nop.tr
        # the identity operator keeps its default type
        if not nop.tr.is_identity():
            operator.type = op_type
        generator.operators.append(operator)
    generator.chains = chains
    assembly.generators.append(generator)

    if is_helical:
        assembly.special_kind = gemmi.AssemblySpecialKind.RepresentativeHelical
    else:
        assembly.special_kind = gemmi.AssemblySpecialKind.CompletePoint
    return assembly
# prepare_assembly()
607
+
608
def filter_contacting_ncs(st, cutoff=5.):
    """Keep only the NCS operators whose copies contact the original model.

    Does nothing when ``st.ncs`` is empty; otherwise ``st.ncs`` is replaced
    in place by the operators whose image index appears in a contact found
    within ``cutoff``.
    """
    if len(st.ncs) == 0:
        return
    logger.writeln("Filtering out non-contacting NCS copies with cutoff={:.2f} A".format(cutoff))
    st.setup_cell_images()
    # This is considered crystallographic cell if not 1 1 1. Undesirable result may be seen.
    ns = gemmi.NeighborSearch(st[0], st.cell, cutoff*2).populate()
    searcher = gemmi.ContactSearch(cutoff)
    searcher.twice = True # since we need all image_idx
    searcher.ignore = gemmi.ContactSearch.Ignore.SameAsu

    image_indices = set()
    for contact in searcher.find_contacts(ns):
        image_indices.add(contact.image_idx)
    logger.writeln(" contacting copies: {}".format(image_indices))

    # XXX is this correct? maybe yes as long as identity operator is not there
    kept_ops = [st.ncs[idx-1] for idx in image_indices]
    st.ncs.clear()
    st.ncs.extend(kept_ops)
# filter_contacting_ncs()
623
+
624
def check_symmetry_related_model_duplication(st, distance_cutoff=0.5, max_allowed_ratio=0.5):
    """Check whether the model seems to contain symmetry-related copies.

    Atoms of model 0 closer than ``distance_cutoff`` to an atom of a
    symmetry image are counted.

    Parameters
    ----------
    st : structure (``setup_cell_images()`` is called on it)
    distance_cutoff : float
    max_allowed_ratio : float

    Returns
    -------
    bool
        True if the fraction of contacting atoms exceeds
        ``max_allowed_ratio``; False for a model without atoms.
    """
    logger.writeln("Checking if model in asu is given.")
    n_atoms = st[0].count_atom_sites()
    st.setup_cell_images()
    ns = gemmi.NeighborSearch(st[0], st.cell, 3).populate()
    cs = gemmi.ContactSearch(distance_cutoff)
    cs.ignore = gemmi.ContactSearch.Ignore.SameAsu
    results = cs.find_contacts(ns)
    n_contacting_atoms = len({a for r in results for a in (r.partner1.atom, r.partner2.atom)})
    logger.writeln(" N_atoms= {} N_contacting_atoms= {}".format(n_atoms, n_contacting_atoms))
    if n_atoms == 0:
        # an empty model cannot be duplicated; also avoids division by zero
        return False
    return n_contacting_atoms / n_atoms > max_allowed_ratio # return True if too many contacts
# check_symmetry_related_model_duplication()
636
+
637
def adp_analysis(st, ignore_zero_occ=True):
    """Log B-value quantiles (min, Q1, median, Q3, max) per chain for each model.

    Parameters
    ----------
    st : structure
    ignore_zero_occ : bool
        Exclude atoms with zero occupancy from the statistics.

    NOTE(review): indentation is not available in the reviewed source; the
    trailing blank log line is assumed to be written once per model.
    """
    logger.writeln("= ADP analysis =")
    if ignore_zero_occ:
        logger.writeln("(zero-occupancy atoms are ignored)")

    multi_model = len(st) > 1
    row_fmt = " {:5.1f} {:5.1f} {:5.1f} {:5.1f} {:5.1f}"
    for imodel, model in enumerate(st):
        if multi_model:
            logger.writeln("Model {}:".format(imodel))
        logger.writeln(" min Q1 med Q3 max")
        for chain, natoms, qs in adp_stats_per_chain(model, ignore_zero_occ):
            # "*" is the entry for all chains together
            label = "Chain {:3s}".format(chain) if chain != "*" else "All "
            logger.writeln(label + row_fmt.format(*qs))
        logger.writeln("")
# adp_analysis()
651
+
652
def adp_stats_per_chain(model, ignore_zero_occ=True):
    """Return B_iso quantiles per chain name.

    Parameters
    ----------
    model : model providing ``all()`` and iteration over chains
    ignore_zero_occ : bool
        Exclude atoms with zero occupancy.

    Returns
    -------
    list of (chain_name, n_atoms, quantiles)
        ``quantiles`` holds min, Q1, median, Q3 and max. Chains sharing a
        name are merged; chains without any selected atom are skipped. When
        more than one chain name has data, a final ``"*"`` entry covers all
        atoms.
    """
    bs = {}
    for cra in model.all():
        if not ignore_zero_occ or cra.atom.occ > 0:
            bs.setdefault(cra.chain.name, []).append(cra.atom.b_iso)

    probs = [0,0.25,0.5,0.75,1]
    ret = []
    seen = set()
    for chain in model:
        name = chain.name
        # skip repeated chain names, and chains with no selected atoms
        # (e.g. empty or all zero occupancy), which have no entry in bs
        if name in seen or name not in bs:
            continue
        seen.add(name)
        ret.append((name, len(bs[name]), numpy.quantile(bs[name], probs)))

    if len(bs) > 1:
        all_bs = list(itertools.chain.from_iterable(bs.values()))
        ret.append(("*", len(all_bs), numpy.quantile(all_bs, probs)))

    return ret
# adp_stats_per_chain()
671
+
672
def reset_adp(model, bfactor=None, adp_mode="iso"):
    """Reset B values and anisotropic tensors of all atoms in ``model``.

    Parameters
    ----------
    model : model, modified in place
    bfactor : float or None
        When given, assigned to ``b_iso`` of every atom.
    adp_mode : str
        "iso" clears the anisotropic tensor; "fix" clears it only when
        ``bfactor`` is given; "aniso" makes ``b_iso`` and the tensor
        consistent (an existing tensor defines ``b_iso`` unless ``bfactor``
        is given, otherwise an isotropic tensor is built from ``b_iso``).

    NOTE(review): indentation is not available in the reviewed source; the
    ``b_iso`` reset from the tensor is assumed to happen whether or not the
    warning is printed.
    """
    clear_aniso = adp_mode == "iso" or (adp_mode == "fix" and bfactor is not None)
    for cra in model.all():
        atom = cra.atom
        if bfactor is not None:
            atom.b_iso = bfactor
        if clear_aniso:
            atom.aniso = gemmi.SMat33f(0,0,0,0,0,0)
            continue
        if adp_mode != "aniso":
            continue
        if atom.aniso.nonzero() and bfactor is None: # just in case
            b_from_aniso = atom.aniso.trace() / 3 * u_to_b
            if abs(atom.b_iso - b_from_aniso) > 1e-2:
                logger.writeln(f"WARNING: {cra} B_iso={atom.b_iso:.3f} and tr(B_aniso)/3={b_from_aniso:.3f} are different. Resetting B_iso from B_aniso")
            atom.b_iso = b_from_aniso
        else:
            u_iso = atom.b_iso * b_to_u
            atom.aniso = gemmi.SMat33f(u_iso, u_iso, u_iso, 0, 0, 0)
# reset_adp()
688
+
689
def shift_b(model, delta_b, min_b=0.01):
    """Add ``delta_b`` to the B values of all atoms, keeping them >= ``min_b``.

    For atoms with a non-zero anisotropic tensor the shift is applied to the
    eigenvalues (in U units, each clipped at the U equivalent of ``min_b``)
    and ``b_iso`` is then recomputed from the trace of the new tensor.
    """
    delta_u = delta_b * b_to_u
    min_u = min_b * b_to_u
    for cra in model.all():
        atom = cra.atom
        atom.b_iso = max(atom.b_iso + delta_b, min_b)
        if not atom.aniso.nonzero():
            continue
        umat = atom.aniso.as_mat33().array
        eigvals, eigvecs = numpy.linalg.eigh(umat)
        eigvals = numpy.maximum(eigvals + delta_u, min_u)
        # rebuild the tensor from shifted eigenvalues
        shifted = eigvecs.dot(numpy.diag(eigvals)).dot(eigvecs.T)
        atom.aniso = gemmi.SMat33f(shifted[0,0], shifted[1,1], shifted[2,2],
                                   shifted[0,1], shifted[0,2], shifted[1,2])
        atom.b_iso = atom.aniso.trace() / 3 * u_to_b
# shift_b()
702
+
703
def initialize_values(model, params):
    """Assign initial ADP / occupancy / deuterium-fraction values by selection.

    Parameters
    ----------
    model : model, modified in place
    params : dict
        May contain the keys "adp", "occ" and "dfrac" (other keys are
        ignored), each mapping a gemmi selection string to the value to set.
        For "adp", atoms with a non-zero anisotropic tensor get an isotropic
        tensor equivalent to the new B value.
    """
    attr_by_key = {"adp":"b_iso", "occ": "occ", "dfrac": "fraction"}
    for key in params:
        if key not in attr_by_key:
            continue
        attr_name = attr_by_key[key]
        for selstr, value in params[key].items():
            sel = gemmi.Selection(selstr)
            for chain in sel.chains(model):
                for residue in sel.residues(chain):
                    for atom in sel.atoms(residue):
                        setattr(atom, attr_name, value)
                        if key == "adp" and atom.aniso.nonzero():
                            u_iso = atom.b_iso * b_to_u
                            atom.aniso = gemmi.SMat33f(u_iso, u_iso, u_iso, 0, 0, 0)
# initialize_values()
717
+
718
def all_chain_ids(st):
    """Return the chain names of all models, in order (duplicates kept)."""
    names = []
    for model in st:
        names.extend(chain.name for chain in model)
    return names
# all_chain_ids()
721
+
722
def all_B(st, ignore_zero_occ=True):
    """Return the list of B_iso values of all atoms in all models.

    Atoms with non-positive occupancy are skipped when ``ignore_zero_occ``
    is true.
    """
    return [cra.atom.b_iso
            for model in st
            for cra in model.all()
            if not ignore_zero_occ or cra.atom.occ > 0]
# all_B()
731
+
732
def mat33_as66(m):
    """Return the 6x6 matrix equivalent to transforming a symmetric tensor.

    Suppose R (``m``, 3x3, indexable as ``m[i][j]``) is applied to a 3x3
    symmetric matrix U as R U R^T. With U stored as the 6-vector
    (u11, u22, u33, u12, u13, u23), the returned matrix R' gives the same
    result as R' u.
    """
    # (row, column) of U for each position of the 6-vector
    pairs = ((0,0), (1,1), (2,2), (0,1), (0,2), (1,2))
    out = numpy.zeros((6,6))
    for row, (i, j) in enumerate(pairs):
        mi, mj = m[i], m[j]
        for col, (p, q) in enumerate(pairs):
            if p == q:
                out[row, col] = mi[p] * mj[p]
            else:
                # off-diagonal elements of U appear twice (u_pq and u_qp)
                out[row, col] = mi[p] * mj[q] + mi[q] * mj[p]
    return out
744
def adp_constraints(ops, cell, tr0=True):
    """Find the directions in 6-vector ADP space left free by the operators.

    For each operator R (in its 6x6 form from mat33_as66) think about
        f = (b-Rb)^T (b-Rb) = b^T b - b^T R b - b^T R^T b + b^T R^T R b
    whose second derivative is
        d^2f/db db^T = 2I - 2(R+R^T) + 2(R^T R)
    Eigenvectors of the summed second-derivative matrix that correspond to
    0-valued eigenvalues are the directions to refine.

    ops: iterable of operators accepted by cell.op_as_transform()
    cell: object providing op_as_transform(op); the result's .mat.tolist()
          is used as the 3x3 matrix
    tr0: if true, 2 is added to the upper-left 3x3 block, i.e. the second
         derivative of (b0+b1+b2)^2, so directions changing the trace get a
         non-zero eigenvalue
    Returns an (n, 6) array with one direction per row (elements smaller
    than 1e-9 in magnitude are set to 0), or an empty (0, 6) array.
    """
    hess = numpy.zeros((6, 6))
    if tr0:
        hess[:3, :3] += numpy.ones((3, 3)) * 2
    for op in ops:
        r = mat33_as66(cell.op_as_transform(op).mat.tolist())
        hess += 2 * numpy.identity(6) - 2 * (r + r.T) + 2 * numpy.dot(r.T, r)

    evals, evecs = numpy.linalg.eigh(hess)
    free_dirs = [evecs[:, i] for i in range(6) if numpy.isclose(evals[i], 0)]
    if not free_dirs:
        return numpy.empty((0, 6))

    stacked = numpy.vstack(free_dirs)
    # flush numerical noise to exact zeros
    return numpy.where(numpy.abs(stacked) < 1e-9, 0, stacked)
# adp_constraints()
767
+
768
def to_dataframe(st):
    """Return a pandas.DataFrame with one row per atom of every model in *st*.

    Columns: model, chain, resn, subchain, segment, seqnum, icode, altloc,
    u11..u23 (anisotropic tensor elements), b_iso, charge, elem, atom, occ,
    x, y, z and tlsgroup.
    """
    columns = ("model", "chain", "resn", "subchain", "segment", "seqnum", "icode", "altloc",
               "u11", "u22", "u33", "u12", "u13", "u23",
               "b_iso", "charge", "elem", "atom", "occ",
               "x", "y", "z", "tlsgroup")
    data = {name: [] for name in columns}

    for model in st:
        for cra in model.all():
            chain, res, atom = cra.chain, cra.residue, cra.atom
            aniso, pos = atom.aniso, atom.pos
            # TODO need support r.het_flag, r.flag, a.calc_flag, a.flag, a.serial?
            # values listed in the same order as `columns`
            record = (model.num, chain.name, res.name, res.subchain, res.segment,
                      res.seqid.num, res.seqid.icode, atom.altloc,
                      aniso.u11, aniso.u22, aniso.u33, aniso.u12, aniso.u13, aniso.u23,
                      atom.b_iso, atom.charge, atom.element.name, atom.name, atom.occ,
                      pos.x, pos.y, pos.z, atom.tls_group_id)
            for name, value in zip(columns, record):
                data[name].append(value)

    return pandas.DataFrame(data=data)
# to_dataframe()
806
+
807
def from_dataframe(df, st=None, sort=True): # Slow!
    """Build a gemmi.Structure from a DataFrame laid out like to_dataframe().

    df: DataFrame with the columns model, chain, seqnum, icode, resn, segment,
        subchain, altloc, atom, u11..u23, b_iso, charge, elem, occ, x, y, z
        and tlsgroup (one row per atom)
    st: optional structure; if given, a clone with all its models removed is
        filled in and returned (the argument itself is not modified),
        otherwise a new empty gemmi.Structure is used
    sort: passed to every DataFrame.groupby() call. The default (True) keeps
          the historical behaviour, where models, chains and residues are
          created in sorted key order, so the row order of *df* is NOT
          preserved at those levels. Pass False to create them in order of
          first appearance in *df*, e.g. to round-trip to_dataframe() output.

    Atoms within a residue always follow the row order of *df*.
    """
    if st is None:
        st = gemmi.Structure()
    else:
        st = st.clone()
        # drop every model of the clone; only the non-model content is kept
        while len(st) > 0:
            del st[0]

    residue_keys = ["seqnum", "icode", "resn", "segment", "subchain"]
    for m_num, dm in df.groupby("model", sort=sort):
        st.add_model(gemmi.Model(m_num))
        m = st[-1]
        for c_name, dc in dm.groupby("chain", sort=sort):
            m.add_chain(gemmi.Chain(c_name))
            c = m[-1]
            for rkey, dr in dc.groupby(residue_keys, sort=sort):
                c.add_residue(gemmi.Residue())
                r = c[-1]
                # rkey follows the order of residue_keys
                r.seqid.num = rkey[0]
                r.seqid.icode = rkey[1]
                r.name = rkey[2]
                r.segment = rkey[3]
                r.subchain = rkey[4]
                for _, row in dr.iterrows():
                    r.add_atom(gemmi.Atom())
                    a = r[-1]
                    a.altloc = row["altloc"]
                    a.name = row["atom"]
                    a.aniso.u11 = row["u11"]
                    a.aniso.u22 = row["u22"]
                    a.aniso.u33 = row["u33"]
                    a.aniso.u12 = row["u12"]
                    a.aniso.u13 = row["u13"]
                    a.aniso.u23 = row["u23"]
                    a.b_iso = row["b_iso"]
                    a.charge = row["charge"]
                    a.element = gemmi.Element(row["elem"])
                    a.occ = row["occ"]
                    a.pos.x = row["x"]
                    a.pos.y = row["y"]
                    a.pos.z = row["z"]
                    a.tls_group_id = row["tlsgroup"]

    return st
# from_dataframe()
851
+
852
def st_from_positions(positions, bs=None, qs=None):
    """Make a structure with one "HOH" residue (single O atom) per position.

    positions: sequence of positions (len() is used when bs or qs is None)
    bs: per-atom b_iso values; 0. for every atom if None
    qs: per-atom occupancies; 1. for every atom if None

    All residues go to chain "A" of model 1, numbered from 0 in input order.
    Iteration stops at the shortest of positions, bs and qs.
    """
    st = gemmi.Structure()
    st.add_model(gemmi.Model(1))
    st[0].add_chain(gemmi.Chain("A"))
    chain = st[0][0]
    if bs is None:
        bs = [0.] * len(positions)
    if qs is None:
        qs = [1.] * len(positions)
    for seqnum, (pos, b, q) in enumerate(zip(positions, bs, qs)):
        chain.add_residue(gemmi.Residue())
        res = chain[-1]
        res.seqid.num = seqnum
        res.name = "HOH"
        res.add_atom(gemmi.Atom())
        atom = res[-1]
        atom.name = "O"
        atom.element = gemmi.Element("O")
        atom.pos = pos
        atom.b_iso = b
        atom.occ = q

    return st
# st_from_positions()
874
+
875
def invert_model(st):
    """Flip the x-axis of the first model of *st* in place.

    The mirror is taken about the point obtained as half of the row sums of
    st.cell.orth.mat (presumably the centre of the unit cell - confirm against
    the gemmi orthogonalization matrix convention). Positions and ADPs are
    both transformed; other models are not touched.
    """
    # invert x-axis
    orth = st.cell.orth.mat.array
    center = gemmi.Vec3(*(numpy.sum(orth, axis=1) / 2))
    flip_x = gemmi.Mat33([[-1,0,0],[0,1,0],[0,0,1]])
    # x' = M (x - c) + c  =>  translation part is M(-c) + c
    shift = flip_x.multiply(-center) + center
    st[0].transform_pos_and_adp(gemmi.Transform(flip_x, shift))

    # invert peptides
    # NOTE: nothing is done for peptides here; only the comment exists
# invert_model()
887
+
888
def cif2cart_matrix(cell):
    """Return the transformation matrix from U_cif to U_cart for *cell*.

    It is the orthogonalization matrix of the cell multiplied by the diagonal
    matrix of the reciprocal cell lengths (a*, b*, c*).
    """
    recip = cell.reciprocal()
    recip_lengths = gemmi.Vec3(recip.a, recip.b, recip.c)
    return cell.orth.mat.multiply_by_diagonal(recip_lengths)
# cif2cart_matrix()
894
+
895
def cx_to_mx(ss): #SmallStructure to Structure
    """Convert a small-molecule structure *ss* into a gemmi.Structure.

    The result has the space group (extended H-M symbol) and cell of *ss* and
    a single model 1 / chain "A" / residue "00" (seqnum 1) that holds one atom
    per site. For each site, the anisotropic tensor is transformed with
    cif2cart_matrix(), b_iso is u_iso * u_to_b, and the label, element,
    occupancy and orthogonalized position are copied. Charge is not set.
    """
    st = gemmi.Structure()
    st.spacegroup_hm = ss.spacegroup.xhm()
    st.cell = ss.cell
    st.add_model(gemmi.Model(1))
    st[-1].add_chain(gemmi.Chain("A"))
    st[-1][-1].add_residue(gemmi.Residue())
    # the only residue; nothing is added to the chain or model after this
    res = st[-1][-1][-1]
    res.seqid.num = 1
    res.name = "00"
    to_cart = cif2cart_matrix(ss.cell)

    for site in ss.sites:
        res.add_atom(gemmi.Atom())
        atom = res[-1]
        atom.name = site.label
        u_cart = site.aniso.transformed_by(to_cart)
        atom.aniso = gemmi.SMat33f(*u_cart.elements_pdb())
        atom.b_iso = site.u_iso * u_to_b
        #atom.charge = ?
        atom.element = site.element
        atom.occ = site.occ
        atom.pos = site.orth(ss.cell)

    return st
# cx_to_mx()
919
+
920
def fix_deuterium_residues(st):
    """Rename DOD residues to HOH in the first model of *st*, in place.

    We do not have DOD. ND4->NH4 and SPW->SPK are not changed, as hydrogen
    atom names are different. Residue names stored in both partners of
    st.connections are renamed as well. A warning with the number of renamed
    residues (connection partners are not counted) is written to the logger
    when at least one residue was renamed.
    """
    renamed = 0
    for chain in st[0]:
        for res in chain:
            if res.name != "DOD":
                continue
            res.name = "HOH"
            renamed += 1
    for con in st.connections:
        for partner in (con.partner1, con.partner2):
            if partner.res_id.name == "DOD":
                partner.res_id.name = "HOH"
    if renamed > 0:
        logger.writeln("Warning: {} DOD residues have been renamed to HOH".format(renamed))