servalcat 0.4.60__cp312-cp312-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (44) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cp312-win_amd64.pyd +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +733 -0
  7. servalcat/refine/refine_geom.py +207 -0
  8. servalcat/refine/refine_spa.py +327 -0
  9. servalcat/refine/refine_xtal.py +242 -0
  10. servalcat/refine/spa.py +132 -0
  11. servalcat/refine/xtal.py +227 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +536 -0
  15. servalcat/refmac/refmac_wrapper.py +360 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +462 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +961 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1277 -0
  27. servalcat/utils/fileio.py +745 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +699 -0
  30. servalcat/utils/logger.py +116 -0
  31. servalcat/utils/maps.py +340 -0
  32. servalcat/utils/model.py +774 -0
  33. servalcat/utils/refmac.py +747 -0
  34. servalcat/utils/restraints.py +605 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +250 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1403 -0
  40. servalcat-0.4.60.dist-info/METADATA +56 -0
  41. servalcat-0.4.60.dist-info/RECORD +44 -0
  42. servalcat-0.4.60.dist-info/WHEEL +5 -0
  43. servalcat-0.4.60.dist-info/entry_points.txt +4 -0
  44. servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,733 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import os
10
+ import re
11
+ import gemmi
12
+ import numpy
13
+ import pandas
14
+ import scipy.sparse
15
+ import servalcat # for version
16
+ from servalcat.utils import logger
17
+ from servalcat import utils
18
+ from servalcat.refmac import exte
19
+ from servalcat.refmac.refmac_keywords import parse_keywords
20
+ from servalcat import ext
21
+ from . import cgsolve
22
+ u_to_b = utils.model.u_to_b
23
+ b_to_u = utils.model.b_to_u
24
+
25
+ #import line_profiler
26
+ #import atexit
27
+ #profile = line_profiler.LineProfiler()
28
+ #atexit.register(profile.print_stats)
29
+
30
class Geom:
    """Geometry-restraint front end built around ``ext.Geometry``.

    Keeps the structure, its atoms ordered by serial number, the mapping of
    atoms to refinement positions (``atom_pos``), special-position records,
    the keyword arguments forwarded to ``ext.Geometry.calc`` and the
    occupancy groups (``group_occ``).
    """

    def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
                 refmac_keywords=None, unrestrained=False, use_nucleus=False,
                 ncslist=None, atom_pos=None):
        """Set up restraints for ``st``.

        Args:
            st: structure; only its first model (``st[0]``) is used here.
            topo: topology passed to ``load_topo`` and ``check_chemtypes``
                (both skipped when ``unrestrained`` is true).
            monlib: monomer library; ``ener_lib`` and ``path()`` are read.
            adpr_w: weight handed to ``calc_adp_restraint``.
            shake_rms: if positive, the model is shaken (with a fixed random
                seed) and written out as "shaken" before restraints load.
            refmac_keywords: optional keywords; external restraints,
                geometry weights and occupancy groups are taken from them.
            unrestrained: skip loading topology-based restraints.
            use_nucleus: forwarded to the calc / reporting calls.
            ncslist: passed to ``setup_ncsr`` from ``setup_nonbonded``.
            atom_pos: per-atom parameter position; defaults to 0..n-1.
        """
        self.st = st
        model = self.st[0]
        # Atom list indexed by (serial - 1).
        self.atoms = [None] * model.count_atom_sites()
        for cra in model.all():
            self.atoms[cra.atom.serial - 1] = cra.atom
        self.atom_pos = atom_pos if atom_pos is not None else list(range(len(self.atoms)))
        self.n_refine_atoms = max(self.atom_pos) + 1
        # atom -> chain/residue/atom record, used when printing atoms.
        self.lookup = {cra.atom: cra for cra in model.all()}
        self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
        self.specs = utils.model.find_special_positions(self.st)
        for atom, images, matp, mata in self.specs:
            n_sym = len(images) + 1
            self.geom.specials.append(ext.Geometry.Special(atom, matp, mata, n_sym))
        self.adpr_w = adpr_w
        self.unrestrained = unrestrained
        if shake_rms > 0:
            numpy.random.seed(0)  # fixed seed: the shake is reproducible
            utils.model.shake_structure(self.st, shake_rms, copy=False)
            utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
        if not self.unrestrained:
            self.geom.load_topo(topo)
            self.check_chemtypes(os.path.join(monlib.path(), "ener_lib.cif"), topo)
        self.use_nucleus = use_nucleus
        self.calc_kwds = {"use_nucleus": self.use_nucleus}
        occ_params = None
        if refmac_keywords:
            exte.read_external_restraints(refmac_keywords, self.st, self.geom)
            kwds = parse_keywords(refmac_keywords)
            for key in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
                if key not in kwds:
                    continue
                self.calc_kwds[key] = kwds[key]
                logger.writeln("setting geometry weight {}= {}".format(key, kwds[key]))
            occ_params = kwds.get("occu")
        self.group_occ = GroupOccupancy(self.st, occ_params)
        self.geom.finalize_restraints()
        # Z thresholds used when listing outliers in show_model_stats().
        self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5,
                                   plane=5, staca=5, stacd=5, per_atom=5)
        self.parents = {}
        self.ncslist = ncslist

    def check_chemtypes(self, enerlib_path, topo):
        """Verify that every atom's chemical type exists in ener_lib.

        Raises:
            RuntimeError: when an atom's ``chem_type`` is not listed under
                ``_lib_atom.type`` in the file at ``enerlib_path``.
        """
        block = gemmi.cif.read(enerlib_path).sole_block()
        known_types = set(block.find_values("_lib_atom.type"))
        for chain_info in topo.chain_infos:
            for res_info in chain_info.res_infos:
                # One final chemical component per alternate location.
                chemcomps = {alt: res_info.get_final_chemcomp(alt)
                             for alt in {a.altloc for a in res_info.res}}
                for a in res_info.res:
                    cca = chemcomps[a.altloc].find_atom(a.name)
                    if cca is None:  # not expected to happen
                        logger.writeln("WARNING: restraint for {} not found.".format(self.lookup[a]))
                        continue
                    if cca.chem_type not in known_types:
                        raise RuntimeError("Energy type {} of {} not found in ener_lib.".format(cca.chem_type,
                                                                                                 self.lookup[a]))

    def set_h_parents(self):
        """Rebuild ``self.parents``: hydrogen atom -> atom it is bonded to."""
        self.parents = {}
        for bond in self.geom.bonds:
            first, second = bond.atoms[0], bond.atoms[1]
            if first.is_hydrogen():
                self.parents[first] = second
            elif second.is_hydrogen():
                self.parents[second] = first

    def setup_nonbonded(self, refine_xyz):
        """Set up non-bonded (and, if an NCS list was given, NCS) restraints."""
        skip_critical_dist = self.unrestrained or not refine_xyz
        self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist,
                                  group_idxes=self.group_occ.group_idxes)
        if self.ncslist:
            self.geom.setup_ncsr(self.ncslist)

    def calc(self, target_only):
        """Evaluate the geometry term using the stored keyword arguments."""
        return self.geom.calc(check_only=target_only, **self.calc_kwds)

    def calc_adp_restraint(self, target_only):
        """Evaluate the ADP restraint term with weight ``self.adpr_w``."""
        return self.geom.calc_adp_restraint(target_only, self.adpr_w)

    def calc_target(self, target_only, refine_xyz, adp_mode):
        """Return geometry + ADP restraint target after clearing the old one.

        The geometry part is included only when ``refine_xyz`` is true and
        the ADP part only when ``adp_mode > 0``. Unless ``target_only`` is
        set, ``spec_correction()`` is applied afterwards.
        """
        self.geom.clear_target()
        geom_x = self.calc(target_only) if refine_xyz else 0
        geom_a = self.calc_adp_restraint(target_only) if adp_mode > 0 else 0
        logger.writeln(" geom_x = {}".format(geom_x))
        logger.writeln(" geom_a = {}".format(geom_a))
        total = geom_x + geom_a
        if not target_only:
            self.geom.spec_correction()
        return total

    @staticmethod
    def _log_table(header, frame, float_format):
        """Write ``header`` followed by ``frame`` rendered without its index."""
        logger.writeln(header)
        logger.writeln(frame.to_string(float_format=float_format, index=False) + "\n")

    def show_model_stats(self, show_outliers=True):
        """Log restraint statistics and return them.

        Returns:
            dict with "outliers" (restraint kind -> DataFrame, filled only
            when ``show_outliers`` is true) and "summary" (DataFrame indexed
            by restraint type).
        """
        # Return values are not needed here.
        # NOTE(review): presumably these calls refresh what
        # self.geom.reporting returns below - confirm.
        self.calc(True)
        self.calc_adp_restraint(True)
        ret = {"outliers": {}}
        fmt3 = "{:.3f}".format
        if show_outliers:
            reporting = self.geom.reporting
            getters = {"bond": reporting.get_bond_outliers,
                       "angle": reporting.get_angle_outliers,
                       "torsion": reporting.get_torsion_outliers,
                       "chir": reporting.get_chiral_outliers,
                       "plane": reporting.get_plane_outliers,
                       "staca": reporting.get_stacking_angle_outliers,
                       "stacd": reporting.get_stacking_dist_outliers,
                       "vdw": reporting.get_vdw_outliers,
                       #"ncs": reporting.get_ncsr_outliers, # not useful?
                       }
            titles = {"bond": "Bond distances",
                      "angle": "Bond angles",
                      "torsion": "Torsion angles",
                      "chir": "Chiral centres",
                      "plane": "Planar groups",
                      "staca": "Stacking plane angles",
                      "stacd": "Stacking plane distances",
                      "vdw": "VDW repulsions",
                      "ncs": "Local NCS restraints"}
            atom_columns = ("atom", "plane", "1_atom", "2_atom")

            for kind, getter in getters.items():
                min_z = self.outlier_sigmas[kind]
                kwgs = {"min_z": min_z}
                if kind == "bond":
                    kwgs["use_nucleus"] = self.use_nucleus
                table = getter(**kwgs)
                if not table["z"]:
                    continue
                # Replace atom objects by their printable labels.
                for col in table:
                    if col.startswith(atom_columns):
                        table[col] = [str(self.lookup[x]) for x in table[col]]
                df = pandas.DataFrame(table)
                # Largest |Z| first.
                df = df.reindex(df.z.abs().sort_values(ascending=False).index)
                ret["outliers"][kind] = df
                header = " *** {} outliers (Z >= {}) ***\n".format(titles[kind], min_z)
                if kind != "bond":
                    self._log_table(header, df, fmt3)
                    continue
                # Bonds: type < 2 and type == 2 (external) are listed separately.
                regular = df[df.type < 2].drop(columns=["type", "alpha"])
                if len(regular.index) > 0:
                    self._log_table(header, regular, fmt3)
                external = df[df.type == 2].drop(columns=["type"])
                if len(external.index) > 0:
                    self._log_table(" *** External bond outliers (Z >= {}) ***\n".format(min_z),
                                    external, fmt3)

        # Per-atom score
        peratom = self.geom.reporting.per_atom_score(len(self.atoms), self.use_nucleus, "mean")
        per_atom_df = pandas.DataFrame(peratom)
        per_atom_df.insert(0, "atom", [str(self.lookup[x]) for x in self.atoms])
        per_atom_df = per_atom_df[per_atom_df["total"] >= self.outlier_sigmas["per_atom"]]
        if show_outliers and len(per_atom_df.index) > 0:
            per_atom_df = per_atom_df.sort_values("total", ascending=False)
            ret["outliers"]["per_atom"] = per_atom_df
            self._log_table(" *** Per-atom violations (Z >= {}) ***\n".format(self.outlier_sigmas["per_atom"]),
                            per_atom_df, "{:.2f}".format)

        summary = pandas.DataFrame(self.geom.reporting.get_summary_table(self.use_nucleus))
        summary = summary.set_index("Restraint type").rename_axis(index=None)
        ret["summary"] = summary
        logger.writeln(summary.to_string(float_format=fmt3) + "\n")
        return ret
183
+
184
def show_binstats(df, cycle_number):
    """Write a loggraph of the per-bin statistics in ``df`` to the log.

    Columns are grouped into plots by name: "fsc_model", names starting
    with "R", names starting with "CC", names matched by the "D" pattern
    below, and "S". A plot is only emitted when it has at least one column.
    The x axis passed to the loggraph is 1/d_min**2.
    """
    columns = list(df)
    candidates = (
        ("FSC", ["fsc_model"] if "fsc_model" in df else []),
        ("R", [c for c in columns if c.startswith("R")]),
        ("CC", [c for c in columns if c.startswith("CC")]),
        ("ML parameters - D", [c for c in columns if re.search("^D[0-9]*", c)]),
        ("ML parameters - Sigma", ["S"] if "S" in df else []),
    )
    forplot = [[title, labels] for title, labels in candidates if labels]
    title = "Data stats in cycle {}".format(cycle_number)
    inv_d2 = 1/df["d_min"]**2
    logger.writeln(utils.make_loggraph_str(df, title, forplot,
                                           s2=inv_d2,
                                           float_format="{:.4f}".format))
199
+
200
class GroupOccupancy:
    """Grouped occupancy parameters with constraints on sums of groups.

    Attributes:
        groups: one ``[indexes, atoms]`` pair per group; ``indexes`` are the
            positions assigned to the atoms in ``atom_pos``.
        consts: ``(is_comp, group_indexes)`` tuples. For ``is_comp`` the sum
            of the group occupancies is constrained to 1; otherwise only a
            sum above 1 counts as a violation (see ``constraint``).
        group_idxes: per atom (serial - 1), the 1-based group number, or 0.
        atom_pos: per atom (serial - 1), the position used when asking the
            likelihood object for gradients, or -1 if not in any group.
        ncycle: number of cycles run by ``refine``.
    """
    # TODO max may not be one. should check multiplicity

    def __init__(self, st, params):
        """Build groups from ``params`` (the parsed "occu" keywords).

        ``params`` may be None/empty, in which case no groups are defined.
        Otherwise ``params["groups"]`` maps a group id to a list of
        selections (keys read: chains, resi_from, resi_to, resi, atom, alt).
        ``params["const"]`` (optional) lists ``(is_comp, group_ids)`` and
        ``params["ncycle"]`` (default 5) sets the cycle count.
        """
        n_sites = st[0].count_atom_sites()
        self.groups = []
        self.consts = []
        self.group_idxes = [0] * n_sites
        # Defined before the early return so the attribute always exists.
        self.atom_pos = [-1] * n_sites
        self.ncycle = 0
        if not params or not params.get("groups"):
            return
        logger.writeln("Occupancy groups:")
        count = 0
        for igr in params["groups"]:
            self.groups.append([[], []])  # list of [indexes, atoms]
            n_curr = count
            for sel in params["groups"][igr]:
                sel_chains = sel.get("chains")
                sel_from = sel.get("resi_from")
                sel_to = sel.get("resi_to")
                sel_seq = sel.get("resi")
                sel_atom = sel.get("atom")
                sel_alt = sel.get("alt")
                for chain in st[0]:
                    if sel_chains and chain.name not in sel_chains:
                        continue
                    in_range = False  # inside the resi_from..resi_to stretch
                    for res in chain:
                        if sel_seq and res.seqid != sel_seq:
                            continue
                        if sel_from and res.seqid == sel_from:
                            in_range = True
                        if sel_from and not in_range:
                            continue
                        for atom in res:
                            if sel_atom and atom.name != sel_atom:
                                continue
                            if sel_alt and atom.altloc != sel_alt:
                                continue
                            self.atom_pos[atom.serial-1] = count
                            self.groups[-1][0].append(count)
                            self.groups[-1][1].append(atom)
                            self.group_idxes[atom.serial-1] = len(self.groups)
                            count += 1
                        if sel_to and res.seqid == sel_to:
                            in_range = False
            logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))

        # Translate user group ids into positions in self.groups.
        igr_idxes = {igr: i for i, igr in enumerate(params["groups"])}
        # A missing "const" entry means no constraints rather than an error.
        self.consts = [(is_comp, [igr_idxes[g] for g in gids])
                       for is_comp, gids in (params.get("const") or [])]
        self.ncycle = params.get("ncycle", 5)

    def constraint(self, x):
        """Return the constraint values c_j(x) as an array.

        Args:
            x: numpy array of group occupancies.

        For each constraint the value is ``sum(x[ids]) - 1`` when the
        constraint is complete or the sum exceeds 1, and 0 otherwise.
        """
        ret = []
        for is_comp, ids in self.consts:
            x_sum = numpy.sum(x[ids])
            if is_comp or x_sum > 1:
                ret.append(x_sum - 1)
            else:
                ret.append(0.)
        return numpy.array(ret)

    def ensure_constraints(self):
        """Equalise occupancies within groups and rescale constrained sums.

        Each group is set to the mean occupancy of its atoms; then for every
        constraint the member groups are divided by their summed occupancy
        (left unchanged for incomplete constraints whose sum is below 1).
        """
        vals = [numpy.mean([a.occ for a in atoms]) for _, atoms in self.groups]
        for is_comp, idxes in self.consts:
            sum_occ = sum(vals[i] for i in idxes)
            if not is_comp and sum_occ < 1:
                sum_occ = 1.  # do nothing
            for i in idxes:
                vals[i] /= sum_occ
        for occ, (_, atoms) in zip(vals, self.groups):
            for a in atoms:
                a.occ = occ

    def get_x(self):
        """Return group occupancies (taken from the first atom of each group)."""
        return numpy.array([atoms[0].occ for _, atoms in self.groups])

    def set_x(self, x):
        """Assign occupancy ``x[i]`` to every atom of group ``i``."""
        for p, (_, atoms) in zip(x, self.groups):
            for a in atoms:
                a.occ = p

    def target(self, x, ll, ls, u):
        """Augmented-Lagrangian target at ``x``.

        Applies ``x`` to the atoms, updates ``ll`` and returns
        ``LL - ls.c + u/2 * sum(c**2)``.
        """
        self.set_x(x)
        ll.update_fc()
        c = self.constraint(x)
        return ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)

    def grad(self, x, ll, ls, u, refine_h):
        """Return (gradient, diagonal second derivative) per group.

        Per-atom occupancy derivatives from ``ll`` are summed over each
        group (hydrogens excluded unless ``refine_h``), then the constraint
        terms of the augmented Lagrangian are added.
        """
        c = self.constraint(x)
        ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True,
                     refine_h=refine_h, specs=None)
        assert len(ll.ll.vn) == len(ll.ll.am)
        # Converted once; the original rebuilt both arrays for every group.
        atom_vn = numpy.array(ll.ll.vn)
        atom_am = numpy.array(ll.ll.am)
        vn = []
        diag = []
        for idxes, atoms in self.groups:
            if not refine_h:
                idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
            vn.append(numpy.sum(atom_vn[idxes]))
            diag.append(numpy.sum(atom_am[idxes]))
        vn, diag = numpy.array(vn), numpy.array(diag)
        for i, (is_comp, idxes) in enumerate(self.consts):
            # dc_i/dx is 1 for member groups and 0 elsewhere.
            dcdx = numpy.zeros(len(self.groups))
            dcdx[idxes] = 1.
            if is_comp or c[i] != 0:
                vn -= (ls[i] - u * c[i]) * dcdx
                diag += u * dcdx**2

        return vn, diag

    def refine(self, ll, refine_h, alpha=1.1):
        """Refine grouped occupancies with an augmented Lagrangian.

        f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
        with c_j(x) = 0 constraints.

        Args:
            ll: likelihood object providing update_fc/calc_target/calc_grad.
            refine_h: whether hydrogen atoms contribute to the gradient.
            alpha: factor by which the penalty ``u`` grows each cycle.

        Returns:
            None when there are no groups; otherwise a list with one dict
            per cycle (keys: Ncyc, f0, f1, shift_scale, const_viol,
            lambda_new).
        """
        if not self.groups:
            return
        logger.writeln("\n== Group occupancy refinement ==")
        self.ensure_constraints()  # make sure constrained groups have the same occupancies.
        ls = numpy.zeros(len(self.consts))  # Lagrange multiplier
        u = 10000.  # penalty parameter. in Refmac 1/0.01**2
        x0 = self.get_x()
        f0 = self.target(x0, ll, ls, u)
        ret = []
        for cyc in range(self.ncycle):
            ret.append({"Ncyc": cyc+1, "f0": f0})
            logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
            vn, diag = self.grad(x0, ll, ls, u, refine_h)
            diag[diag < 1e-6] = 1.  # avoid dividing by (near-)zero curvature
            dx = -vn / diag

            # Try the full step, then half and quarter steps.
            for i in range(3):
                scale = 1/2**i
                f1 = self.target(x0 + dx * scale, ll, ls, u)
                logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
                if f1 < f0:
                    break
            else:
                # Refmac accepts the step even when the function increases,
                # so the last trial step is kept.
                logger.writeln("WARNING: function not minimised")
            c = self.constraint(x0 + dx * scale)
            ret[-1]["f1"] = f1
            ret[-1]["shift_scale"] = scale
            f0 = f1
            x0 = x0 + dx * scale
            ls -= u * c
            u = alpha * u
            ret[-1]["const_viol"] = list(c)
            ret[-1]["lambda_new"] = list(ls)
        self.ensure_constraints()
        ll.update_fc()
        f = ll.calc_target()
        logger.writeln("final -LL= {}".format(f))
        return ret
376
+
377
+
378
class Refine:
    """Refinement driver combining geometry restraints and a data term.

    The parameter vector is laid out as
    [xyz (3 per atom)] [ADP (1 or 6 per atom)] [occupancy (1 per atom)],
    each section present only when the corresponding option is enabled;
    positions within a section follow ``geom.atom_pos``.
    """

    # Limits applied to individual shifts in scale_shifts().
    SHIFT_LIMIT_XYZ = 1.0
    SHIFT_LIMIT_B = 30.0
    SHIFT_LIMIT_OCC = 0.5
    # True: solve with ext.CgSolve; False: use the Python solver in cgsolve.
    use_ext_cgsolve = True

    def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
                 unrestrained=False, refmac_keywords=None):
        """
        Args:
            st: structure being refined (not copied).
            geom: Geom instance; its atom list is shared, not copied.
            ll: data-term object, or None for geometry-only runs (ADP
                refinement is then switched off).
            refine_xyz: refine coordinates.
            adp_mode: 0=fix, 1=iso, 2=aniso.
            refine_h: forwarded to the data-term gradient; when false and
                ADPs are refined, hydrogens copy the ADP of their parent.
            refine_occ: refine individual occupancies.
            unrestrained: leave the geometry term out of the target.
            refmac_keywords: accepted for interface compatibility; not used
                in this class.
        """
        assert adp_mode in (0, 1, 2)  # 0=fix, 1=iso, 2=aniso
        assert geom is not None
        self.st = st  # clone()?
        self.atoms = geom.atoms  # not a copy
        self.geom = geom
        self.ll = ll
        self.gamma = 0
        self.adp_mode = 0 if self.ll is None else adp_mode
        self.refine_xyz = refine_xyz
        self.refine_occ = refine_occ
        self.unrestrained = unrestrained
        self.refine_h = refine_h
        self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
        if self.h_inherit_parent_adp:
            self.geom.set_h_parents()
        assert self.geom.group_occ.groups or self.n_params() > 0

    def print_weights(self):  # TODO unfinished
        """Log the ADP restraint settings (only when ADPs are refined).

        Raises:
            LookupError: if the geometry object reports an ADP restraint
                mode other than "diff" or "kldiv".
        """
        logger.writeln("Geometry weights")
        g = self.geom.geom
        if self.adp_mode > 0:
            logger.writeln(" ADP restraints")
            logger.writeln(" weight: {}".format(self.geom.adpr_w))
            logger.writeln(" mode: {}".format(g.adpr_mode))
            if g.adpr_mode == "diff":
                sigs = g.adpr_diff_sigs
            elif g.adpr_mode == "kldiv":
                sigs = g.adpr_kl_sigs
            else:
                raise LookupError("unknown adpr_mode")
            logger.writeln(" sigmas: {}".format(" ".join("{:.2f}".format(x) for x in sigs)))

    def _param_offsets(self):
        """Return (n_atoms, offset_b, offset_q) of the parameter vector.

        ``offset_b`` is where the ADP section starts and ``offset_q`` where
        the occupancy section starts.
        """
        n_atoms = self.geom.n_refine_atoms
        offset_b = n_atoms * 3 if self.refine_xyz else 0
        offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
        return n_atoms, offset_b, offset_q

    def _refines_atomic_params(self):
        """True when run_cycle() has any per-atom parameter to refine."""
        return bool(self.refine_xyz or self.adp_mode > 0 or self.refine_occ)

    @staticmethod
    def _log_shift_range(label, values):
        """Log min / max / mean of a section of the shift vector."""
        logger.writeln("min({}) = {}".format(label, numpy.min(values)))
        logger.writeln("max({}) = {}".format(label, numpy.max(values)))
        logger.writeln("mean({})= {}".format(label, numpy.mean(values)))

    def scale_shifts(self, dx, scale):
        """Return ``scale * dx`` with each section limited to allowed shifts.

        Coordinates are clipped to +/-SHIFT_LIMIT_XYZ, isotropic B shifts to
        +/-SHIFT_LIMIT_B, occupancies to +/-SHIFT_LIMIT_OCC. For anisotropic
        ADPs the eigenvalues of each atom's shift tensor are clipped to
        +/-SHIFT_LIMIT_B. The input ``dx`` is not modified.
        """
        _, offset_b, offset_q = self._param_offsets()
        dx = scale * dx  # new array; the slices below are views into it
        if self.refine_xyz:
            dxx = dx[:offset_b]
            self._log_shift_range("dx", dxx)
            numpy.clip(dxx, -self.SHIFT_LIMIT_XYZ, self.SHIFT_LIMIT_XYZ, out=dxx)
        if self.adp_mode == 1:
            dxb = dx[offset_b:offset_q]
            self._log_shift_range("dB", dxb)
            numpy.clip(dxb, -self.SHIFT_LIMIT_B, self.SHIFT_LIMIT_B, out=dxb)
        elif self.adp_mode == 2:
            dxb = dx[offset_b:offset_q]
            # TODO this is misleading
            self._log_shift_range("dB", dxb)
            for i in range(len(dxb)//6):
                j = i * 6
                # Stored order per atom: 11, 22, 33, 12, 13, 23.
                a = numpy.array([[dxb[j],   dxb[j+3], dxb[j+4]],
                                 [dxb[j+3], dxb[j+1], dxb[j+5]],
                                 [dxb[j+4], dxb[j+5], dxb[j+2]]])
                v, Q = numpy.linalg.eigh(a)
                v = numpy.clip(v, -self.SHIFT_LIMIT_B, self.SHIFT_LIMIT_B)
                a = Q.dot(numpy.diag(v)).dot(Q.T)
                dxb[j:j+6] = a[0,0], a[1,1], a[2,2], a[0,1], a[0,2], a[1,2]
        if self.refine_occ:
            dxq = dx[offset_q:]
            self._log_shift_range("dq", dxq)
            numpy.clip(dxq, -self.SHIFT_LIMIT_OCC, self.SHIFT_LIMIT_OCC, out=dxq)

        return dx

    def n_params(self):
        """Return the length of the parameter vector."""
        n_atoms = self.geom.n_refine_atoms
        per_atom = 0
        if self.refine_xyz:
            per_atom += 3
        if self.adp_mode == 1:
            per_atom += 1
        elif self.adp_mode == 2:
            per_atom += 6
        if self.refine_occ:
            per_atom += 1
        return per_atom * n_atoms

    def set_x(self, x):
        """Write parameter vector ``x`` into the atoms and refresh state.

        B values (and ADP eigenvalues) are kept at or above 0.5;
        occupancies are kept between 1e-3 and 1 (or 1/multiplicity for
        atoms on special positions). Afterwards the data term is updated
        and the non-bonded restraints and target are set up again.
        """
        _, offset_b, offset_q = self._param_offsets()
        max_occ = {}
        if self.refine_occ and self.geom.specs:
            max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
        for i, j in enumerate(self.geom.atom_pos):
            if j < 0:
                continue
            atom = self.atoms[i]
            if self.refine_xyz:
                atom.pos.fromlist(x[3*j:3*j+3])  # faster than substituting pos.x,pos.y,pos.z
            if self.adp_mode == 1:
                atom.b_iso = max(0.5, x[offset_b + j])  # minimum B = 0.5
            elif self.adp_mode == 2:
                a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
                a = gemmi.SMat33d(*a)
                M = numpy.array(a.as_mat33())
                v, Q = numpy.linalg.eigh(M)  # eig() may return complex due to numerical precision?
                v = numpy.maximum(v, 0.5)  # avoid NPD with minimum B = 0.5
                M2 = Q.dot(numpy.diag(v)).dot(Q.T)
                atom.b_iso = M2.trace() / 3
                M2 *= b_to_u
                atom.aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
            if self.refine_occ:
                atom.occ = min(max_occ.get(atom, 1), max(1e-3, x[offset_q + j]))

        # Copy B of hydrogen from parent
        if self.h_inherit_parent_adp:
            for h, p in self.geom.parents.items():
                h.b_iso = p.b_iso
                h.aniso = p.aniso

        if self.ll is not None:
            self.ll.update_fc()

        self.geom.setup_nonbonded(self.refine_xyz)  # if refine_xyz=False, no need to do it every time
        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
        logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))

    def get_x(self):
        """Return the current parameter vector read from the atoms."""
        _, offset_b, offset_q = self._param_offsets()
        x = numpy.zeros(self.n_params())
        for i, j in enumerate(self.geom.atom_pos):
            if j < 0:
                continue
            a = self.atoms[i]
            if self.refine_xyz:
                x[3*j:3*(j+1)] = a.pos.tolist()
            if self.adp_mode == 1:
                x[offset_b + j] = a.b_iso
            elif self.adp_mode == 2:
                x[offset_b + 6*j : offset_b + 6*(j+1)] = a.aniso.elements_pdb()
                x[offset_b + 6*j : offset_b + 6*(j+1)] *= u_to_b
            if self.refine_occ:
                x[offset_q + j] = a.occ

        return x

    def calc_target(self, w=1, target_only=False):
        """Return ``w * data_term + geometry_term``.

        Unless ``target_only`` is set, gradients of the data term are also
        computed (the geometry object handles its own derivatives).
        """
        geom = self.geom.calc_target(target_only,
                                     not self.unrestrained and self.refine_xyz,
                                     self.adp_mode)
        if self.ll is not None:
            ll = self.ll.calc_target()
            logger.writeln(" ll= {}".format(ll))
            if not target_only:
                self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
                                  self.refine_h, self.geom.geom.specials)
        else:
            ll = 0

        return w * ll + geom

    def _solve_shifts_ext(self, weight):
        """Solve for the shift vector with the C++ conjugate-gradient solver."""
        use_ic = False  # incomplete cholesky. problematic at least in geometry optimisation case
        logger.writeln("using cgsolve in c++, ic={}".format(use_ic))
        cgsolver = ext.CgSolve(self.geom.geom.target, None if self.ll is None else self.ll.ll)
        if use_ic:
            cgsolver.gamma = 0
            cgsolver.max_gamma_cyc = 1
        else:
            cgsolver.gamma = self.gamma
        dx = cgsolver.solve(weight, logger, use_ic)
        self.gamma = cgsolver.gamma
        return dx

    def _solve_shifts_py(self, weight):
        """Solve for the shift vector with the Python solver in ``cgsolve``."""
        logger.writeln("using cgsolve in py")
        am = self.geom.geom.target.am_spmat
        vn = numpy.array(self.geom.geom.target.vn)
        if self.ll is not None:
            am += self.ll.ll.fisher_spmat * weight
            vn += numpy.array(self.ll.ll.vn) * weight
        diag = am.diagonal()
        diag[diag<=0] = 1.
        diag = numpy.sqrt(diag)
        rdiag = 1./diag  # sk
        M = scipy.sparse.diags(rdiag)
        dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)
        return dx

    def run_cycle(self, weight=1):
        """Run one refinement cycle.

        The shift is applied as ``x0 - scale * dx`` with scale 1, 1/2, 1/4
        until the target decreases. As in Refmac, the last trial is kept
        even if the target did not decrease.

        Returns:
            (success, shift_scale, f1): whether the target decreased, the
            scale finally used, and the target value after the shift.
        """
        f0 = self.calc_target(weight)
        x0 = self.get_x()
        logger.writeln("f0= {:.4e}".format(f0))
        if self.use_ext_cgsolve:
            dx = self._solve_shifts_ext(weight)
        else:
            dx = self._solve_shifts_py(weight)

        ret = True  # success
        shift_scale = 1
        for i in range(3):
            shift_scale = 1/2**i
            dx2 = self.scale_shifts(dx, shift_scale)
            self.set_x(x0 - dx2)
            f1 = self.calc_target(weight, target_only=True)
            logger.writeln("f1, {}= {:.4e}".format(i, f1))
            if f1 < f0:
                break
        else:
            ret = False
            logger.writeln("WARNING: function not minimised")
            #self.set_x(x0) # Refmac accepts it even when function increases

        return ret, shift_scale, f1

    def _log_stats_table(self, stats):
        """Log a "stats vs cycle" loggraph built from the per-cycle stats."""
        data_keys, geom_keys = set(), set()
        rows = []
        geom_columns = (("r.m.s.d.", "Bond distances, non H", "rmsBOND"),
                        ("r.m.s.Z", "Bond distances, non H", "zBOND"),
                        ("r.m.s.d.", "Bond angles, non H", "rmsANGL"),
                        ("r.m.s.Z", "Bond angles, non H", "zANGL"))
        for d in stats:
            row = {"Ncyc": d["Ncyc"]}
            if "data" in d and "summary" in d["data"]:
                row.update(d["data"]["summary"])
                data_keys.update(d["data"]["summary"])
            if "geom" in d:
                for k, n, l in geom_columns:
                    if k in d["geom"] and n in d["geom"][k]:
                        row[l] = d["geom"][k].get(n)
                        geom_keys.add(l)
            rows.append(row)
        df = pandas.DataFrame(rows)
        forplot = []
        if "FSCaverage" in data_keys:
            forplot.append(["FSC", ["Ncyc", "FSCaverage"]])
        # Keys are sorted so the column order does not depend on set ordering.
        r_keys = sorted(x for x in data_keys if x.startswith("R"))
        if r_keys:
            forplot.append(["R", ["Ncyc"] + r_keys])
        cc_keys = sorted(x for x in data_keys if x.startswith("CC"))
        if cc_keys:
            forplot.append(["CC", ["Ncyc"] + cc_keys])
        if "-LL" in data_keys:
            forplot.append(["-LL", ["Ncyc", "-LL"]])
        rms_keys = sorted(x for x in geom_keys if x.startswith("rms"))
        if rms_keys:
            forplot.append(["Geometry", ["Ncyc"] + rms_keys])
        z_keys = sorted(x for x in geom_keys if x.startswith("z"))
        if z_keys:
            forplot.append(["Geometry Z", ["Ncyc"] + z_keys])

        lstr = utils.make_loggraph_str(df, "stats vs cycle", forplot,
                                       float_format="{:.4f}".format)
        logger.writeln(lstr)

    def run_cycles(self, ncycles, weight=1, debug=False):
        """Run ``ncycles`` refinement cycles and return per-cycle statistics.

        Args:
            ncycles: number of cycles.
            weight: weight of the data term relative to geometry.
            debug: write the model as "refined_NN" after every cycle.

        Returns:
            list of dicts; entry 0 describes the starting model and every
            cycle adds exactly one entry.
        """
        self.print_weights()
        stats = [{"Ncyc": 0}]
        self.geom.setup_nonbonded(self.refine_xyz)
        self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
        logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
        if self.refine_xyz and not self.unrestrained:
            stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=True)["summary"]
        if self.ll is not None:
            self.ll.update_fc()
            self.ll.overall_scale()
            self.ll.update_ml_params()
            llstats = self.ll.calc_stats(bin_stats=True)
            stats[-1]["data"] = {"summary": llstats["summary"],
                                 "binned": llstats["bin_stats"].to_dict(orient="records")}
            show_binstats(llstats["bin_stats"], 0)
        if self.adp_mode > 0:
            utils.model.adp_analysis(self.st)
        group_occ = self.geom.group_occ
        occ_refine_flag = self.ll is not None and bool(group_occ.groups) and group_occ.ncycle > 0

        for i in range(ncycles):
            logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
            if self._refines_atomic_params():
                # Includes occupancy-only refinement (refine_occ without
                # xyz/ADP), which was previously skipped.
                is_ok, shift_scale, fval = self.run_cycle(weight=weight)
                stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval,
                              "fval_decreased": is_ok})
            else:
                # Give the cycle its own entry so results below do not
                # overwrite those of the previous cycle.
                stats.append({"Ncyc": len(stats)})
            if occ_refine_flag:
                stats[-1]["occ_refine"] = group_occ.refine(self.ll, self.refine_h)
            if debug:
                utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
            if self.refine_xyz and not self.unrestrained:
                stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=(i==ncycles-1))["summary"]
            if self.ll is not None:
                self.ll.overall_scale()
                f0 = self.ll.calc_target()
                self.ll.update_ml_params()
                llstats = self.ll.calc_stats(bin_stats=True)#(i==ncycles-1))
                if llstats["summary"]["-LL"] > f0:
                    logger.writeln("WARNING: -LL has increased after ML parameter optimization:"
                                   "{} to {}".format(f0, llstats["summary"]["-LL"]))
                stats[-1]["data"] = {"summary": llstats["summary"],
                                     "binned": llstats["bin_stats"].to_dict(orient="records")}
                show_binstats(llstats["bin_stats"], i+1)
            if self.adp_mode > 0:
                utils.model.adp_analysis(self.st)
            logger.writeln("")

        self._log_stats_table(stats)
        self.update_meta()
        return stats

    def update_meta(self):
        """Reset remarks/refinement metadata and record Servalcat as software."""
        # TODO write stats. probably geom.reporting.get_summary_table should return with _refine_ls_restr.type names
        self.st.raw_remarks = []
        si = gemmi.SoftwareItem()
        si.classification = gemmi.SoftwareItem.Classification.Refinement
        si.name = "Servalcat"
        si.version = servalcat.__version__
        si.date = servalcat.__date__
        self.st.meta.software = [si]

        self.st.meta.refinement = []