servalcat 0.4.99__cp39-cp39-macosx_10_14_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

Files changed (45) hide show
  1. servalcat/__init__.py +10 -0
  2. servalcat/__main__.py +120 -0
  3. servalcat/ext.cpython-39-darwin.so +0 -0
  4. servalcat/refine/__init__.py +0 -0
  5. servalcat/refine/cgsolve.py +100 -0
  6. servalcat/refine/refine.py +906 -0
  7. servalcat/refine/refine_geom.py +233 -0
  8. servalcat/refine/refine_spa.py +366 -0
  9. servalcat/refine/refine_xtal.py +281 -0
  10. servalcat/refine/spa.py +144 -0
  11. servalcat/refine/xtal.py +276 -0
  12. servalcat/refmac/__init__.py +0 -0
  13. servalcat/refmac/exte.py +182 -0
  14. servalcat/refmac/refmac_keywords.py +639 -0
  15. servalcat/refmac/refmac_wrapper.py +395 -0
  16. servalcat/spa/__init__.py +0 -0
  17. servalcat/spa/fofc.py +479 -0
  18. servalcat/spa/fsc.py +385 -0
  19. servalcat/spa/localcc.py +188 -0
  20. servalcat/spa/realspcc_from_var.py +128 -0
  21. servalcat/spa/run_refmac.py +977 -0
  22. servalcat/spa/shift_maps.py +293 -0
  23. servalcat/spa/shiftback.py +137 -0
  24. servalcat/spa/translate.py +129 -0
  25. servalcat/utils/__init__.py +35 -0
  26. servalcat/utils/commands.py +1547 -0
  27. servalcat/utils/fileio.py +744 -0
  28. servalcat/utils/generate_operators.py +296 -0
  29. servalcat/utils/hkl.py +714 -0
  30. servalcat/utils/logger.py +140 -0
  31. servalcat/utils/maps.py +345 -0
  32. servalcat/utils/model.py +782 -0
  33. servalcat/utils/refmac.py +760 -0
  34. servalcat/utils/restraints.py +781 -0
  35. servalcat/utils/symmetry.py +295 -0
  36. servalcat/xtal/__init__.py +0 -0
  37. servalcat/xtal/french_wilson.py +258 -0
  38. servalcat/xtal/run_refmac_small.py +240 -0
  39. servalcat/xtal/sigmaa.py +1644 -0
  40. servalcat/xtal/twin.py +121 -0
  41. servalcat-0.4.99.dist-info/METADATA +55 -0
  42. servalcat-0.4.99.dist-info/RECORD +45 -0
  43. servalcat-0.4.99.dist-info/WHEEL +5 -0
  44. servalcat-0.4.99.dist-info/entry_points.txt +4 -0
  45. servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
@@ -0,0 +1,906 @@
1
+ """
2
+ Author: "Keitaro Yamashita, Garib N. Murshudov"
3
+ MRC Laboratory of Molecular Biology
4
+
5
+ This software is released under the
6
+ Mozilla Public License, version 2.0; see LICENSE.
7
+ """
8
+ from __future__ import absolute_import, division, print_function, generators
9
+ import os
10
+ import re
11
+ import gemmi
12
+ import numpy
13
+ import json
14
+ import pandas
15
+ import scipy.sparse
16
+ import servalcat # for version
17
+ from servalcat.utils import logger
18
+ from servalcat import utils
19
+ from servalcat.refmac import exte
20
+ from servalcat import ext
21
+ from . import cgsolve
22
+ u_to_b = utils.model.u_to_b
23
+ b_to_u = utils.model.b_to_u
24
+
25
+ #import line_profiler
26
+ #import atexit
27
+ #profile = line_profiler.LineProfiler()
28
+ #atexit.register(profile.print_stats)
29
+
30
class Geom:
    """Geometry-restraint part of the refinement target.

    Holds the structure, a serial-indexed atom table and an ``ext.Geometry``
    object, and forwards target/restraint calculations and outlier
    reporting to it.
    """

    def __init__(self, st, topo, monlib, adpr_w=1, occr_w=1, shake_rms=0,
                 params=None, unrestrained=False, use_nucleus=False,
                 ncslist=None, atom_pos=None):
        """Build the geometry object and load restraints for structure *st*.

        *atom_pos* maps each atom (by serial - 1) to its parameter index;
        when omitted every atom gets its own index in serial order.
        *params* is an optional dict read for geometry weights ("wbond",
        ...), "restr", "occu" and "exte" entries.

        Raises SystemExit when ``ext.Geometry`` rejects its arguments with a
        TypeError.
        """
        self.st = st
        model = self.st[0]
        # Atom table addressed by (serial - 1).
        self.atoms = [None] * model.count_atom_sites()
        for cra in model.all():
            self.atoms[cra.atom.serial-1] = cra.atom
        self.atom_pos = list(range(len(self.atoms))) if atom_pos is None else atom_pos
        self.n_refine_atoms = max(self.atom_pos) + 1
        # atom -> CRA, used to print readable atom names in outlier tables
        self.lookup = {cra.atom: cra for cra in model.all()}
        try:
            self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
        except TypeError as e:
            raise SystemExit(f"An error occurred while creating the Geometry object:\n{e}\n\n"
                             "This likely indicates an installation issue. "
                             "Please verify that you have the correct version of gemmi installed and that both gemmi and servalcat were compiled in the same environment.")
        self.specs = utils.model.find_special_positions(self.st)
        #cs_count = len(self.st.find_spacegroup().operations())
        for atom, images, matp, mata in self.specs:
            #n_sym = len([x for x in images if x < cs_count]) + 1
            multiplicity = len(images) + 1
            self.geom.specials.append(ext.Geometry.Special(atom, matp, mata, multiplicity))
        self.adpr_w = adpr_w
        self.occr_w = occr_w
        self.unrestrained = unrestrained
        if shake_rms > 0:
            # fixed seed before shaking the coordinates
            numpy.random.seed(0)
            utils.model.shake_structure(self.st, shake_rms, copy=False)
            #utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
        self.use_nucleus = use_nucleus
        self.calc_kwds = {"use_nucleus": self.use_nucleus}
        if params is None:
            params = {}
        for key in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
            if key not in params:
                continue
            self.calc_kwds[key] = params[key]
            logger.writeln("setting geometry weight {}= {}".format(key, params[key]))
        inc_tors, exc_tors = utils.restraints.make_torsion_rules(params.get("restr", {}))
        rtors = utils.restraints.select_restrained_torsions(monlib, inc_tors, exc_tors)
        self.geom.mon_tors_names = rtors["monomer"]
        self.geom.link_tors_names = rtors["link"]
        self.group_occ = GroupOccupancy(self.st, params.get("occu"))
        if not self.unrestrained:
            self.geom.load_topo(topo)
            exte.read_external_restraints(params.get("exte", []), self.st, self.geom)
            self.geom.finalize_restraints()
        # Z-score thresholds used when listing outliers
        self.outlier_sigmas = {"bond": 5, "angle": 5, "torsion": 5, "vdw": 5, "ncs": 5,
                               "chir": 5, "plane": 5, "staca": 5, "stacd": 5, "per_atom": 5}
        self.parents = {}
        self.ncslist = ncslist
    # __init__()

    def set_h_parents(self):
        """Fill ``self.parents`` with hydrogen -> bonded partner, from the bond list."""
        self.parents = {}
        for bond in self.geom.bonds:
            first, second = bond.atoms[0], bond.atoms[1]
            if first.is_hydrogen():
                self.parents[first] = second
            elif second.is_hydrogen():
                self.parents[second] = first
    # set_h_parents()

    def setup_nonbonded(self, refine_xyz):
        """Set up non-bonded (and, if an NCS list was given, NCS) restraints."""
        skip = not refine_xyz or self.unrestrained
        self.geom.setup_nonbonded(skip_critical_dist=skip, group_idxes=self.group_occ.group_idxes)
        if self.ncslist:
            self.geom.setup_ncsr(self.ncslist)

    def calc(self, target_only):
        """Forward to ``geom.calc`` with the stored weight keywords."""
        return self.geom.calc(check_only=target_only, **self.calc_kwds)

    def calc_adp_restraint(self, target_only):
        """Forward to ``geom.calc_adp_restraint`` with the ADP restraint weight."""
        return self.geom.calc_adp_restraint(target_only, self.adpr_w)

    def calc_occ_restraint(self, target_only):
        """Forward to ``geom.calc_occ_restraint`` with the occupancy restraint weight."""
        return self.geom.calc_occ_restraint(target_only, self.occr_w)

    def calc_target(self, target_only, refine_xyz, adp_mode, use_occr):
        """Return the sum of the enabled geometry, ADP and occupancy terms.

        Terms that are switched off contribute 0. Unless *target_only* is
        set, ``geom.spec_correction()`` is applied afterwards.
        """
        self.geom.clear_target()
        geom_x = geom_a = geom_q = 0
        if refine_xyz:
            geom_x = self.calc(target_only)
        if adp_mode > 0:
            geom_a = self.calc_adp_restraint(target_only)
        if use_occr > 0:
            geom_q = self.calc_occ_restraint(target_only)
        for line in (" geom_x = {}".format(geom_x),
                     " geom_a = {}".format(geom_a),
                     " geom_q = {}".format(geom_q)):
            logger.writeln(line)
        total = geom_x + geom_a + geom_q
        if not target_only:
            self.geom.spec_correction()
        return total

    def show_model_stats(self, refine_xyz=True, adp_mode=1, use_occr=False, show_outliers=True):
        """Log restraint statistics and return them.

        Returns a dict with "summary" (DataFrame indexed by restraint type)
        and "outliers" (restraint kind -> DataFrame sorted by |z|, filled
        only when *show_outliers* is true and outliers exist).
        """
        # Re-evaluate the enabled terms in check-only mode before reporting.
        if refine_xyz:
            self.calc(True)
        if adp_mode > 0:
            self.calc_adp_restraint(True)
        if use_occr:
            self.calc_occ_restraint(True)
        ret = {"outliers": {}}
        if show_outliers:
            reporting = self.geom.reporting
            get_table = {"bond": reporting.get_bond_outliers,
                         "angle": reporting.get_angle_outliers,
                         "torsion": reporting.get_torsion_outliers,
                         "chir": reporting.get_chiral_outliers,
                         "plane": reporting.get_plane_outliers,
                         "staca": reporting.get_stacking_angle_outliers,
                         "stacd": reporting.get_stacking_dist_outliers,
                         "vdw": reporting.get_vdw_outliers,
                         #"ncs": reporting.get_ncsr_outliers, # not useful?
                         }
            labs = {"bond": "Bond distances",
                    "angle": "Bond angles",
                    "torsion": "Torsion angles",
                    "chir": "Chiral centres",
                    "plane": "Planar groups",
                    "staca": "Stacking plane angles",
                    "stacd": "Stacking plane distances",
                    "vdw": "VDW repulsions",
                    "ncs": "Local NCS restraints"}

            for kind, getter in get_table.items():
                sigma = self.outlier_sigmas[kind]
                kwgs = {"min_z": sigma}
                if kind == "bond":
                    kwgs["use_nucleus"] = self.use_nucleus
                table = getter(**kwgs)
                if not table["z"]:
                    continue
                # Replace atom objects by their printable CRA form.
                for col in table:
                    if col.startswith(("atom", "plane", "1_atom", "2_atom")):
                        table[col] = [str(self.lookup[a]) for a in table[col]]
                df = pandas.DataFrame(table)
                df = df.reindex(df.z.abs().sort_values(ascending=False).index)
                ret["outliers"][kind] = df
                if kind != "bond":
                    logger.writeln(" *** {} outliers (Z >= {}) ***\n".format(labs[kind], sigma))
                    logger.writeln(df.to_string(float_format="{:.3f}".format, index=False) + "\n")
                    continue
                # Bonds: rows with type < 2 and type == 2 (reported as
                # "External bond") are printed as separate tables.
                regular = df[df.type < 2].drop(columns=["type", "alpha"])
                if len(regular.index) > 0:
                    logger.writeln(" *** {} outliers (Z >= {}) ***\n".format(labs[kind], sigma))
                    logger.writeln(regular.to_string(float_format="{:.3f}".format, index=False) + "\n")
                external = df[df.type == 2].drop(columns=["type"])
                if len(external.index) > 0:
                    logger.writeln(" *** External bond outliers (Z >= {}) ***\n".format(sigma))
                    logger.writeln(external.to_string(float_format="{:.3f}".format, index=False) + "\n")

        # Per-atom score (currently disabled)
        if 0:
            peratom = self.geom.reporting.per_atom_score(len(self.atoms), self.use_nucleus, "mean")
            df = pandas.DataFrame(peratom)
            df.insert(0, "atom", [str(self.lookup[x]) for x in self.atoms])
            df = df[df["total"] >= self.outlier_sigmas["per_atom"]]
            if show_outliers and len(df.index) > 0:
                df.sort_values("total", ascending=False, inplace=True)
                ret["outliers"]["per_atom"] = df
                logger.writeln(" *** Per-atom violations (Z >= {}) ***\n".format(self.outlier_sigmas["per_atom"]))
                logger.writeln(df.to_string(float_format="{:.2f}".format, index=False) + "\n")

        summary = pandas.DataFrame(self.geom.reporting.get_summary_table(self.use_nucleus))
        summary = summary.set_index("Restraint type").rename_axis(index=None)
        ret["summary"] = summary
        logger.writeln(summary.to_string(float_format="{:.3f}".format) + "\n")
        return ret
186
+
187
def show_binstats(df, cycle_number):
    """Log a loggraph block of the per-bin statistics in *df*.

    Columns are grouped into plots by label prefix (R, fsc, CC, D...) plus
    the "fsc_model" and "S" columns when present; groups with no matching
    column are left out. The x axis is 1/d_min**2.
    """
    columns = list(df)

    def with_prefix(prefix):
        return [c for c in columns if c.startswith(prefix)]

    candidates = [
        ["FSC", ["fsc_model"] if "fsc_model" in df else []],
        ["R", with_prefix("R")],
        ["FSC", with_prefix("fsc")],
        ["CC", with_prefix("CC")],
        ["ML parameters - D", [c for c in columns if re.search("^D[0-9]*", c)]],
        ["ML parameters - Sigma", ["S"] if "S" in df else []],
    ]
    # keep only the plots that actually have columns
    forplot = [entry for entry in candidates if entry[1]]
    title = "Data stats in cycle {}".format(cycle_number)
    lstr = utils.make_loggraph_str(df, title, forplot,
                                   s2=1/df["d_min"]**2,
                                   float_format="{:.4f}".format)
    logger.writeln(lstr)
# show_binstats()
204
+
205
def convert_stats_to_dicts(stats):
    """Return a copy of *stats* with the "geom" DataFrames turned into plain dicts.

    *stats* must be a list of dicts. For each entry, "geom" becomes
    ``{"summary": <dict>, "outliers": {kind: [row dicts]}}`` (the
    "outliers" key only appears when there is at least one outlier table);
    every other key is copied as is.
    """
    converted = []
    for entry in stats:  # stats must be a list of dict
        plain = {}
        for key, value in entry.items():
            if key != "geom":
                plain[key] = value
                continue
            geom = {"summary": value["summary"].to_dict()}
            for kind, table in value["outliers"].items():
                geom.setdefault("outliers", {})[kind] = table.to_dict(orient="records")
            plain["geom"] = geom
        converted.append(plain)
    return converted
# convert_stats_to_dicts()
218
+
219
def write_stats_json_safe(stats, json_out):
    """Write *stats* as JSON to *json_out* via a temporary ".part" file.

    The data is first dumped to ``json_out + ".part"`` and then moved over
    *json_out* with ``os.replace``, so the target file is only replaced once
    the dump has finished.
    """
    serialisable = convert_stats_to_dicts(stats)
    partial_path = json_out + ".part"
    with open(partial_path, "w") as handle:
        json.dump(serialisable, handle, indent=2)
    os.replace(partial_path, json_out)
    logger.writeln(f"Refinement statistics saved: {json_out}")
# write_stats_json_safe()
227
+
228
def print_h_options(h_change, h_present, refine_h, hout, geom_only):
    """Log a short summary of how hydrogen atoms are treated.

    Reports how hydrogens were generated (*h_change*, forced to ``Remove``
    when no hydrogen is present), what they are refined against, and whether
    they are written to the output model.
    """
    if not h_present:
        h_change = gemmi.HydrogenChange.Remove
    logger.writeln("Hydrogen related options")
    usage = "" if geom_only else "/map calculation"
    logger.write(" use in refinement{}: hydrogen atoms ".format(usage))
    descriptions = {
        gemmi.HydrogenChange.ReAddButWater: "have been (re)generated",
        gemmi.HydrogenChange.ReAdd: "(including water) have been (re)generated",
        gemmi.HydrogenChange.ReAddKnown: "(except for rotatable) have been (re) generated",
        gemmi.HydrogenChange.NoChange: "from the input model have been retained",
        gemmi.HydrogenChange.Remove: "have either been removed or were not present",
    }
    logger.writeln(descriptions[h_change])
    if h_present:
        logger.write(" target: hydrogen atoms will be ")
        if not geom_only and refine_h:
            logger.writeln("refined against experimental data")
        else:
            logger.writeln("just optimized according to geometric restraints")
    if hout and h_present:
        output_state = "written"
    else:
        output_state = "not written"
    logger.writeln(" in output model: " + output_state)
    logger.writeln("")
# print_h_options()
247
+
248
class GroupOccupancy:
    """Grouped occupancy parameters with sum constraints.

    Each group is a set of atoms sharing one occupancy. ``consts`` holds
    ``(is_comp, [group indices])`` pairs: the occupancies of the listed
    groups must sum to exactly 1 when ``is_comp`` is true, and to at most 1
    otherwise. Refinement uses an augmented Lagrangian (see ``refine``).
    """
    # TODO max may not be one. should check multiplicity
    def __init__(self, st, params):
        """Select atoms for each occupancy group described in *params*.

        *params* may be None/empty, in which case no group is defined.
        Otherwise it is read for "groups" (group id -> list of selection
        dicts with optional "chains", "resi_from", "resi_to", "resi",
        "atom", "alt"), "const" (list of ``(is_comp, [group ids])``,
        optional) and "ncycle" (default 5).
        """
        n_sites = st[0].count_atom_sites()
        self.groups = []
        self.consts = []
        self.group_idxes = [0 for _ in range(n_sites)]
        self.ncycle = 0
        # Always defined (even without groups) so that later attribute access
        # cannot fail; -1 marks atoms that are not part of any group.
        self.atom_pos = [-1 for _ in range(n_sites)]
        if not params or not params.get("groups"):
            return
        logger.writeln("Occupancy groups:")
        count = 0
        for igr in params["groups"]:
            self.groups.append([[], []]) # list of [indexes, atoms]
            n_curr = count
            for sel in params["groups"][igr]:
                sel_chains = sel.get("chains")
                sel_from = sel.get("resi_from")
                sel_to = sel.get("resi_to")
                sel_seq = sel.get("resi")
                sel_atom = sel.get("atom")
                sel_alt = sel.get("alt")
                for chain in st[0]:
                    if sel_chains and chain.name not in sel_chains:
                        continue
                    # flag is True while walking inside the resi_from..resi_to range
                    flag = False
                    for res in chain:
                        if sel_seq and res.seqid != sel_seq:
                            continue
                        if sel_from and res.seqid == sel_from:
                            flag = True
                        if sel_from and not flag:
                            continue
                        for atom in res:
                            if sel_atom and atom.name != sel_atom:
                                continue
                            if sel_alt and atom.altloc != sel_alt:
                                continue
                            self.atom_pos[atom.serial-1] = count
                            self.groups[-1][0].append(count)
                            self.groups[-1][1].append(atom)
                            # group_idxes is 1-based; 0 means "no group"
                            self.group_idxes[atom.serial-1] = len(self.groups)
                            count += 1
                        if sel_to and res.seqid == sel_to:
                            flag = False
            logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))

        igr_idxes = {igr:i for i, igr in enumerate(params["groups"])}
        # "const" is optional: groups without any constraint are allowed.
        self.consts = [(is_comp, [igr_idxes[g] for g in gids])
                       for is_comp, gids in params.get("const", [])]
        self.ncycle = params.get("ncycle", 5)
    # __init__()

    def constraint(self, x):
        """Return the constraint residuals c_j(x) as an array.

        For a complete constraint, or when the occupancies sum to more than
        1, the residual is ``sum - 1``; otherwise it is 0.
        """
        # x: occupancy parameters
        ret = []
        for is_comp, ids in self.consts:
            x_sum = numpy.sum(x[ids])
            if is_comp or x_sum > 1:
                ret.append(x_sum - 1)
            else:
                ret.append(0.)
        return numpy.array(ret)

    def ensure_constraints(self):
        """Give every group one occupancy and rescale constrained groups.

        Each group gets the mean occupancy of its atoms clipped to
        [1e-3, 1]; groups in a constraint are then divided by their sum
        (left untouched when an incomplete constraint sums to less than 1).
        """
        vals = []
        for _, atoms in self.groups:
            occ = numpy.mean([a.occ for a in atoms])
            occ = min(1, max(1e-3, occ))
            vals.append(occ)
        for is_comp, idxes in self.consts:
            sum_occ = sum(vals[i] for i in idxes)
            if not is_comp and sum_occ < 1:
                sum_occ = 1. # do nothing
            for i in idxes:
                #logger.writeln("Imposing constraints: {} {}".format(vals[i], vals[i]/sum_occ))
                vals[i] /= sum_occ
        for occ, (_, atoms) in zip(vals, self.groups):
            for a in atoms: a.occ = occ

    def get_x(self):
        """Return the group occupancies (taken from the first atom of each group)."""
        return numpy.array([atoms[0].occ for _, atoms in self.groups])

    def set_x(self, x):
        """Assign occupancy ``x[i]`` to every atom of group ``i``."""
        for p, (_, atoms) in zip(x, self.groups):
            for a in atoms:
                a.occ = p
                #a.occ = max(1, min(1e-3, p))

    def target(self, x, ll, ls, u):
        """Return the augmented Lagrangian LL(x) - ls.c(x) + u/2 |c(x)|^2.

        Side effect: the occupancies are set to *x* and ``ll.update_fc()``
        is called.
        """
        self.set_x(x)
        ll.update_fc()
        c = self.constraint(x)
        f = ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
        return f

    def grad(self, x, ll, ls, u, refine_h):
        """Return (gradient, diagonal second-derivative term) per group.

        Per-atom values from ``ll.ll.vn`` / ``ll.ll.am`` are summed over the
        atoms of each group (hydrogens skipped unless *refine_h*), then the
        constraint terms are added.
        """
        c = self.constraint(x)
        ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
        #print("grad=", ll.ll.vn)
        #print("diag=", ll.ll.am)
        assert len(ll.ll.vn) == len(ll.ll.am)
        # Convert once; doing this inside the group loop would rebuild the
        # full arrays for every group.
        atom_vn = numpy.asarray(ll.ll.vn)
        atom_am = numpy.asarray(ll.ll.am)
        vn = []
        diag = []
        for idxes, atoms in self.groups:
            if not refine_h:
                idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
            vn.append(numpy.sum(atom_vn[idxes]))
            diag.append(numpy.sum(atom_am[idxes]))
        vn, diag = numpy.array(vn), numpy.array(diag)
        for i, (is_comp, idxes) in enumerate(self.consts):
            dcdx = numpy.zeros(len(self.groups))
            dcdx[idxes] = 1.
            # inactive inequality constraints (c == 0) contribute nothing
            if is_comp or c[i] != 0:
                vn -= (ls[i] - u * c[i]) * dcdx
                diag += u * dcdx**2

        return vn, diag

    def refine(self, ll, refine_h, alpha=1.1):
        """Refine grouped occupancies against *ll* for ``self.ncycle`` cycles.

        Returns a list with one dict per cycle ("Ncyc", "f0", "f1",
        "shift_scale", "const_viol", "lambda_new"), or None when no group is
        defined. *alpha* is the per-cycle growth factor of the penalty
        parameter.
        """
        # Refinement of grouped occupancies using augmented Lagrangian
        # f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
        # with c_j(x) = 0 constraints
        if not self.groups:
            return
        logger.writeln("\n== Group occupancy refinement ==")
        self.ensure_constraints() # make sure constrained groups have the same occupancies.
        ls = numpy.zeros(len(self.consts)) # Lagrange multiplier
        u = 10000. # penalty parameter. in Refmac 1/0.01**2
        x0 = self.get_x()
        #logger.writeln(" parameters: {}".format(len(x0)))
        f0 = self.target(x0, ll, ls, u)
        ret = []
        for cyc in range(self.ncycle):
            ret.append({"Ncyc": cyc+1, "f0": f0})
            logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
            vn, diag = self.grad(x0, ll, ls, u, refine_h)
            diag[diag < 1e-6] = 1.
            dx = -vn / diag

            # Try the full shift, then half and a quarter of it.
            scale = 1
            for i in range(3):
                scale = 1/2**i
                f1 = self.target(x0 + dx * scale, ll, ls, u)
                logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
                if f1 < f0: break
            else:
                logger.writeln("WARNING: function not minimised")
                #self.set_x(x0) # Refmac accepts it even when function increases
            c = self.constraint(x0 + dx * scale)
            ret[-1]["f1"] = f1
            ret[-1]["shift_scale"] = scale
            f0 = f1
            x0 = x0 + dx * scale
            # multiplier and penalty update
            ls -= u * c
            u = alpha * u
            ret[-1]["const_viol"] = list(c)
            ret[-1]["lambda_new"] = list(ls)
        self.ensure_constraints()
        ll.update_fc()
        f = ll.calc_target()
        logger.writeln("final -LL= {}".format(f))
        return ret
426
+
427
+
428
+ class Refine:
429
+ def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
430
+ unrestrained=False, params=None):
431
+ assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
432
+ assert geom is not None
433
+ self.st = st # clone()?
434
+ self.st_traj = None
435
+ self.atoms = geom.atoms # not a copy
436
+ self.geom = geom
437
+ self.ll = ll
438
+ self.gamma = 0
439
+ self.adp_mode = 0 if self.ll is None else adp_mode
440
+ self.refine_xyz = refine_xyz
441
+ self.refine_occ = refine_occ
442
+ self.use_occr = self.refine_occ # for now?
443
+ self.unrestrained = unrestrained
444
+ self.refine_h = refine_h
445
+ self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
446
+ if self.h_inherit_parent_adp:
447
+ self.geom.set_h_parents()
448
+ if params and params.get("write_trajectory"):
449
+ self.st_traj = self.st.clone()
450
+ self.st_traj[-1].num = 0
451
+ assert self.geom.group_occ.groups or self.n_params() > 0
452
+ # __init__()
453
+
454
def print_weights(self): # TODO unfinished
    """Log the ADP and occupancy restraint weights that are in use.

    Raises LookupError when ADPs are refined and the ADP restraint mode is
    neither "diff" nor "kldiv".
    """
    logger.writeln("Geometry weights")
    g = self.geom.geom
    if self.adp_mode > 0:
        logger.writeln(" ADP restraints")
        logger.writeln(" weight: {}".format(self.geom.adpr_w))
        mode = g.adpr_mode
        logger.writeln(" mode: {}".format(mode))
        if mode == "diff":
            sigmas = g.adpr_diff_sigs
        elif mode == "kldiv":
            sigmas = g.adpr_kl_sigs
        else:
            raise LookupError("unknown adpr_mode")
        formatted = " ".join("{:.2f}".format(s) for s in sigmas)
        logger.writeln(" sigmas: {}".format(formatted))
    if self.refine_occ:
        logger.writeln(" Occupancy restraints")
        logger.writeln(" weight: {}".format(self.geom.occr_w))
470
+
471
def scale_shifts(self, dx, scale):
    """Return ``scale * dx`` with each parameter class clamped to its limits.

    Positional shifts are limited to [-1, 1], B shifts to [-30, 30]
    (for anisotropic ADPs the eigenvalues of the shift tensor are limited)
    and occupancy shifts to [-0.5, 0.5]. The parameter layout is
    xyz, then ADP, then occupancy; the input array is not modified.
    """
    xyz_lo, xyz_hi = -1.0, 1.0
    b_lo, b_hi = -30.0, 30.0
    q_lo, q_hi = -0.5, 0.5
    #ave_shift = numpy.mean(dx)
    #max_shift = numpy.maximum(dx)
    #rms_shift = numpy.std(dx)

    def clamp(view, lo, hi):
        # in-place on a slice view of the scaled shift vector
        view[view > hi] = hi
        view[view < lo] = lo

    n_atoms = self.geom.n_refine_atoms
    dx = scale * dx
    offset_b = n_atoms * 3 if self.refine_xyz else 0
    offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]

    if self.refine_xyz:
        dxx = dx[:offset_b]
        logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
        logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
        logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
        clamp(dxx, xyz_lo, xyz_hi)

    if self.adp_mode in (1, 2):
        dxb = dx[offset_b:offset_q]
        # TODO this is misleading (for aniso, raw tensor elements are reported)
        logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
        logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
        logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
        if self.adp_mode == 1:
            clamp(dxb, b_lo, b_hi)
        else:
            # six elements per atom, stored as 11, 22, 33, 12, 13, 23
            for start in range(0, 6 * (len(dxb)//6), 6):
                b11, b22, b33, b12, b13, b23 = dxb[start:start+6]
                tensor = numpy.array([[b11, b12, b13],
                                      [b12, b22, b23],
                                      [b13, b23, b33]])
                v, Q = numpy.linalg.eigh(tensor)
                clamp(v, b_lo, b_hi)
                tensor = Q.dot(numpy.diag(v)).dot(Q.T)
                dxb[start:start+6] = (tensor[0,0], tensor[1,1], tensor[2,2],
                                      tensor[0,1], tensor[0,2], tensor[1,2])

    if self.refine_occ:
        dxq = dx[offset_q:]
        logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
        logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
        logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
        clamp(dxq, q_lo, q_hi)

    return dx
524
+
525
def n_params(self):
    """Return the number of refined parameters.

    Per refined atom: 3 for coordinates (if refined), 1 or 6 for
    isotropic/anisotropic ADP, and 1 for occupancy (if refined).
    """
    per_atom = 0
    if self.refine_xyz:
        per_atom += 3
    per_atom += {1: 1, 2: 6}.get(self.adp_mode, 0)
    if self.refine_occ:
        per_atom += 1
    return per_atom * self.geom.n_refine_atoms
536
+
537
def set_x(self, x):
    """Write the parameter vector *x* back into the atoms.

    Layout of *x*: xyz (3 per atom), then ADP (1 or 6 per atom), then
    occupancy. B values / ADP eigenvalues are floored at 0.5 and
    occupancies are kept within [1e-3, max], where max is 1 or
    1/(number of images + 1) for atoms listed in ``geom.specs``.
    Afterwards structure factors (if a data target exists), non-bonded
    restraints and the geometry target are set up again.
    """
    n_atoms = self.geom.n_refine_atoms
    offset_b = n_atoms * 3 if self.refine_xyz else 0
    offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
    max_occ = {}
    if self.refine_occ and self.geom.specs:
        max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
    for i, j in enumerate(self.geom.atom_pos):
        if j < 0:
            continue  # atom without parameters
        atom = self.atoms[i]
        if self.refine_xyz:
            atom.pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
        if self.adp_mode == 1:
            atom.b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
        elif self.adp_mode == 2:
            first = offset_b + 6 * j
            smat = gemmi.SMat33d(*x[first: first + 6])
            M = smat.as_mat33().array
            v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
            v = numpy.maximum(v, 0.5) # avoid NPD with minimum B = 0.5
            M2 = Q.dot(numpy.diag(v)).dot(Q.T)
            atom.b_iso = M2.trace() / 3
            M2 *= b_to_u
            atom.aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
        if self.refine_occ:
            atom.occ = min(max_occ.get(atom, 1), max(1e-3, x[offset_q + j]))

    # Copy B of hydrogen from parent
    if self.h_inherit_parent_adp:
        for h, p in self.geom.parents.items():
            h.b_iso = p.b_iso
            h.aniso = p.aniso

    if self.ll is not None:
        self.ll.update_fc()

    self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
    target_geom = self.geom.geom
    target_geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
    logger.writeln("vdws = {}".format(len(target_geom.vdws)))
    logger.writeln(f"atoms = {target_geom.target.n_atoms()}")
    logger.writeln(f"pairs = {target_geom.target.n_pairs()}")
578
+
579
def get_x(self):
    """Collect the current atom parameters into one vector.

    Layout: xyz (3 per atom), then ADP (B for isotropic, six tensor
    elements scaled by ``u_to_b`` for anisotropic), then occupancy. Atoms
    whose ``atom_pos`` entry is negative are skipped.
    """
    n_atoms = self.geom.n_refine_atoms
    offset_b = n_atoms * 3 if self.refine_xyz else 0
    offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
    x = numpy.zeros(self.n_params())
    for atom, j in zip(self.atoms, self.geom.atom_pos):
        if j < 0:
            continue
        if self.refine_xyz:
            x[3*j:3*(j+1)] = atom.pos.tolist()
        if self.adp_mode == 1:
            x[offset_b + j] = atom.b_iso
        elif self.adp_mode == 2:
            block = slice(offset_b + 6*j, offset_b + 6*(j+1))
            x[block] = atom.aniso.elements_pdb()
            x[block] *= u_to_b
        if self.refine_occ:
            x[offset_q + j] = atom.occ

    return x
598
+ #@profile
599
def calc_target(self, w=1, target_only=False):
    """Return the total target ``w * LL + geometry``.

    The geometry term comes from ``self.geom.calc_target`` (coordinate
    restraints are only included when refining xyz and not unrestrained).
    The data term is 0 when no data target is set. Unless *target_only* is
    true, the data gradient is computed as well via ``self.ll.calc_grad``.
    """
    geom = self.geom.calc_target(target_only,
                                 not self.unrestrained and self.refine_xyz,
                                 self.adp_mode, self.use_occr)
    if self.ll is not None:
        ll = self.ll.calc_target()
        logger.writeln(" ll= {}".format(ll))
        if not target_only:
            self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
                              self.refine_h, self.geom.geom.specials)
    else:
        ll = 0

    f = w * ll + geom
    return f
615
+
616
+ #@profile
617
def run_cycle(self, weight=1):
    """Run one refinement cycle: solve for a shift and apply it.

    The shift is obtained from the conjugate-gradient solver and applied as
    ``x0 - shift`` at scale 1, 1/2 and 1/4 until the target decreases. If
    no trial decreases the target, the last (smallest) shift is kept and a
    warning is logged.

    Returns ``(ret, shift_scale, f1)``: whether the target decreased, the
    scale of the last trial, and the target value at that trial.
    """
    # Disabled debugging aid: numerical vs analytical derivative check.
    if 0: # test of grad
        self.ll.update_fc()
        x0 = self.get_x()
        f0,ader,_ = self.calc_target(weight)
        i = 1
        for e in 1e-1,1e-2,1e-3, 1e-4, 1e-5:
            x1 = numpy.copy(x0)
            x1[i] += e
            self.set_x(x1)
            self.ll.update_fc()
            f1,_,_ = self.calc_target(weight, target_only=True)
            nder = (f1 - f0) / e
            print("e=", e)
            print("NUM DER=", nder)
            print("ANA DER=", ader[i])
            print("ratio=", nder/ader[i])
        quit()

    # Target and gradient at the starting point.
    f0 = self.calc_target(weight)
    x0 = self.get_x()
    logger.writeln("f0= {:.4e}".format(f0))
    if 1:
        use_ic = False # incomplete cholesky. problematic at least in geometry optimisation case
        logger.writeln("using cgsolve in c++, ic={}".format(use_ic))
        cgsolver = ext.CgSolve(self.geom.geom.target, None if self.ll is None else self.ll.ll)
        if use_ic:
            cgsolver.gamma = 0
            cgsolver.max_gamma_cyc = 1
        else:
            # start from the gamma kept from the previous cycle
            cgsolver.gamma = self.gamma
        dx = cgsolver.solve(weight, logger, use_ic)
        self.gamma = cgsolver.gamma
    else:
        # Unreachable alternative: Python implementation of the solver.
        logger.writeln("using cgsolve in py")
        am = self.geom.geom.target.am_spmat
        vn = numpy.array(self.geom.geom.target.vn)
        if self.ll is not None:
            am += self.ll.ll.fisher_spmat * weight
            vn += numpy.array(self.ll.ll.vn) * weight
        diag = am.diagonal()
        diag[diag<=0] = 1.
        diag = numpy.sqrt(diag)
        rdiag = 1./diag # sk
        M = scipy.sparse.diags(rdiag)
        dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)

    # Disabled debugging aid: scan the target along the shift direction.
    if 0: # to check hessian scale
        with open("minimise_line.dat", "w") as ofs:
            ofs.write("s f\n")
            for s in numpy.arange(-2, 2, 0.1):
                dx2 = self.scale_shifts(dx, s)
                self.set_x(x0 + dx2)
                fval = self.calc_target(weight, target_only=True)[0]
                ofs.write("{} {}\n".format(s, fval))
        quit()

    ret = True # success
    shift_scale = 1
    # Try the full shift first, then halve it up to two times.
    for i in range(3):
        shift_scale = 1/2**i
        dx2 = self.scale_shifts(dx, shift_scale)
        self.set_x(x0 - dx2)
        f1 = self.calc_target(weight, target_only=True)
        logger.writeln("f1, {}= {:.4e}".format(i, f1))
        if f1 < f0: break
    else:
        # no trial decreased the target; the last trial stays applied
        ret = False
        logger.writeln("WARNING: function not minimised")
        #self.set_x(x0) # Refmac accepts it even when function increases

    return ret, shift_scale, f1
689
+
690
+ def run_cycles(self, ncycles, weight=1, weight_adjust=False, debug=False,
691
+ weight_adjust_bond_rmsz_range=(0.5, 1.), stats_json_out=None):
692
+ self.print_weights()
693
+ stats = [{"Ncyc": 0}]
694
+ self.geom.setup_nonbonded(self.refine_xyz)
695
+ self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
696
+ logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
697
+ logger.writeln(f"atoms = {self.geom.geom.target.n_atoms()}")
698
+ logger.writeln(f"pairs = {self.geom.geom.target.n_pairs()}")
699
+ stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
700
+ adp_mode=self.adp_mode,
701
+ use_occr=self.refine_occ,
702
+ show_outliers=True)
703
+ if self.ll is not None:
704
+ self.ll.update_fc()
705
+ self.ll.overall_scale()
706
+ self.ll.update_ml_params()
707
+ self.ll.prepare_target()
708
+ llstats = self.ll.calc_stats(bin_stats=True)
709
+ stats[-1]["data"] = {"summary": llstats["summary"],
710
+ "binned": llstats["bin_stats"].to_dict(orient="records")}
711
+ if "twin_alpha" in llstats:
712
+ stats[-1]["twin_alpha"] = llstats["twin_alpha"]
713
+ show_binstats(llstats["bin_stats"], 0)
714
+ if self.adp_mode > 0:
715
+ utils.model.adp_analysis(self.st)
716
+ if stats_json_out:
717
+ write_stats_json_safe(stats, stats_json_out)
718
+ occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
719
+
720
+ for i in range(ncycles):
721
+ logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
722
+ logger.writeln(f" weight = {weight:.4e}")
723
+ if self.refine_xyz or self.adp_mode > 0 or self.refine_occ:
724
+ is_ok, shift_scale, fval = self.run_cycle(weight=weight)
725
+ stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok,
726
+ "weight": weight})
727
+ elif occ_refine_flag:
728
+ stats.append({"Ncyc": len(stats)})
729
+ if occ_refine_flag:
730
+ stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
731
+ if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
732
+ stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
733
+ adp_mode=self.adp_mode,
734
+ use_occr=self.refine_occ,
735
+ show_outliers=(i==ncycles-1))
736
+ if self.ll is not None:
737
+ self.ll.overall_scale()
738
+ f0 = self.ll.calc_target()
739
+ self.ll.update_ml_params()
740
+ self.ll.prepare_target()
741
+ llstats = self.ll.calc_stats(bin_stats=True)#(i==ncycles-1))
742
+ if llstats["summary"]["-LL"] > f0:
743
+ logger.writeln("WARNING: -LL has increased after ML parameter optimization:"
744
+ "{} to {}".format(f0, llstats["summary"]["-LL"]))
745
+ stats[-1]["data"] = {"summary": llstats["summary"],
746
+ "binned": llstats["bin_stats"].to_dict(orient="records")}
747
+ if "twin_alpha" in llstats:
748
+ stats[-1]["twin_alpha"] = llstats["twin_alpha"]
749
+ show_binstats(llstats["bin_stats"], i+1)
750
+ if self.adp_mode > 0:
751
+ utils.model.adp_analysis(self.st)
752
+ if (weight_adjust and self.refine_xyz and not self.unrestrained and self.ll is not None and
753
+ len(stats) > 2 and "Bond distances, non H" in stats[-1]["geom"]["summary"].index):
754
+ rmsz = stats[-1]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
755
+ rmsz0 = stats[-2]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
756
+ if rmsz > weight_adjust_bond_rmsz_range[1] and rmsz > rmsz0:
757
+ weight /= 1.1
758
+ elif rmsz < weight_adjust_bond_rmsz_range[0] and rmsz0 < weight_adjust_bond_rmsz_range[0] and rmsz < rmsz0:
759
+ weight *= 1.3
760
+ elif rmsz > 1.5 * rmsz0:
761
+ weight /= 1.1
762
+ if self.st_traj is not None:
763
+ self.st_traj.add_model(self.st[0])
764
+ self.st_traj[-1].num = len(self.st_traj)
765
+ if stats_json_out:
766
+ write_stats_json_safe(stats, stats_json_out)
767
+
768
+ logger.writeln("")
769
+
770
+ # Make table
771
+ data_keys, geom_keys = set(), set()
772
+ tmp = []
773
+ for d in stats:
774
+ x = {"Ncyc": d["Ncyc"]}
775
+ if "data" in d and "summary" in d["data"]:
776
+ x.update(d["data"]["summary"])
777
+ data_keys.update(d["data"]["summary"])
778
+ if "geom" in d:
779
+ for k, n, l in (("r.m.s.d.", "Bond distances, non H", "rmsBOND"),
780
+ ("r.m.s.Z", "Bond distances, non H", "zBOND"),
781
+ ("r.m.s.d.", "Bond angles, non H", "rmsANGL"),
782
+ ("r.m.s.Z", "Bond angles, non H", "zANGL")):
783
+ if k in d["geom"]["summary"] and n in d["geom"]["summary"][k]:
784
+ x[l] = d["geom"]["summary"][k].get(n)
785
+ geom_keys.add(l)
786
+ tmp.append(x)
787
+ df = pandas.DataFrame(tmp)
788
+ forplot = []
789
+ if "FSCaverage" in data_keys:
790
+ forplot.append(["FSC", ["Ncyc", "FSCaverage"]])
791
+ r_keys = [x for x in data_keys if x.startswith("R")]
792
+ if r_keys:
793
+ forplot.append(["R", ["Ncyc"] + r_keys])
794
+ cc_keys = [x for x in data_keys if x.startswith("CC")]
795
+ if cc_keys:
796
+ forplot.append(["CC", ["Ncyc"] + cc_keys])
797
+ if "-LL" in data_keys:
798
+ forplot.append(["-LL", ["Ncyc", "-LL"]])
799
+ rms_keys = [x for x in geom_keys if x.startswith("rms")]
800
+ if rms_keys:
801
+ forplot.append(["Geometry", ["Ncyc"] + rms_keys])
802
+ z_keys = [x for x in geom_keys if x.startswith("z")]
803
+ if z_keys:
804
+ forplot.append(["Geometry Z", ["Ncyc"] + z_keys])
805
+
806
+ lstr = utils.make_loggraph_str(df, "stats vs cycle", forplot,
807
+ float_format="{:.4f}".format)
808
+ logger.writeln(lstr)
809
+ return stats
810
+
811
+ # class Refine
812
+
813
def update_meta(st, stats, ll=None):
    """Write refinement provenance and statistics into the metadata of ``st``.

    The structure is modified in place:

    * ``st.helices`` and ``st.sheets`` are cleared,
    * ``st.meta.software`` is replaced by a single Servalcat entry,
    * ``st.meta.refinement`` is replaced by one ``gemmi.RefinementInfo``
      filled from ``stats``,
    * ``st.raw_remarks`` is replaced by freshly built REMARK 3 lines.

    Parameters
    ----------
    st
        Structure to update (presumably a ``gemmi.Structure`` - confirm
        against callers). ``st[0]`` is used to compute the mean B value when
        ``ll`` is given.
    stats
        Statistics of one refinement cycle. If it has a ``"geom"`` entry, its
        ``"summary"`` table is read through the columns ``"r.m.s.d."``,
        ``"N restraints"`` and ``"Mn(sigma)"``. When ``ll`` is given,
        ``stats["data"]["summary"]`` and ``stats["data"]["binned"]`` are read
        as well and must be present.
    ll
        Optional data-target object. Only ``ll.refine_id()`` and
        ``ll.b_aniso`` are used here. When ``None`` no data statistics are
        written.

    Returns
    -------
    None
    """
    # TODO write stats. probably geom.reporting.get_summary_table should return with _refine_ls_restr.type names
    # should remove st.mod_residues?
    st.helices.clear()
    st.sheets.clear()
    # NOTE(review): PDB REMARK records are column-sensitive; the spacing of
    # these literals is kept exactly as found - confirm against upstream.
    raw_remarks = ['REMARK 3',
                   'REMARK 3 REFINEMENT.',
                   f'REMARK 3 PROGRAM : SERVALCAT {servalcat.__version__}',
                   'REMARK 3 AUTHORS : YAMASHITA,MURSHUDOV',
                   'REMARK 3',
                   ]

    si = gemmi.SoftwareItem()
    si.classification = gemmi.SoftwareItem.Classification.Refinement
    si.name = "Servalcat"
    si.version = servalcat.__version__
    si.date = servalcat.__date__
    st.meta.software = [si]

    ri = gemmi.RefinementInfo()
    if "geom" in stats:
        geom_summary = stats["geom"]["summary"]
        restr_stats = []
        raw_remarks.append("REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT")
        # Each entry: (summary column, summary row, name given to
        # RefinementInfo.Restr, REMARK 3 label). An empty label means the
        # restraint is stored in ``ri`` but gets no REMARK 3 line.
        restraint_table = (
            ("r.m.s.d.", "Bond distances, non H", "s_bond_nonh_d", "BOND LENGTHS REFINED ATOMS (A)"),
            ("r.m.s.d.", "Bond angles, non H", "s_angle_nonh_deg", "BOND ANGLES REFINED ATOMS (DEGREES)"),
            ("r.m.s.d.", "Torsion angles, period 1", "s_dihedral_angle_1_deg", "TORSION ANGLES, PERIOD 1 (DEGREES)"),
            ("r.m.s.d.", "Torsion angles, period 2", "s_dihedral_angle_2_deg", "TORSION ANGLES, PERIOD 2 (DEGREES)"),
            ("r.m.s.d.", "Torsion angles, period 3", "s_dihedral_angle_3_deg", "TORSION ANGLES, PERIOD 3 (DEGREES)"),
            ("r.m.s.d.", "Torsion angles, period 6", "s_dihedral_angle_6_deg", "TORSION ANGLES, PERIOD 6 (DEGREES)"),
            ("r.m.s.d.", "Chiral centres", "s_chiral_restr", "CHIRAL-CENTER RESTRAINTS (A**3)"),
            ("r.m.s.d.", "Planar groups", "s_planes", "GENERAL PLANES REFINED ATOMS (A)"),
            ("r.m.s.d.", "VDW nonbonded", "s_nbd", ""),
            ("r.m.s.d.", "VDW torsion", "s_nbtor", ""),
            ("r.m.s.d.", "VDW hbond", "s_hbond_nbd", ""),
            ("r.m.s.d.", "VDW metal", "s_metal_ion", ""),
            ("r.m.s.d.", "VDW dummy", "s_dummy_nbd", ""),
            ("r.m.s.d.", "VDW nonbonded, symmetry", "s_symmetry_nbd", ""),
            ("r.m.s.d.", "VDW torsion, symmetry", "s_symmetry_nbtor", ""),
            ("r.m.s.d.", "VDW hbond, symmetry", "s_symmetry_hbond_nbd", ""),
            ("r.m.s.d.", "VDW metal, symmetry", "s_symmetry_metal_ion", ""),
            ("r.m.s.d.", "VDW dummy, symmetry", "s_symmetry_dummy_nbd", ""),
        )
        for column, row, restr_name, pdb_label in restraint_table:
            # Skip restraint types that are absent from the summary table.
            if column in geom_summary and row in geom_summary[column]:
                rr = gemmi.RefinementInfo.Restr(restr_name)
                rr.dev_ideal = round(geom_summary[column].get(row), 4)
                rr.count = geom_summary["N restraints"].get(row)
                rr.weight = round(geom_summary["Mn(sigma)"].get(row), 4)
                restr_stats.append(rr)
                if pdb_label:
                    raw_remarks.append(f"REMARK 3 {pdb_label}:{rr.count:6d} ;{rr.dev_ideal:6.3f} ;{rr.weight:6.3f}")
        ri.restr_stats = restr_stats
        raw_remarks.append("REMARK 3")

    if ll is not None:
        ri.id = ll.refine_id()
        ri.mean_b = round(numpy.mean([cra.atom.b_iso for cra in st[0].all()]), 2)
        if ll.b_aniso is not None:
            ri.aniso_b = ll.b_aniso

        # Overall statistics. Several keys map to the same attribute; they are
        # applied in order, so a later key overrides an earlier one when both
        # are present in the summary.
        data_summary = stats["data"]["summary"]
        for key, attr, ndigits in (("Rwork", "r_work", 4), ("Rfree", "r_free", 4), ("R", "r_all", 4),
                                   ("FSCaverage", "fsc_work", 4),
                                   ("FSCaverage_half1", "fsc_work", 4), ("FSCaverage_half2", "fsc_free", 4)):
            if key in data_summary:
                setattr(ri, attr, round(data_summary[key], ndigits))

        # Per-bin statistics (same "later key wins" rule as above).
        bin_attr_table = (("Rwork", "r_work", 4), ("Rfree", "r_free", 4),
                          ("R1work", "r_work", 4), ("R1free", "r_free", 4),
                          ("R", "r_all", 4), ("R1", "r_all", 4),
                          ("CCI", "cc_intensity_work", 4), ("CCF", "cc_fo_fc_work", 4),
                          ("CCIwork", "cc_intensity_work", 4), ("CCIfree", "cc_intensity_free", 4),
                          ("CCFwork", "cc_fo_fc_work", 4), ("CCFfree", "cc_fo_fc_free", 4),
                          ("fsc_FC_full", "fsc_work", 4), ("fsc_model", "fsc_work", 4),
                          ("fsc_model_half1", "fsc_work", 4), ("fsc_model_half2", "fsc_free", 4),
                          ("n_work", "work_set_count", 0), ("n_free", "rfree_set_count", 0),
                          ("n_obs", "reflection_count", 0), ("ncoeffs", "reflection_count", 0))
        bins = []
        n_all = 0
        for bin_row in stats["data"]["binned"]:
            bri = gemmi.BasicRefinementInfo()
            bri.resolution_high = round(bin_row["d_min"], 3)
            bri.resolution_low = round(bin_row["d_max"], 3)
            for key, attr, ndigits in bin_attr_table:
                if key in bin_row:
                    setattr(bri, attr, round(bin_row[key], ndigits))
            if "n_all" in bin_row and "n_obs" in bin_row:
                # A bin without any possible reflection has no meaningful
                # completeness; skip it instead of dividing by zero.
                if bin_row["n_all"] != 0:
                    bri.completeness = round(bin_row["n_obs"] / bin_row["n_all"] * 100, 2)
                n_all += bin_row["n_all"]
            bins.append(bri)

        # Summing counts over bins can go below -1 when bins carry a negative
        # count, so the totals are clamped at -1.
        ri.rfree_set_count = max(-1, sum(x.rfree_set_count for x in bins))
        ri.work_set_count = max(-1, sum(x.work_set_count for x in bins))
        ri.reflection_count = max(-1, sum(x.reflection_count for x in bins))
        # min()/max() raise ValueError on an empty sequence, so the overall
        # resolution limits are only set when at least one bin exists.
        if bins:
            ri.resolution_high = round(min(x.resolution_high for x in bins), 3)
            ri.resolution_low = round(max(x.resolution_low for x in bins), 3)
        if ri.reflection_count > 0 and n_all > 0:
            ri.completeness = round(ri.reflection_count / n_all * 100, 2)
        ri.bins = bins
        if ri.rfree_set_count > 0:
            ri.cross_validation_method = "THROUGHOUT"

    st.meta.refinement = [ri]
    st.raw_remarks = raw_remarks
906
+ # update_meta()