servalcat 0.4.99__cp311-cp311-macosx_10_14_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cpython-311-darwin.so +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +906 -0
- servalcat/refine/refine_geom.py +233 -0
- servalcat/refine/refine_spa.py +366 -0
- servalcat/refine/refine_xtal.py +281 -0
- servalcat/refine/spa.py +144 -0
- servalcat/refine/xtal.py +276 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +182 -0
- servalcat/refmac/refmac_keywords.py +639 -0
- servalcat/refmac/refmac_wrapper.py +395 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +479 -0
- servalcat/spa/fsc.py +385 -0
- servalcat/spa/localcc.py +188 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +977 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1547 -0
- servalcat/utils/fileio.py +744 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +714 -0
- servalcat/utils/logger.py +140 -0
- servalcat/utils/maps.py +345 -0
- servalcat/utils/model.py +782 -0
- servalcat/utils/refmac.py +760 -0
- servalcat/utils/restraints.py +781 -0
- servalcat/utils/symmetry.py +295 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +258 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1644 -0
- servalcat/xtal/twin.py +121 -0
- servalcat-0.4.99.dist-info/METADATA +55 -0
- servalcat-0.4.99.dist-info/RECORD +45 -0
- servalcat-0.4.99.dist-info/WHEEL +5 -0
- servalcat-0.4.99.dist-info/entry_points.txt +4 -0
- servalcat-0.4.99.dist-info/licenses/LICENSE +373 -0
|
@@ -0,0 +1,906 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import gemmi
|
|
12
|
+
import numpy
|
|
13
|
+
import json
|
|
14
|
+
import pandas
|
|
15
|
+
import scipy.sparse
|
|
16
|
+
import servalcat # for version
|
|
17
|
+
from servalcat.utils import logger
|
|
18
|
+
from servalcat import utils
|
|
19
|
+
from servalcat.refmac import exte
|
|
20
|
+
from servalcat import ext
|
|
21
|
+
from . import cgsolve
|
|
22
|
+
u_to_b = utils.model.u_to_b
|
|
23
|
+
b_to_u = utils.model.b_to_u
|
|
24
|
+
|
|
25
|
+
#import line_profiler
|
|
26
|
+
#import atexit
|
|
27
|
+
#profile = line_profiler.LineProfiler()
|
|
28
|
+
#atexit.register(profile.print_stats)
|
|
29
|
+
|
|
30
|
+
class Geom:
    """Python-side holder for the geometry restraints of one structure.

    Wraps an ``ext.Geometry`` object (``self.geom``) created from the
    structure and keeps the restraint weights, the occupancy groups and the
    outlier-reporting thresholds used by the methods below.
    """

    def __init__(self, st, topo, monlib, adpr_w=1, occr_w=1, shake_rms=0,
                 params=None, unrestrained=False, use_nucleus=False,
                 ncslist=None, atom_pos=None):
        """Build the geometry object and load restraints.

        ``atom_pos`` maps every atom (by serial-1) to its slot among the
        refined atoms; when omitted every atom gets its own slot.
        Raises SystemExit if ``ext.Geometry`` rejects its arguments with a
        TypeError.
        """
        self.st = st
        # Atoms are stored by serial number: serial N goes to slot N-1.
        self.atoms = [None] * self.st[0].count_atom_sites()
        for cra in self.st[0].all():
            self.atoms[cra.atom.serial - 1] = cra.atom
        self.atom_pos = list(range(len(self.atoms))) if atom_pos is None else atom_pos
        self.n_refine_atoms = max(self.atom_pos) + 1
        # atom -> CRA, used to print readable atom labels in outlier tables
        self.lookup = {cra.atom: cra for cra in self.st[0].all()}
        try:
            self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
        except TypeError as e:
            raise SystemExit(f"An error occurred while creating the Geometry object:\n{e}\n\n"
                             "This likely indicates an installation issue. "
                             "Please verify that you have the correct version of gemmi installed and that both gemmi and servalcat were compiled in the same environment.")

        # Register atoms on special positions; n_sym counts the atom plus its images.
        self.specs = utils.model.find_special_positions(self.st)
        for atom, images, matp, mata in self.specs:
            self.geom.specials.append(ext.Geometry.Special(atom, matp, mata, len(images) + 1))

        self.adpr_w = adpr_w
        self.occr_w = occr_w
        self.unrestrained = unrestrained
        if shake_rms > 0:
            numpy.random.seed(0)
            utils.model.shake_structure(self.st, shake_rms, copy=False)

        self.use_nucleus = use_nucleus
        self.calc_kwds = {"use_nucleus": self.use_nucleus}
        params = {} if params is None else params
        # Only weights explicitly present in params are forwarded to calc().
        for key in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
            if key in params:
                self.calc_kwds[key] = params[key]
                logger.writeln("setting geometry weight {}= {}".format(key, params[key]))

        inc_tors, exc_tors = utils.restraints.make_torsion_rules(params.get("restr", {}))
        rtors = utils.restraints.select_restrained_torsions(monlib, inc_tors, exc_tors)
        self.geom.mon_tors_names = rtors["monomer"]
        self.geom.link_tors_names = rtors["link"]
        self.group_occ = GroupOccupancy(self.st, params.get("occu"))
        if not self.unrestrained:
            self.geom.load_topo(topo)
            exte.read_external_restraints(params.get("exte", []), self.st, self.geom)
        # NOTE(review): nesting was reconstructed from a listing without
        # indentation - confirm finalize_restraints() sits outside the branch.
        self.geom.finalize_restraints()
        self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
        self.parents = {}
        self.ncslist = ncslist
    # __init__()

    def set_h_parents(self):
        """Fill ``self.parents`` with hydrogen -> bonded partner, taken from the bond list."""
        self.parents = {}
        for bond in self.geom.bonds:
            first, second = bond.atoms[0], bond.atoms[1]
            if first.is_hydrogen():
                self.parents[first] = second
            elif second.is_hydrogen():
                self.parents[second] = first
    # set_h_parents()

    def setup_nonbonded(self, refine_xyz):
        """Set up non-bonded pairs (and NCS restraints when an NCS list was given)."""
        self.geom.setup_nonbonded(skip_critical_dist=(not refine_xyz or self.unrestrained),
                                  group_idxes=self.group_occ.group_idxes)
        if self.ncslist:
            self.geom.setup_ncsr(self.ncslist)

    def calc(self, target_only):
        """Evaluate the coordinate restraints with the stored weights."""
        return self.geom.calc(check_only=target_only, **self.calc_kwds)

    def calc_adp_restraint(self, target_only):
        """Evaluate the ADP restraints with weight ``self.adpr_w``."""
        return self.geom.calc_adp_restraint(target_only, self.adpr_w)

    def calc_occ_restraint(self, target_only):
        """Evaluate the occupancy restraints with weight ``self.occr_w``."""
        return self.geom.calc_occ_restraint(target_only, self.occr_w)

    def calc_target(self, target_only, refine_xyz, adp_mode, use_occr):
        """Return the sum of the enabled restraint terms, logging each one.

        Disabled terms contribute 0. Unless ``target_only`` is set, the
        special-position correction is applied afterwards.
        """
        self.geom.clear_target()
        terms = (("geom_x", self.calc(target_only) if refine_xyz else 0),
                 ("geom_a", self.calc_adp_restraint(target_only) if adp_mode > 0 else 0),
                 ("geom_q", self.calc_occ_restraint(target_only) if use_occr > 0 else 0))
        geom_x, geom_a, geom_q = (value for _, value in terms)
        logger.writeln(" geom_x = {}".format(geom_x))
        logger.writeln(" geom_a = {}".format(geom_a))
        logger.writeln(" geom_q = {}".format(geom_q))
        total = geom_x + geom_a + geom_q
        if not target_only:
            self.geom.spec_correction()
        return total

    def show_model_stats(self, refine_xyz=True, adp_mode=1, use_occr=False, show_outliers=True):
        """Log restraint statistics and return them.

        Returns a dict with "summary" (DataFrame indexed by restraint type)
        and "outliers" (restraint kind -> DataFrame sorted by |z|, only for
        kinds that have outliers and only when ``show_outliers`` is set).
        """
        if refine_xyz:
            self.calc(True)
        if adp_mode > 0:
            self.calc_adp_restraint(True)
        if use_occr:
            self.calc_occ_restraint(True)

        ret = {"outliers": {}}
        if show_outliers:
            get_table = dict(bond=self.geom.reporting.get_bond_outliers,
                             angle=self.geom.reporting.get_angle_outliers,
                             torsion=self.geom.reporting.get_torsion_outliers,
                             chir=self.geom.reporting.get_chiral_outliers,
                             plane=self.geom.reporting.get_plane_outliers,
                             staca=self.geom.reporting.get_stacking_angle_outliers,
                             stacd=self.geom.reporting.get_stacking_dist_outliers,
                             vdw=self.geom.reporting.get_vdw_outliers,
                             #ncs=self.geom.reporting.get_ncsr_outliers, # not useful?
                             )
            labs = dict(bond="Bond distances",
                        angle="Bond angles",
                        torsion="Torsion angles",
                        chir="Chiral centres",
                        plane="Planar groups",
                        staca="Stacking plane angles",
                        stacd="Stacking plane distances",
                        vdw="VDW repulsions",
                        ncs="Local NCS restraints")

            for kind, getter in get_table.items():
                threshold = self.outlier_sigmas[kind]
                kwgs = {"min_z": threshold}
                if kind == "bond":
                    kwgs["use_nucleus"] = self.use_nucleus
                table = getter(**kwgs)
                if not table["z"]:
                    continue
                # Replace atom objects by their printable labels.
                for col in table:
                    if col.startswith(("atom", "plane", "1_atom", "2_atom")):
                        table[col] = [str(self.lookup[x]) for x in table[col]]
                df = pandas.DataFrame(table)
                df = df.reindex(df.z.abs().sort_values(ascending=False).index)
                ret["outliers"][kind] = df
                if kind != "bond":
                    logger.writeln(" *** {} outliers (Z >= {}) ***\n".format(labs[kind], threshold))
                    logger.writeln(df.to_string(float_format="{:.3f}".format, index=False) + "\n")
                    continue
                # Bonds are reported in two tables, split on the "type" column.
                regular = df[df.type < 2].drop(columns=["type", "alpha"])
                if len(regular.index) > 0:
                    logger.writeln(" *** {} outliers (Z >= {}) ***\n".format(labs[kind], threshold))
                    logger.writeln(regular.to_string(float_format="{:.3f}".format, index=False) + "\n")
                external = df[df.type == 2].drop(columns=["type"])
                if len(external.index) > 0:
                    logger.writeln(" *** External bond outliers (Z >= {}) ***\n".format(threshold))
                    logger.writeln(external.to_string(float_format="{:.3f}".format, index=False) + "\n")

        # Per-atom score (disabled)
        if 0:
            peratom = self.geom.reporting.per_atom_score(len(self.atoms), self.use_nucleus, "mean")
            df = pandas.DataFrame(peratom)
            df.insert(0, "atom", [str(self.lookup[x]) for x in self.atoms])
            df = df[df["total"] >= self.outlier_sigmas["per_atom"]]
            if show_outliers and len(df.index) > 0:
                df.sort_values("total", ascending=False, inplace=True)
                ret["outliers"]["per_atom"] = df
                logger.writeln(" *** Per-atom violations (Z >= {}) ***\n".format(self.outlier_sigmas["per_atom"]))
                logger.writeln(df.to_string(float_format="{:.2f}".format, index=False) + "\n")

        summary = pandas.DataFrame(self.geom.reporting.get_summary_table(self.use_nucleus))
        summary = summary.set_index("Restraint type").rename_axis(index=None)
        ret["summary"] = summary
        logger.writeln(summary.to_string(float_format="{:.3f}".format) + "\n")
        return ret
|
|
186
|
+
|
|
187
|
+
def show_binstats(df, cycle_number):
    """Log the per-bin statistics table of one cycle as a loggraph string.

    Plot groups are chosen from the column labels of ``df``; a group is only
    emitted when at least one matching column exists. The x axis is
    1/d_min**2 taken from the "d_min" column.
    """
    def labels_starting_with(prefix):
        return [x for x in df if x.startswith(prefix)]

    # (plot title, columns) in the order the plots are emitted
    candidates = (
        ("FSC", ["fsc_model"] if "fsc_model" in df else []),
        ("R", labels_starting_with("R")),
        ("FSC", labels_starting_with("fsc")),
        ("CC", labels_starting_with("CC")),
        ("ML parameters - D", [x for x in df if re.search("^D[0-9]*", x)]),
        ("ML parameters - Sigma", ["S"] if "S" in df else []),
    )
    forplot = [[title, columns] for title, columns in candidates if columns]
    lstr = utils.make_loggraph_str(df, "Data stats in cycle {}".format(cycle_number), forplot,
                                   s2=1/df["d_min"]**2,
                                   float_format="{:.4f}".format)
    logger.writeln(lstr)
# show_binstats()
|
|
204
|
+
|
|
205
|
+
def convert_stats_to_dicts(stats):
    """Return a copy of ``stats`` with the "geom" DataFrames turned into plain containers.

    ``stats`` must be a list of dicts. In each entry the "geom" value is
    replaced by {"summary": summary.to_dict()} plus, when there is at least
    one outlier table, "outliers": {kind: list of row records}. All other
    keys are passed through unchanged.
    """
    converted = []
    for entry in stats:
        plain = {}
        for key, value in entry.items():
            if key != "geom":
                plain[key] = value
                continue
            geom = {"summary": value["summary"].to_dict()}
            outliers = {kind: table.to_dict(orient="records")
                        for kind, table in value["outliers"].items()}
            if outliers:  # the key is only present when something was flagged
                geom["outliers"] = outliers
            plain["geom"] = geom
        converted.append(plain)
    return converted
# convert_stats_to_dicts()
|
|
218
|
+
|
|
219
|
+
def write_stats_json_safe(stats, json_out):
    """Write refinement statistics to ``json_out`` as JSON.

    The data is first written to ``json_out + ".part"`` and then moved over
    the final name with os.replace, so the target file is only replaced once
    the dump has finished.
    """
    serialisable = convert_stats_to_dicts(stats)
    partial_path = json_out + ".part"
    with open(partial_path, "w") as handle:
        json.dump(serialisable, handle, indent=2)
    os.replace(partial_path, json_out)
    logger.writeln(f"Refinement statistics saved: {json_out}")
# write_stats_json_safe()
|
|
227
|
+
|
|
228
|
+
def print_h_options(h_change, h_present, refine_h, hout, geom_only):
    """Log how hydrogen atoms are treated in this run.

    h_change:  gemmi.HydrogenChange value that was applied to the model;
               it is reported as Remove when ``h_present`` is false.
    h_present: whether the model has hydrogen atoms.
    refine_h:  whether hydrogens are refined against the data.
    hout:      whether hydrogens are requested in the output model.
    geom_only: geometry-only run (no map calculation is mentioned then).
    """
    if not h_present:
        h_change = gemmi.HydrogenChange.Remove
    descriptions = {gemmi.HydrogenChange.ReAddButWater: "have been (re)generated",
                    gemmi.HydrogenChange.ReAdd: "(including water) have been (re)generated",
                    gemmi.HydrogenChange.ReAddKnown: "(except for rotatable) have been (re) generated",
                    gemmi.HydrogenChange.NoChange: "from the input model have been retained",
                    gemmi.HydrogenChange.Remove: "have either been removed or were not present"}
    logger.writeln("Hydrogen related options")
    logger.write(" use in refinement{}: hydrogen atoms ".format("" if geom_only else "/map calculation"))
    # This function only reports; an enum member missing from the table
    # must not abort the run with a KeyError.
    logger.writeln(descriptions.get(h_change, "were treated with option {}".format(h_change)))
    if h_present:
        logger.write(" target: hydrogen atoms will be ")
        if geom_only or not refine_h:
            logger.writeln("just optimized according to geometric restraints")
        else:
            logger.writeln("refined against experimental data")
    logger.writeln(" in output model: " + ("written" if hout and h_present else "not written"))
    logger.writeln("")
# print_h_options()
|
|
247
|
+
|
|
248
|
+
class GroupOccupancy:
    """Occupancy parameters shared by groups of atoms, with sum constraints.

    Attributes:
      groups:      one ``[parameter indexes, atoms]`` pair per group.
      consts:      ``(is_comp, [group indexes])`` per constraint. The summed
                   occupancy of the listed groups must be exactly 1 when
                   ``is_comp`` is true and at most 1 otherwise.
      group_idxes: per atom (serial-1) the 1-based group number, 0 if none.
      atom_pos:    per atom (serial-1) its parameter index, -1 if ungrouped.
      ncycle:      number of cycles run by refine().
    """
    # TODO max may not be one. should check multiplicity
    def __init__(self, st, params):
        """Select the atoms of every group described in ``params``.

        ``params`` may be None/empty; otherwise "groups" maps a group id to
        a list of selections (keys read: chains, resi_from, resi_to, resi,
        atom, alt), "const" lists ``(is_comp, group ids)`` pairs and
        "ncycle" (default 5) sets the number of refinement cycles.
        """
        n_sites = st[0].count_atom_sites()
        self.groups = []
        self.consts = []
        self.group_idxes = [0] * n_sites
        # Defined before the early return so that the attribute always exists.
        self.atom_pos = [-1] * n_sites
        self.ncycle = 0
        if not params or not params.get("groups"):
            return
        logger.writeln("Occupancy groups:")
        count = 0
        for igr in params["groups"]:
            self.groups.append([[], []]) # list of [indexes, atoms]
            n_curr = count
            for sel in params["groups"][igr]:
                sel_chains = sel.get("chains")
                sel_from = sel.get("resi_from")
                sel_to = sel.get("resi_to")
                sel_seq = sel.get("resi")
                sel_atom = sel.get("atom")
                sel_alt = sel.get("alt")
                for chain in st[0]:
                    if sel_chains and chain.name not in sel_chains:
                        continue
                    # in_range is switched on at resi_from and off after resi_to
                    in_range = False
                    for res in chain:
                        if sel_seq and res.seqid != sel_seq:
                            continue
                        if sel_from and res.seqid == sel_from:
                            in_range = True
                        if sel_from and not in_range:
                            continue
                        for atom in res:
                            if sel_atom and atom.name != sel_atom:
                                continue
                            if sel_alt and atom.altloc != sel_alt:
                                continue
                            self.atom_pos[atom.serial-1] = count
                            self.groups[-1][0].append(count)
                            self.groups[-1][1].append(atom)
                            self.group_idxes[atom.serial-1] = len(self.groups)
                            count += 1
                        if sel_to and res.seqid == sel_to:
                            in_range = False
            logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))

        igr_idxes = {igr: i for i, igr in enumerate(params["groups"])}
        # Groups may be given without any constraint between them.
        self.consts = [(is_comp, [igr_idxes[g] for g in gids])
                       for is_comp, gids in (params.get("const") or [])]
        self.ncycle = params.get("ncycle", 5)
    # __init__()

    def constraint(self, x):
        """Return the constraint values c_j(x) as an array (0 means satisfied).

        x: numpy array of group occupancies. For each constraint the value
        is sum - 1 when the constraint is complete or the sum exceeds 1,
        otherwise 0.
        """
        # x: occupancy parameters
        ret = []
        for is_comp, ids in self.consts:
            x_sum = numpy.sum(x[ids])
            if is_comp or x_sum > 1:
                ret.append(x_sum - 1)
            else:
                ret.append(0.)
        return numpy.array(ret)

    def ensure_constraints(self):
        """Give all atoms of a group one occupancy and rescale to satisfy the constraints.

        The group value is the mean atom occupancy limited to [1e-3, 1];
        the values of each constraint are then divided by their sum
        (incomplete constraints whose sum is below 1 are left alone).
        """
        vals = []
        for _, atoms in self.groups:
            occ = numpy.mean([a.occ for a in atoms])
            occ = min(1, max(1e-3, occ))
            vals.append(occ)
        for is_comp, idxes in self.consts:
            sum_occ = sum(vals[i] for i in idxes)
            if not is_comp and sum_occ < 1:
                sum_occ = 1. # do nothing
            for i in idxes:
                vals[i] /= sum_occ
        for occ, (_, atoms) in zip(vals, self.groups):
            for a in atoms:
                a.occ = occ

    def get_x(self):
        """Return the group occupancies, read from the first atom of each group."""
        return numpy.array([atoms[0].occ for _, atoms in self.groups])

    def set_x(self, x):
        """Assign occupancy x[k] to every atom of group k (no clamping)."""
        for p, (_, atoms) in zip(x, self.groups):
            for a in atoms:
                a.occ = p

    def target(self, x, ll, ls, u):
        """Apply x and return the augmented-Lagrangian target.

        f = ll.calc_target() - ls.c + u/2 * sum(c**2), with c = constraint(x).
        """
        self.set_x(x)
        ll.update_fc()
        c = self.constraint(x)
        return ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)

    def grad(self, x, ll, ls, u, refine_h):
        """Return (gradient, diagonal) of the target with respect to the group occupancies.

        Per-atom values from ``ll.ll.vn`` / ``ll.ll.am`` are summed over
        each group (hydrogens are left out unless ``refine_h``), then the
        constraint terms are added.
        """
        c = self.constraint(x)
        ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
        assert len(ll.ll.vn) == len(ll.ll.am)
        # Converted once; the same arrays are indexed for every group.
        atom_vn = numpy.array(ll.ll.vn)
        atom_am = numpy.array(ll.ll.am)
        vn = []
        diag = []
        for idxes, atoms in self.groups:
            if not refine_h:
                idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
            vn.append(numpy.sum(atom_vn[idxes]))
            diag.append(numpy.sum(atom_am[idxes]))
        vn, diag = numpy.array(vn), numpy.array(diag)
        for i, (is_comp, idxes) in enumerate(self.consts):
            dcdx = numpy.zeros(len(self.groups))
            dcdx[idxes] = 1.
            # an incomplete constraint that is satisfied (c == 0) adds nothing
            if is_comp or c[i] != 0:
                vn -= (ls[i] - u * c[i]) * dcdx
                diag += u * dcdx**2

        return vn, diag

    def refine(self, ll, refine_h, alpha=1.1):
        """Refine the group occupancies; return per-cycle statistics.

        Returns None when there are no groups, otherwise a list with one
        dict per cycle (keys Ncyc, f0, f1, shift_scale, const_viol,
        lambda_new). ``alpha`` multiplies the penalty parameter after every
        cycle.
        """
        # Refinement of grouped occupancies using augmented Lagrangian
        # f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
        # with c_j(x) = 0 constraints
        if not self.groups:
            return
        logger.writeln("\n== Group occupancy refinement ==")
        self.ensure_constraints() # make sure constrained groups have the same occupancies.
        ls = numpy.zeros(len(self.consts)) # Lagrange multiplier
        u = 10000. # penalty parameter. in Refmac 1/0.01**2
        x0 = self.get_x()
        f0 = self.target(x0, ll, ls, u)
        ret = []
        for cyc in range(self.ncycle):
            ret.append({"Ncyc": cyc+1, "f0": f0})
            logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
            vn, diag = self.grad(x0, ll, ls, u, refine_h)
            diag[diag < 1e-6] = 1.
            dx = -vn / diag

            # Try the full shift, then half and a quarter of it.
            scale = 1
            for i in range(3):
                scale = 1/2**i
                f1 = self.target(x0 + dx * scale, ll, ls, u)
                logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
                if f1 < f0: break
            else:
                # The last shift is kept anyway.
                # Refmac accepts it even when function increases
                logger.writeln("WARNING: function not minimised")
            c = self.constraint(x0 + dx * scale)
            ret[-1]["f1"] = f1
            ret[-1]["shift_scale"] = scale
            f0 = f1
            x0 = x0 + dx * scale
            ls -= u * c
            u = alpha * u
            ret[-1]["const_viol"] = list(c)
            ret[-1]["lambda_new"] = list(ls)
        self.ensure_constraints()
        ll.update_fc()
        f = ll.calc_target()
        logger.writeln("final -LL= {}".format(f))
        return ret
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
class Refine:
|
|
429
|
+
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
|
|
430
|
+
unrestrained=False, params=None):
|
|
431
|
+
assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
|
|
432
|
+
assert geom is not None
|
|
433
|
+
self.st = st # clone()?
|
|
434
|
+
self.st_traj = None
|
|
435
|
+
self.atoms = geom.atoms # not a copy
|
|
436
|
+
self.geom = geom
|
|
437
|
+
self.ll = ll
|
|
438
|
+
self.gamma = 0
|
|
439
|
+
self.adp_mode = 0 if self.ll is None else adp_mode
|
|
440
|
+
self.refine_xyz = refine_xyz
|
|
441
|
+
self.refine_occ = refine_occ
|
|
442
|
+
self.use_occr = self.refine_occ # for now?
|
|
443
|
+
self.unrestrained = unrestrained
|
|
444
|
+
self.refine_h = refine_h
|
|
445
|
+
self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
|
|
446
|
+
if self.h_inherit_parent_adp:
|
|
447
|
+
self.geom.set_h_parents()
|
|
448
|
+
if params and params.get("write_trajectory"):
|
|
449
|
+
self.st_traj = self.st.clone()
|
|
450
|
+
self.st_traj[-1].num = 0
|
|
451
|
+
assert self.geom.group_occ.groups or self.n_params() > 0
|
|
452
|
+
# __init__()
|
|
453
|
+
|
|
454
|
+
def print_weights(self): # TODO unfinished
    """Log the ADP and occupancy restraint weights that are in use.

    Raises LookupError when the ADP restraint mode is neither "diff" nor
    "kldiv" (after the weight and mode lines have been logged).
    """
    logger.writeln("Geometry weights")
    g = self.geom.geom
    if self.adp_mode > 0:
        logger.writeln(" ADP restraints")
        logger.writeln(" weight: {}".format(self.geom.adpr_w))
        logger.writeln(" mode: {}".format(g.adpr_mode))
        # choose the sigma list that belongs to the active mode
        if g.adpr_mode == "diff":
            sigmas = g.adpr_diff_sigs
        elif g.adpr_mode == "kldiv":
            sigmas = g.adpr_kl_sigs
        else:
            raise LookupError("unknown adpr_mode")
        logger.writeln(" sigmas: {}".format(" ".join("{:.2f}".format(x) for x in sigmas)))
    if self.refine_occ:
        logger.writeln(" Occupancy restraints")
        logger.writeln(" weight: {}".format(self.geom.occr_w))
|
|
470
|
+
|
|
471
|
+
def scale_shifts(self, dx, scale):
    """Return ``scale * dx`` with every parameter class limited to its allowed range.

    The shift vector is laid out as xyz (3 per atom, if refined), then ADP
    (1 or 6 per atom), then occupancy. Coordinate shifts are limited to
    [-1, 1], B shifts to [-30, 30] and occupancy shifts to [-0.5, 0.5]; for
    anisotropic ADPs the limits are applied to the eigenvalues of each 3x3
    shift tensor. Min/max/mean of every class are logged before limiting.
    """
    def clamp_inplace(values, low, high):
        # upper bound first, then lower bound
        values[values > high] = high
        values[values < low] = low

    n_atoms = self.geom.n_refine_atoms
    xyz_low, xyz_high = -1.0, 1.0
    b_low, b_high = -30.0, 30.0
    q_low, q_high = -0.5, 0.5

    dx = scale * dx
    offset_b = n_atoms * 3 if self.refine_xyz else 0
    offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]

    if self.refine_xyz:
        dxx = dx[:offset_b]
        logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
        logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
        logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
        clamp_inplace(dxx, xyz_low, xyz_high)

    if self.adp_mode in (1, 2):
        dxb = dx[offset_b:offset_q]
        # TODO this is misleading (for aniso these are tensor elements)
        logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
        logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
        logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
        if self.adp_mode == 1:
            clamp_inplace(dxb, b_low, b_high)
        else:
            # stored order per atom: 11, 22, 33, 12, 13, 23
            for j in range(0, 6 * (len(dxb) // 6), 6):
                tensor = numpy.array([[dxb[j], dxb[j+3], dxb[j+4]],
                                      [dxb[j+3], dxb[j+1], dxb[j+5]],
                                      [dxb[j+4], dxb[j+5], dxb[j+2]]])
                v, Q = numpy.linalg.eigh(tensor)
                clamp_inplace(v, b_low, b_high)
                tensor = Q.dot(numpy.diag(v)).dot(Q.T)
                dxb[j:j+6] = tensor[0,0], tensor[1,1], tensor[2,2], tensor[0,1], tensor[0,2], tensor[1,2]

    if self.refine_occ:
        dxq = dx[offset_q:]
        logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
        logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
        logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
        clamp_inplace(dxq, q_low, q_high)

    return dx
|
|
524
|
+
|
|
525
|
+
def n_params(self):
|
|
526
|
+
n_atoms = self.geom.n_refine_atoms
|
|
527
|
+
n_params = 0
|
|
528
|
+
if self.refine_xyz: n_params += 3 * n_atoms
|
|
529
|
+
if self.adp_mode == 1:
|
|
530
|
+
n_params += n_atoms
|
|
531
|
+
elif self.adp_mode == 2:
|
|
532
|
+
n_params += 6 * n_atoms
|
|
533
|
+
if self.refine_occ:
|
|
534
|
+
n_params += n_atoms
|
|
535
|
+
return n_params
|
|
536
|
+
|
|
537
|
+
def set_x(self, x):
    """Copy the parameter vector ``x`` into the atoms and refresh dependent state.

    ``x`` is laid out as xyz (3 per refined atom, if refined), then ADP
    (1 value for iso, 6 for aniso), then occupancy; ``self.geom.atom_pos``
    gives each atom's slot and negative slots are skipped. B values and
    aniso eigenvalues are floored at 0.5, occupancies are kept within
    [1e-3, max], where max is 1/(number of images + 1) for atoms listed in
    ``self.geom.specs`` and 1 otherwise. Afterwards hydrogen ADPs are copied
    from their parents (if enabled), the data target is updated and the
    non-bonded list and geometry target are set up again.
    """
    n_atoms = self.geom.n_refine_atoms
    offset_b = n_atoms * 3 if self.refine_xyz else 0
    offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
    # upper occupancy limit for atoms on special positions
    max_occ = {}
    if self.refine_occ and self.geom.specs:
        max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
    for i, j in enumerate(self.geom.atom_pos):
        if j < 0: continue
        if self.refine_xyz:
            self.atoms[i].pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
        if self.adp_mode == 1:
            self.atoms[i].b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
        elif self.adp_mode == 2:
            # the six values are handled in B units here and converted
            # with b_to_u just before they are stored in atom.aniso
            a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
            a = gemmi.SMat33d(*a)
            M = a.as_mat33().array
            v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
            v = numpy.maximum(v, 0.5) # avoid NPD with minimum B = 0.5
            M2 = Q.dot(numpy.diag(v)).dot(Q.T)
            self.atoms[i].b_iso = M2.trace() / 3
            M2 *= b_to_u
            self.atoms[i].aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
        if self.refine_occ:
            self.atoms[i].occ = min(max_occ.get(self.atoms[i], 1), max(1e-3, x[offset_q + j]))

    # Copy B of hydrogen from parent
    if self.h_inherit_parent_adp:
        for h in self.geom.parents:
            p = self.geom.parents[h]
            h.b_iso = p.b_iso
            h.aniso = p.aniso

    if self.ll is not None:
        self.ll.update_fc()

    self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
    self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
    logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
    logger.writeln(f"atoms = {self.geom.geom.target.n_atoms()}")
    logger.writeln(f"pairs = {self.geom.geom.target.n_pairs()}")
|
|
578
|
+
|
|
579
|
+
def get_x(self):
|
|
580
|
+
n_atoms = self.geom.n_refine_atoms
|
|
581
|
+
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
582
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
583
|
+
x = numpy.zeros(self.n_params())
|
|
584
|
+
for i, j in enumerate(self.geom.atom_pos):
|
|
585
|
+
if j < 0: continue
|
|
586
|
+
a = self.atoms[i]
|
|
587
|
+
if self.refine_xyz:
|
|
588
|
+
x[3*j:3*(j+1)] = a.pos.tolist()
|
|
589
|
+
if self.adp_mode == 1:
|
|
590
|
+
x[offset_b + j] = self.atoms[i].b_iso
|
|
591
|
+
elif self.adp_mode == 2:
|
|
592
|
+
x[offset_b + 6*j : offset_b + 6*(j+1)] = self.atoms[i].aniso.elements_pdb()
|
|
593
|
+
x[offset_b + 6*j : offset_b + 6*(j+1)] *= u_to_b
|
|
594
|
+
if self.refine_occ:
|
|
595
|
+
x[offset_q + j] = a.occ
|
|
596
|
+
|
|
597
|
+
return x
|
|
598
|
+
#@profile
|
|
599
|
+
def calc_target(self, w=1, target_only=False):
|
|
600
|
+
N = self.n_params()
|
|
601
|
+
geom = self.geom.calc_target(target_only,
|
|
602
|
+
not self.unrestrained and self.refine_xyz,
|
|
603
|
+
self.adp_mode, self.use_occr)
|
|
604
|
+
if self.ll is not None:
|
|
605
|
+
ll = self.ll.calc_target()
|
|
606
|
+
logger.writeln(" ll= {}".format(ll))
|
|
607
|
+
if not target_only:
|
|
608
|
+
self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
|
|
609
|
+
self.refine_h, self.geom.geom.specials)
|
|
610
|
+
else:
|
|
611
|
+
ll = 0
|
|
612
|
+
|
|
613
|
+
f = w * ll + geom
|
|
614
|
+
return f
|
|
615
|
+
|
|
616
|
+
#@profile
|
|
617
|
+
def run_cycle(self, weight=1):
    """Run one minimisation cycle: solve for a shift and apply it.

    The target at the current parameters (``f0``) is evaluated first, a
    shift vector ``dx`` is obtained from the conjugate-gradient solver, and
    then up to three trial steps ``x0 - scale_shifts(dx, s)`` with
    ``s = 1, 1/2, 1/4`` are tried until the target drops below ``f0``.

    Args:
        weight: weight of the data term, passed to ``calc_target`` and to
            the solver.

    Returns:
        Tuple ``(ret, shift_scale, f1)``: ``ret`` is False when none of the
        trial steps decreased the target, ``shift_scale`` is the scale of
        the last trial step and ``f1`` its target value. The last trial
        step stays applied even when ``ret`` is False.
    """
    f0 = self.calc_target(weight)
    x0 = self.get_x()
    logger.writeln("f0= {:.4e}".format(f0))

    # Set to False to use the pure-Python solver instead (debugging aid).
    use_cpp_solver = True
    if use_cpp_solver:
        use_ic = False # incomplete cholesky. problematic at least in geometry optimisation case
        logger.writeln("using cgsolve in c++, ic={}".format(use_ic))
        cgsolver = ext.CgSolve(self.geom.geom.target, None if self.ll is None else self.ll.ll)
        if use_ic:
            cgsolver.gamma = 0
            cgsolver.max_gamma_cyc = 1
        else:
            cgsolver.gamma = self.gamma
        dx = cgsolver.solve(weight, logger, use_ic)
        # remember gamma as left by the solver for the next cycle
        self.gamma = cgsolver.gamma
    else:
        logger.writeln("using cgsolve in py")
        am = self.geom.geom.target.am_spmat
        vn = numpy.array(self.geom.geom.target.vn)
        if self.ll is not None:
            am += self.ll.ll.fisher_spmat * weight
            vn += numpy.array(self.ll.ll.vn) * weight
        diag = am.diagonal()
        diag[diag<=0] = 1.
        diag = numpy.sqrt(diag)
        rdiag = 1./diag # sk
        M = scipy.sparse.diags(rdiag)
        dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)

    ret = True # success
    for i in range(3):
        # halve the step on every retry
        shift_scale = 1/2**i
        dx2 = self.scale_shifts(dx, shift_scale)
        self.set_x(x0 - dx2)
        f1 = self.calc_target(weight, target_only=True)
        logger.writeln("f1, {}= {:.4e}".format(i, f1))
        if f1 < f0: break
    else:
        # no trial step decreased the target
        ret = False
        logger.writeln("WARNING: function not minimised")
        #self.set_x(x0) # Refmac accepts it even when function increases

    return ret, shift_scale, f1
|
|
689
|
+
|
|
690
|
+
def run_cycles(self, ncycles, weight=1, weight_adjust=False, debug=False,
               weight_adjust_bond_rmsz_range=(0.5, 1.), stats_json_out=None):
    """Run ``ncycles`` refinement cycles and collect per-cycle statistics.

    Args:
        ncycles: number of cycles to run.
        weight: initial weight of the data term passed to ``run_cycle``.
        weight_adjust: if true, the weight is rescaled after a cycle based
            on the r.m.s.Z of "Bond distances, non H" (only when refining
            coordinates with restraints and data).
        debug: if true, write the model as ``refined_NN`` after each cycle.
        weight_adjust_bond_rmsz_range: ``(low, high)`` bounds of the bond
            r.m.s.Z used by the weight adjustment.
        stats_json_out: if set, ``stats`` is written there via
            ``write_stats_json_safe`` after the initial statistics and
            after every cycle.

    Returns:
        list of dicts, one for the starting model (``Ncyc`` 0) followed by
        one per recorded cycle, holding "geom" and, when data are present,
        "data" (and optionally "twin_alpha", "occ_refine") entries.
    """
    bond_label = "Bond distances, non H"

    def model_stats(show_outliers):
        # geometry statistics of the current model
        return self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
                                          adp_mode=self.adp_mode,
                                          use_occr=self.refine_occ,
                                          show_outliers=show_outliers)

    def store_data_stats(llstats, cycle):
        # record the data statistics in the latest stats entry and print the binned table
        stats[-1]["data"] = {"summary": llstats["summary"],
                             "binned": llstats["bin_stats"].to_dict(orient="records")}
        if "twin_alpha" in llstats:
            stats[-1]["twin_alpha"] = llstats["twin_alpha"]
        show_binstats(llstats["bin_stats"], cycle)

    self.print_weights()
    stats = [{"Ncyc": 0}]
    self.geom.setup_nonbonded(self.refine_xyz)
    self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
    logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
    logger.writeln(f"atoms = {self.geom.geom.target.n_atoms()}")
    logger.writeln(f"pairs = {self.geom.geom.target.n_pairs()}")
    stats[-1]["geom"] = model_stats(show_outliers=True)
    if self.ll is not None:
        self.ll.update_fc()
        self.ll.overall_scale()
        self.ll.update_ml_params()
        self.ll.prepare_target()
        store_data_stats(self.ll.calc_stats(bin_stats=True), 0)
    if self.adp_mode > 0:
        utils.model.adp_analysis(self.st)
    if stats_json_out:
        write_stats_json_safe(stats, stats_json_out)
    occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0

    for i in range(ncycles):
        logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
        logger.writeln(f" weight = {weight:.4e}")
        if self.refine_xyz or self.adp_mode > 0 or self.refine_occ:
            is_ok, shift_scale, fval = self.run_cycle(weight=weight)
            stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok,
                          "weight": weight})
        elif occ_refine_flag:
            stats.append({"Ncyc": len(stats)})
        if occ_refine_flag:
            stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
        if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
        # outliers are listed only after the final cycle
        stats[-1]["geom"] = model_stats(show_outliers=(i==ncycles-1))
        if self.ll is not None:
            self.ll.overall_scale()
            f0 = self.ll.calc_target()
            self.ll.update_ml_params()
            self.ll.prepare_target()
            llstats = self.ll.calc_stats(bin_stats=True)#(i==ncycles-1))
            if llstats["summary"]["-LL"] > f0:
                logger.writeln("WARNING: -LL has increased after ML parameter optimization: "
                               "{} to {}".format(f0, llstats["summary"]["-LL"]))
            store_data_stats(llstats, i+1)
        if self.adp_mode > 0:
            utils.model.adp_analysis(self.st)
        if (weight_adjust and self.refine_xyz and not self.unrestrained and self.ll is not None and
            len(stats) > 2 and bond_label in stats[-1]["geom"]["summary"].index):
            # compare bond r.m.s.Z of this cycle (rmsz) with the previous one (rmsz0)
            rmsz = stats[-1]["geom"]["summary"]["r.m.s.Z"][bond_label]
            rmsz0 = stats[-2]["geom"]["summary"]["r.m.s.Z"][bond_label]
            if rmsz > weight_adjust_bond_rmsz_range[1] and rmsz > rmsz0:
                weight /= 1.1
            elif rmsz < weight_adjust_bond_rmsz_range[0] and rmsz0 < weight_adjust_bond_rmsz_range[0] and rmsz < rmsz0:
                weight *= 1.3
            elif rmsz > 1.5 * rmsz0:
                weight /= 1.1
        if self.st_traj is not None:
            self.st_traj.add_model(self.st[0])
            self.st_traj[-1].num = len(self.st_traj)
        if stats_json_out:
            write_stats_json_safe(stats, stats_json_out)

    logger.writeln("")

    # Make table
    # dicts (not sets) keep the keys in first-seen order, so the plot
    # columns come out in the same order on every run
    data_keys, geom_keys = {}, {}
    rows = []
    for d in stats:
        row = {"Ncyc": d["Ncyc"]}
        if "data" in d and "summary" in d["data"]:
            row.update(d["data"]["summary"])
            data_keys.update(dict.fromkeys(d["data"]["summary"]))
        if "geom" in d:
            for k, n, l in (("r.m.s.d.", "Bond distances, non H", "rmsBOND"),
                            ("r.m.s.Z", "Bond distances, non H", "zBOND"),
                            ("r.m.s.d.", "Bond angles, non H", "rmsANGL"),
                            ("r.m.s.Z", "Bond angles, non H", "zANGL")):
                if k in d["geom"]["summary"] and n in d["geom"]["summary"][k]:
                    row[l] = d["geom"]["summary"][k].get(n)
                    geom_keys[l] = None
        rows.append(row)
    df = pandas.DataFrame(rows)
    forplot = []
    if "FSCaverage" in data_keys:
        forplot.append(["FSC", ["Ncyc", "FSCaverage"]])
    r_keys = [x for x in data_keys if x.startswith("R")]
    if r_keys:
        forplot.append(["R", ["Ncyc"] + r_keys])
    cc_keys = [x for x in data_keys if x.startswith("CC")]
    if cc_keys:
        forplot.append(["CC", ["Ncyc"] + cc_keys])
    if "-LL" in data_keys:
        forplot.append(["-LL", ["Ncyc", "-LL"]])
    rms_keys = [x for x in geom_keys if x.startswith("rms")]
    if rms_keys:
        forplot.append(["Geometry", ["Ncyc"] + rms_keys])
    z_keys = [x for x in geom_keys if x.startswith("z")]
    if z_keys:
        forplot.append(["Geometry Z", ["Ncyc"] + z_keys])

    lstr = utils.make_loggraph_str(df, "stats vs cycle", forplot,
                                   float_format="{:.4f}".format)
    logger.writeln(lstr)
    return stats
|
|
810
|
+
|
|
811
|
+
# class Refine
|
|
812
|
+
|
|
813
|
+
def update_meta(st, stats, ll=None):
    """Replace the refinement metadata and REMARK 3 records of ``st``.

    Helices and sheets of ``st`` are cleared, ``st.meta.software`` and
    ``st.meta.refinement`` are replaced by single Servalcat entries and
    ``st.raw_remarks`` is overwritten.

    Args:
        st: structure to update in place.
        stats: dict of statistics; a "geom" entry (with a "summary" table)
            fills the restraint statistics, and, when ``ll`` is given, a
            "data" entry with "summary" and "binned" fills the overall and
            per-bin data statistics.
        ll: optional data-term object providing ``refine_id()`` and
            ``b_aniso``; when None no data statistics are written.
    """
    # TODO write stats. probably geom.reporting.get_summary_table should return with _refine_ls_restr.type names
    # should remove st.mod_residues?
    st.helices.clear()
    st.sheets.clear()
    # NOTE(review): confirm the column spacing of the REMARK strings below against the PDB format
    raw_remarks = ['REMARK 3',
                   'REMARK 3 REFINEMENT.',
                   f'REMARK 3 PROGRAM : SERVALCAT {servalcat.__version__}',
                   'REMARK 3 AUTHORS : YAMASHITA,MURSHUDOV',
                   'REMARK 3',
                   ]
    si = gemmi.SoftwareItem()
    si.classification = gemmi.SoftwareItem.Classification.Refinement
    si.name = "Servalcat"
    si.version = servalcat.__version__
    si.date = servalcat.__date__
    st.meta.software = [si]

    ri = gemmi.RefinementInfo()
    if "geom" in stats:
        geom_summary = stats["geom"]["summary"]
        restr_stats = []
        raw_remarks.append("REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT")
        # (summary column, summary row, restraint type name, REMARK label or "" for no remark line)
        for k, n, l, pl in (("r.m.s.d.", "Bond distances, non H", "s_bond_nonh_d", "BOND LENGTHS REFINED ATOMS (A)"),
                            ("r.m.s.d.", "Bond angles, non H", "s_angle_nonh_deg", "BOND ANGLES REFINED ATOMS (DEGREES)"),
                            ("r.m.s.d.", "Torsion angles, period 1", "s_dihedral_angle_1_deg", "TORSION ANGLES, PERIOD 1 (DEGREES)"),
                            ("r.m.s.d.", "Torsion angles, period 2", "s_dihedral_angle_2_deg", "TORSION ANGLES, PERIOD 2 (DEGREES)"),
                            ("r.m.s.d.", "Torsion angles, period 3", "s_dihedral_angle_3_deg", "TORSION ANGLES, PERIOD 3 (DEGREES)"),
                            ("r.m.s.d.", "Torsion angles, period 6", "s_dihedral_angle_6_deg", "TORSION ANGLES, PERIOD 6 (DEGREES)"),
                            ("r.m.s.d.", "Chiral centres", "s_chiral_restr", "CHIRAL-CENTER RESTRAINTS (A**3)"),
                            ("r.m.s.d.", "Planar groups", "s_planes", "GENERAL PLANES REFINED ATOMS (A)"),
                            ("r.m.s.d.", "VDW nonbonded", "s_nbd", ""),
                            ("r.m.s.d.", "VDW torsion", "s_nbtor", ""),
                            ("r.m.s.d.", "VDW hbond", "s_hbond_nbd", ""),
                            ("r.m.s.d.", "VDW metal", "s_metal_ion", ""),
                            ("r.m.s.d.", "VDW dummy", "s_dummy_nbd", ""),
                            ("r.m.s.d.", "VDW nonbonded, symmetry", "s_symmetry_nbd", ""),
                            ("r.m.s.d.", "VDW torsion, symmetry", "s_symmetry_nbtor", ""),
                            ("r.m.s.d.", "VDW hbond, symmetry", "s_symmetry_hbond_nbd", ""),
                            ("r.m.s.d.", "VDW metal, symmetry", "s_symmetry_metal_ion", ""),
                            ("r.m.s.d.", "VDW dummy, symmetry", "s_symmetry_dummy_nbd", "")):
            if k in geom_summary and n in geom_summary[k]:
                rr = gemmi.RefinementInfo.Restr(l)
                rr.dev_ideal = round(geom_summary[k].get(n), 4)
                rr.count = geom_summary["N restraints"].get(n)
                rr.weight = round(geom_summary["Mn(sigma)"].get(n), 4)
                restr_stats.append(rr)
                if pl:
                    raw_remarks.append(f"REMARK 3 {pl}:{rr.count:6d} ;{rr.dev_ideal:6.3f} ;{rr.weight:6.3f}")
        ri.restr_stats = restr_stats
        raw_remarks.append("REMARK 3")
    if ll is not None:
        data_summary = stats["data"]["summary"]
        ri.id = ll.refine_id()
        ri.mean_b = round(numpy.mean([cra.atom.b_iso for cra in st[0].all()]), 2)
        if ll.b_aniso is not None:
            ri.aniso_b = ll.b_aniso
        # (summary key, RefinementInfo attribute, decimals)
        for k, kd, nd in (("Rwork", "r_work", 4), ("Rfree", "r_free", 4), ("R", "r_all", 4),
                          ("FSCaverage", "fsc_work", 4),
                          ("FSCaverage_half1", "fsc_work", 4), ("FSCaverage_half2", "fsc_free", 4)):
            if k in data_summary:
                setattr(ri, kd, round(data_summary[k], nd))
        bins = []
        n_all = 0
        for b in stats["data"]["binned"]:
            bri = gemmi.BasicRefinementInfo()
            bri.resolution_high = round(b["d_min"], 3)
            bri.resolution_low = round(b["d_max"], 3)
            # (bin key, BasicRefinementInfo attribute, decimals)
            for k, kd, nd in (("Rwork", "r_work", 4), ("Rfree", "r_free", 4),
                              ("R1work", "r_work", 4), ("R1free", "r_free", 4),
                              ("R", "r_all", 4), ("R1", "r_all", 4),
                              ("CCI", "cc_intensity_work", 4), ("CCF", "cc_fo_fc_work", 4),
                              ("CCIwork", "cc_intensity_work", 4), ("CCIfree", "cc_intensity_free", 4),
                              ("CCFwork", "cc_fo_fc_work", 4), ("CCFfree", "cc_fo_fc_free", 4),
                              ("fsc_FC_full", "fsc_work", 4), ("fsc_model", "fsc_work", 4),
                              ("fsc_model_half1", "fsc_work", 4), ("fsc_model_half2", "fsc_free", 4),
                              ("n_work", "work_set_count", 0), ("n_free", "rfree_set_count", 0),
                              ("n_obs", "reflection_count", 0), ("ncoeffs", "reflection_count", 0)):
                if k in b: setattr(bri, kd, round(b[k], nd))
            if "n_all" in b and "n_obs" in b:
                # leave completeness unset for a bin with no possible reflections
                if b["n_all"] > 0:
                    bri.completeness = round(b["n_obs"] / b["n_all"] * 100, 2)
                n_all += b["n_all"]
            bins.append(bri)
        # max(-1, ...) keeps the count at -1 when the per-bin sum is negative (bins without that count)
        ri.rfree_set_count = max(-1, sum(b.rfree_set_count for b in bins))
        ri.work_set_count = max(-1, sum(b.work_set_count for b in bins))
        ri.reflection_count = max(-1, sum(b.reflection_count for b in bins))
        if bins:
            # min()/max() would raise ValueError on an empty bin list
            ri.resolution_high = round(min(b.resolution_high for b in bins), 3)
            ri.resolution_low = round(max(b.resolution_low for b in bins), 3)
        if ri.reflection_count > 0 and n_all > 0:
            ri.completeness = round(ri.reflection_count / n_all * 100, 2)
        ri.bins = bins
    if ri.rfree_set_count > 0:
        ri.cross_validation_method = "THROUGHOUT"
    st.meta.refinement = [ri]
    st.raw_remarks = raw_remarks
|
|
906
|
+
# update_meta()
|