servalcat 0.4.32__cp39-cp39-win_amd64.whl → 0.4.60__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/{command_line.py → __main__.py} +1 -3
- servalcat/ext.cp39-win_amd64.pyd +0 -0
- servalcat/refine/refine.py +283 -60
- servalcat/refine/refine_geom.py +70 -42
- servalcat/refine/refine_spa.py +17 -4
- servalcat/refine/refine_xtal.py +35 -10
- servalcat/refine/spa.py +5 -4
- servalcat/refine/xtal.py +36 -48
- servalcat/refmac/exte.py +11 -7
- servalcat/refmac/refmac_keywords.py +106 -87
- servalcat/refmac/refmac_wrapper.py +91 -17
- servalcat/spa/fofc.py +11 -4
- servalcat/spa/fsc.py +4 -0
- servalcat/spa/localcc.py +1 -0
- servalcat/spa/run_refmac.py +21 -15
- servalcat/utils/commands.py +74 -29
- servalcat/utils/fileio.py +38 -37
- servalcat/utils/generate_operators.py +4 -2
- servalcat/utils/hkl.py +24 -5
- servalcat/utils/logger.py +11 -3
- servalcat/utils/model.py +53 -44
- servalcat/utils/refmac.py +27 -9
- servalcat/utils/restraints.py +133 -18
- servalcat/utils/symmetry.py +2 -0
- servalcat/xtal/french_wilson.py +30 -26
- servalcat/xtal/run_refmac_small.py +55 -63
- servalcat/xtal/sigmaa.py +258 -250
- servalcat-0.4.60.dist-info/METADATA +56 -0
- servalcat-0.4.60.dist-info/RECORD +44 -0
- {servalcat-0.4.32.dist-info → servalcat-0.4.60.dist-info}/WHEEL +1 -1
- {servalcat-0.4.32.dist-info → servalcat-0.4.60.dist-info}/entry_points.txt +2 -1
- servalcat-0.4.32.dist-info/METADATA +0 -16
- servalcat-0.4.32.dist-info/RECORD +0 -45
- servalcat-0.4.32.dist-info/top_level.txt +0 -1
- {servalcat-0.4.32.dist-info → servalcat-0.4.60.dist-info/licenses}/LICENSE +0 -0
servalcat/__init__.py
CHANGED
|
@@ -67,9 +67,7 @@ def main():
|
|
|
67
67
|
description="A tool for model refinement and map calculation for crystallography and cryo-EM SPA.")
|
|
68
68
|
parser.add_argument("--skip_test", action="store_true", help="Skip installation test")
|
|
69
69
|
parser.add_argument("-v", "--version", action="version",
|
|
70
|
-
version=
|
|
71
|
-
python=platform.python_version(),
|
|
72
|
-
deps=", ".join([x[0]+" "+x[1] for x in logger.dependency_versions().items()])))
|
|
70
|
+
version=logger.versions_str())
|
|
73
71
|
parser.add_argument("--logfile", default="servalcat.log")
|
|
74
72
|
subparsers = parser.add_subparsers(dest="command")
|
|
75
73
|
|
servalcat/ext.cp39-win_amd64.pyd
CHANGED
|
Binary file
|
servalcat/refine/refine.py
CHANGED
|
@@ -29,12 +29,18 @@ b_to_u = utils.model.b_to_u
|
|
|
29
29
|
|
|
30
30
|
class Geom:
|
|
31
31
|
def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
|
|
32
|
-
refmac_keywords=None, unrestrained=False, use_nucleus=False
|
|
32
|
+
refmac_keywords=None, unrestrained=False, use_nucleus=False,
|
|
33
|
+
ncslist=None, atom_pos=None):
|
|
33
34
|
self.st = st
|
|
34
35
|
self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
|
|
35
36
|
for cra in self.st[0].all(): self.atoms[cra.atom.serial-1] = cra.atom
|
|
37
|
+
if atom_pos is not None:
|
|
38
|
+
self.atom_pos = atom_pos
|
|
39
|
+
else:
|
|
40
|
+
self.atom_pos = list(range(len(self.atoms)))
|
|
41
|
+
self.n_refine_atoms = max(self.atom_pos) + 1
|
|
36
42
|
self.lookup = {x.atom: x for x in self.st[0].all()}
|
|
37
|
-
self.geom = ext.Geometry(self.st, monlib.ener_lib)
|
|
43
|
+
self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
|
|
38
44
|
self.specs = utils.model.find_special_positions(self.st)
|
|
39
45
|
#cs_count = len(self.st.find_spacegroup().operations())
|
|
40
46
|
for atom, images, matp, mata in self.specs:
|
|
@@ -55,13 +61,17 @@ class Geom:
|
|
|
55
61
|
if refmac_keywords:
|
|
56
62
|
exte.read_external_restraints(refmac_keywords, self.st, self.geom)
|
|
57
63
|
kwds = parse_keywords(refmac_keywords)
|
|
58
|
-
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw"):
|
|
64
|
+
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
|
|
59
65
|
if k in kwds:
|
|
60
66
|
self.calc_kwds[k] = kwds[k]
|
|
61
67
|
logger.writeln("setting geometry weight {}= {}".format(k, kwds[k]))
|
|
68
|
+
self.group_occ = GroupOccupancy(self.st, kwds.get("occu"))
|
|
69
|
+
else:
|
|
70
|
+
self.group_occ = GroupOccupancy(self.st, None)
|
|
62
71
|
self.geom.finalize_restraints()
|
|
63
|
-
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
72
|
+
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
64
73
|
self.parents = {}
|
|
74
|
+
self.ncslist = ncslist
|
|
65
75
|
# __init__()
|
|
66
76
|
|
|
67
77
|
def check_chemtypes(self, enerlib_path, topo):
|
|
@@ -87,7 +97,9 @@ class Geom:
|
|
|
87
97
|
# set_h_parents()
|
|
88
98
|
def setup_nonbonded(self, refine_xyz):
|
|
89
99
|
skip_critical_dist = not refine_xyz or self.unrestrained
|
|
90
|
-
self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist)
|
|
100
|
+
self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist, group_idxes=self.group_occ.group_idxes)
|
|
101
|
+
if self.ncslist:
|
|
102
|
+
self.geom.setup_ncsr(self.ncslist)
|
|
91
103
|
def calc(self, target_only):
|
|
92
104
|
return self.geom.calc(check_only=target_only, **self.calc_kwds)
|
|
93
105
|
def calc_adp_restraint(self, target_only):
|
|
@@ -116,6 +128,7 @@ class Geom:
|
|
|
116
128
|
staca=self.geom.reporting.get_stacking_angle_outliers,
|
|
117
129
|
stacd=self.geom.reporting.get_stacking_dist_outliers,
|
|
118
130
|
vdw=self.geom.reporting.get_vdw_outliers,
|
|
131
|
+
#ncs=self.geom.reporting.get_ncsr_outliers, # not useful?
|
|
119
132
|
)
|
|
120
133
|
labs = dict(bond="Bond distances",
|
|
121
134
|
angle="Bond angles",
|
|
@@ -124,7 +137,8 @@ class Geom:
|
|
|
124
137
|
plane="Planar groups",
|
|
125
138
|
staca="Stacking plane angles",
|
|
126
139
|
stacd="Stacking plane distances",
|
|
127
|
-
vdw="VDW repulsions"
|
|
140
|
+
vdw="VDW repulsions",
|
|
141
|
+
ncs="Local NCS restraints")
|
|
128
142
|
|
|
129
143
|
for k in get_table:
|
|
130
144
|
kwgs = {"min_z": self.outlier_sigmas[k]}
|
|
@@ -132,7 +146,7 @@ class Geom:
|
|
|
132
146
|
table = get_table[k](**kwgs)
|
|
133
147
|
if table["z"]:
|
|
134
148
|
for kk in table:
|
|
135
|
-
if kk.startswith(("atom", "plane")):
|
|
149
|
+
if kk.startswith(("atom", "plane", "1_atom", "2_atom")):
|
|
136
150
|
table[kk] = [str(self.lookup[x]) for x in table[kk]]
|
|
137
151
|
df = pandas.DataFrame(table)
|
|
138
152
|
df = df.reindex(df.z.abs().sort_values(ascending=False).index)
|
|
@@ -166,9 +180,204 @@ class Geom:
|
|
|
166
180
|
ret["summary"] = df
|
|
167
181
|
logger.writeln(df.to_string(float_format="{:.3f}".format) + "\n")
|
|
168
182
|
return ret
|
|
183
|
+
|
|
184
|
+
def show_binstats(df, cycle_number):
|
|
185
|
+
forplot = []
|
|
186
|
+
rlabs = [x for x in df if x.startswith("R")]
|
|
187
|
+
cclabs = [x for x in df if x.startswith("CC")]
|
|
188
|
+
dlabs = [x for x in df if re.search("^D[0-9]*", x)]
|
|
189
|
+
if "fsc_model" in df: forplot.append(["FSC", ["fsc_model"]])
|
|
190
|
+
if rlabs: forplot.append(["R", rlabs])
|
|
191
|
+
if cclabs: forplot.append(["CC", cclabs])
|
|
192
|
+
if dlabs: forplot.append(["ML parameters - D", dlabs])
|
|
193
|
+
if "S" in df: forplot.append(["ML parameters - Sigma", ["S"]])
|
|
194
|
+
lstr = utils.make_loggraph_str(df, "Data stats in cycle {}".format(cycle_number), forplot,
|
|
195
|
+
s2=1/df["d_min"]**2,
|
|
196
|
+
float_format="{:.4f}".format)
|
|
197
|
+
logger.writeln(lstr)
|
|
198
|
+
# show_binstats()
|
|
199
|
+
|
|
200
|
+
class GroupOccupancy:
|
|
201
|
+
# TODO max may not be one. should check multiplicity
|
|
202
|
+
def __init__(self, st, params):
|
|
203
|
+
self.groups = []
|
|
204
|
+
self.consts = []
|
|
205
|
+
self.group_idxes = [0 for _ in range(st[0].count_atom_sites())]
|
|
206
|
+
self.ncycle = 0
|
|
207
|
+
if not params or not params.get("groups"):
|
|
208
|
+
return
|
|
209
|
+
logger.writeln("Occupancy groups:")
|
|
210
|
+
self.atom_pos = [-1 for _ in range(st[0].count_atom_sites())]
|
|
211
|
+
count = 0
|
|
212
|
+
for igr in params["groups"]:
|
|
213
|
+
self.groups.append([[], []]) # list of [indexes, atoms]
|
|
214
|
+
n_curr = count
|
|
215
|
+
for sel in params["groups"][igr]:
|
|
216
|
+
sel_chains = sel.get("chains")
|
|
217
|
+
sel_from = sel.get("resi_from")
|
|
218
|
+
sel_to = sel.get("resi_to")
|
|
219
|
+
sel_seq = sel.get("resi")
|
|
220
|
+
sel_atom = sel.get("atom")
|
|
221
|
+
sel_alt = sel.get("alt")
|
|
222
|
+
for chain in st[0]:
|
|
223
|
+
if sel_chains and chain.name not in sel_chains:
|
|
224
|
+
continue
|
|
225
|
+
flag = False
|
|
226
|
+
for res in chain:
|
|
227
|
+
if sel_seq and res.seqid != sel_seq:
|
|
228
|
+
continue
|
|
229
|
+
if sel_from and res.seqid == sel_from:
|
|
230
|
+
flag = True
|
|
231
|
+
if sel_from and not flag:
|
|
232
|
+
continue
|
|
233
|
+
for atom in res:
|
|
234
|
+
if sel_atom and atom.name != sel_atom:
|
|
235
|
+
continue
|
|
236
|
+
if sel_alt and atom.altloc != sel_alt:
|
|
237
|
+
continue
|
|
238
|
+
self.atom_pos[atom.serial-1] = count
|
|
239
|
+
self.groups[-1][0].append(count)
|
|
240
|
+
self.groups[-1][1].append(atom)
|
|
241
|
+
self.group_idxes[atom.serial-1] = len(self.groups)
|
|
242
|
+
count += 1
|
|
243
|
+
if sel_to and res.seqid == sel_to:
|
|
244
|
+
flag = False
|
|
245
|
+
logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))
|
|
246
|
+
|
|
247
|
+
igr_idxes = {igr:i for i, igr in enumerate(params["groups"])}
|
|
248
|
+
self.consts = [(is_comp, [igr_idxes[g] for g in gids])
|
|
249
|
+
for is_comp, gids in params["const"]]
|
|
250
|
+
self.ncycle = params.get("ncycle", 5)
|
|
251
|
+
# __init__()
|
|
252
|
+
|
|
253
|
+
def constraint(self, x):
|
|
254
|
+
# x: occupancy parameters
|
|
255
|
+
ret = []
|
|
256
|
+
for is_comp, ids in self.consts:
|
|
257
|
+
x_sum = numpy.sum(x[ids])
|
|
258
|
+
if is_comp or x_sum > 1:
|
|
259
|
+
ret.append(x_sum - 1)
|
|
260
|
+
else:
|
|
261
|
+
ret.append(0.)
|
|
262
|
+
return numpy.array(ret)
|
|
263
|
+
|
|
264
|
+
def ensure_constraints(self):
|
|
265
|
+
vals = []
|
|
266
|
+
for _, atoms in self.groups:
|
|
267
|
+
occ = numpy.mean([a.occ for a in atoms])
|
|
268
|
+
vals.append(occ)
|
|
269
|
+
for is_comp, idxes in self.consts:
|
|
270
|
+
sum_occ = sum(vals[i] for i in idxes)
|
|
271
|
+
if not is_comp and sum_occ < 1:
|
|
272
|
+
sum_occ = 1. # do nothing
|
|
273
|
+
for i in idxes:
|
|
274
|
+
#logger.writeln("Imposing constraints: {} {}".format(vals[i], vals[i]/sum_occ))
|
|
275
|
+
vals[i] /= sum_occ
|
|
276
|
+
for occ, (_, atoms) in zip(vals, self.groups):
|
|
277
|
+
for a in atoms: a.occ = occ
|
|
278
|
+
|
|
279
|
+
def get_x(self):
|
|
280
|
+
return numpy.array([atoms[0].occ for _, atoms in self.groups])
|
|
281
|
+
|
|
282
|
+
def set_x(self, x):
|
|
283
|
+
for p, (_, atoms) in zip(x, self.groups):
|
|
284
|
+
for a in atoms:
|
|
285
|
+
a.occ = p
|
|
286
|
+
|
|
287
|
+
def target(self, x, ll, ls, u):
|
|
288
|
+
self.set_x(x)
|
|
289
|
+
ll.update_fc()
|
|
290
|
+
c = self.constraint(x)
|
|
291
|
+
f = ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
|
|
292
|
+
return f
|
|
293
|
+
|
|
294
|
+
def grad(self, x, ll, ls, u, refine_h):
|
|
295
|
+
c = self.constraint(x)
|
|
296
|
+
ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
|
|
297
|
+
#print("grad=", ll.ll.vn)
|
|
298
|
+
#print("diag=", ll.ll.am)
|
|
299
|
+
assert len(ll.ll.vn) == len(ll.ll.am)
|
|
300
|
+
vn = []
|
|
301
|
+
diag = []
|
|
302
|
+
for idxes, atoms in self.groups:
|
|
303
|
+
if not refine_h:
|
|
304
|
+
idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
|
|
305
|
+
vn.append(numpy.sum(numpy.array(ll.ll.vn)[idxes]))
|
|
306
|
+
diag.append(numpy.sum(numpy.array(ll.ll.am)[idxes]))
|
|
307
|
+
vn, diag = numpy.array(vn), numpy.array(diag)
|
|
308
|
+
for i, (is_comp, idxes) in enumerate(self.consts):
|
|
309
|
+
dcdx = numpy.zeros(len(self.groups))
|
|
310
|
+
dcdx[idxes] = 1.
|
|
311
|
+
if is_comp or c[i] != 0:
|
|
312
|
+
vn -= (ls[i] - u * c[i]) * dcdx
|
|
313
|
+
diag += u * dcdx**2
|
|
314
|
+
|
|
315
|
+
return vn, diag
|
|
169
316
|
|
|
317
|
+
def refine(self, ll, refine_h, alpha=1.1):
|
|
318
|
+
# Refinement of grouped occupancies using augmented Lagrangian
|
|
319
|
+
# f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
|
|
320
|
+
# with c_j(x) = 0 constraints
|
|
321
|
+
if not self.groups:
|
|
322
|
+
return
|
|
323
|
+
logger.writeln("\n== Group occupancy refinement ==")
|
|
324
|
+
self.ensure_constraints() # make sure constrained groups have the same occupancies.
|
|
325
|
+
ls = 0 * numpy.ones(len(self.consts)) # Lagrange multiplier
|
|
326
|
+
u = 10000. # penalty parameter. in Refmac 1/0.01**2
|
|
327
|
+
x0 = self.get_x()
|
|
328
|
+
#logger.writeln(" parameters: {}".format(len(x0)))
|
|
329
|
+
f0 = self.target(x0, ll, ls, u)
|
|
330
|
+
ret = []
|
|
331
|
+
for cyc in range(self.ncycle):
|
|
332
|
+
ret.append({"Ncyc": cyc+1, "f0": f0})
|
|
333
|
+
logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
|
|
334
|
+
vn, diag = self.grad(x0, ll, ls, u, refine_h)
|
|
335
|
+
diag[diag < 1e-6] = 1.
|
|
336
|
+
dx = -vn / diag
|
|
337
|
+
if 0:
|
|
338
|
+
ofs = open("debug.dat", "w")
|
|
339
|
+
for scale in (-1, -0.5, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2):
|
|
340
|
+
self.set_x(x0 + scale * dx)
|
|
341
|
+
ll.update_fc()
|
|
342
|
+
c = self.constraint(x0 + dx)
|
|
343
|
+
f = ll.calc_target() + numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
|
|
344
|
+
ofs.write("{} {}\n".format(scale, f))
|
|
345
|
+
ofs.close()
|
|
346
|
+
import scipy.optimize
|
|
347
|
+
print(scipy.optimize.line_search(f=lambda x: self.target(x, ll, ls, u),
|
|
348
|
+
myfprime= lambda x: self.grad(ll, ls, u, refine_h)[0],
|
|
349
|
+
xk= x0,
|
|
350
|
+
pk= dx))
|
|
351
|
+
quit()
|
|
352
|
+
|
|
353
|
+
scale = 1
|
|
354
|
+
for i in range(3):
|
|
355
|
+
scale = 1/2**i
|
|
356
|
+
f1 = self.target(x0 + dx * scale, ll, ls, u)
|
|
357
|
+
logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
|
|
358
|
+
if f1 < f0: break
|
|
359
|
+
else:
|
|
360
|
+
logger.writeln("WARNING: function not minimised")
|
|
361
|
+
#self.set_x(x0) # Refmac accepts it even when function increases
|
|
362
|
+
c = self.constraint(x0 + dx * scale)
|
|
363
|
+
ret[-1]["f1"] = f1
|
|
364
|
+
ret[-1]["shift_scale"] = scale
|
|
365
|
+
f0 = f1
|
|
366
|
+
x0 = x0 + dx * scale
|
|
367
|
+
ls -= u * c
|
|
368
|
+
u = alpha * u
|
|
369
|
+
ret[-1]["const_viol"] = list(c)
|
|
370
|
+
ret[-1]["lambda_new"] = list(ls)
|
|
371
|
+
self.ensure_constraints()
|
|
372
|
+
ll.update_fc()
|
|
373
|
+
f = ll.calc_target()
|
|
374
|
+
logger.writeln("final -LL= {}".format(f))
|
|
375
|
+
return ret
|
|
376
|
+
|
|
377
|
+
|
|
170
378
|
class Refine:
|
|
171
|
-
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False,
|
|
379
|
+
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
|
|
380
|
+
unrestrained=False, refmac_keywords=None):
|
|
172
381
|
assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
|
|
173
382
|
assert geom is not None
|
|
174
383
|
self.st = st # clone()?
|
|
@@ -178,11 +387,13 @@ class Refine:
|
|
|
178
387
|
self.gamma = 0
|
|
179
388
|
self.adp_mode = 0 if self.ll is None else adp_mode
|
|
180
389
|
self.refine_xyz = refine_xyz
|
|
390
|
+
self.refine_occ = refine_occ
|
|
181
391
|
self.unrestrained = unrestrained
|
|
182
392
|
self.refine_h = refine_h
|
|
183
393
|
self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
|
|
184
394
|
if self.h_inherit_parent_adp:
|
|
185
395
|
self.geom.set_h_parents()
|
|
396
|
+
assert self.geom.group_occ.groups or self.n_params() > 0
|
|
186
397
|
# __init__()
|
|
187
398
|
|
|
188
399
|
def print_weights(self): # TODO unfinished
|
|
@@ -200,7 +411,7 @@ class Refine:
|
|
|
200
411
|
raise LookupError("unknown adpr_mode")
|
|
201
412
|
|
|
202
413
|
def scale_shifts(self, dx, scale):
|
|
203
|
-
n_atoms =
|
|
414
|
+
n_atoms = self.geom.n_refine_atoms
|
|
204
415
|
#ave_shift = numpy.mean(dx)
|
|
205
416
|
#max_shift = numpy.maximum(dx)
|
|
206
417
|
#rms_shift = numpy.std(dx)
|
|
@@ -208,19 +419,31 @@ class Refine:
|
|
|
208
419
|
shift_allow_low = -1.0
|
|
209
420
|
shift_max_allow_B = 30.0
|
|
210
421
|
shift_min_allow_B = -30.0
|
|
422
|
+
shift_max_allow_q = 0.5
|
|
423
|
+
shift_min_allow_q = -0.5
|
|
211
424
|
dx = scale * dx
|
|
212
|
-
offset_b = 0
|
|
425
|
+
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
426
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
213
427
|
if self.refine_xyz:
|
|
214
|
-
dxx = dx[:
|
|
428
|
+
dxx = dx[:offset_b]
|
|
429
|
+
logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
|
|
430
|
+
logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
|
|
431
|
+
logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
|
|
215
432
|
dxx[dxx > shift_allow_high] = shift_allow_high
|
|
216
433
|
dxx[dxx < shift_allow_low] = shift_allow_low
|
|
217
|
-
offset_b = n_atoms*3
|
|
218
434
|
if self.adp_mode == 1:
|
|
219
|
-
dxb = dx[offset_b:]
|
|
435
|
+
dxb = dx[offset_b:offset_q]
|
|
436
|
+
logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
|
|
437
|
+
logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
|
|
438
|
+
logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
|
|
220
439
|
dxb[dxb > shift_max_allow_B] = shift_max_allow_B
|
|
221
440
|
dxb[dxb < shift_min_allow_B] = shift_min_allow_B
|
|
222
441
|
elif self.adp_mode == 2:
|
|
223
|
-
dxb = dx[offset_b:]
|
|
442
|
+
dxb = dx[offset_b:offset_q]
|
|
443
|
+
# TODO this is misleading
|
|
444
|
+
logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
|
|
445
|
+
logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
|
|
446
|
+
logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
|
|
224
447
|
for i in range(len(dxb)//6):
|
|
225
448
|
j = i * 6
|
|
226
449
|
a = numpy.array([[dxb[j], dxb[j+3], dxb[j+4]],
|
|
@@ -231,29 +454,43 @@ class Refine:
|
|
|
231
454
|
v[v < shift_min_allow_B] = shift_min_allow_B
|
|
232
455
|
a = Q.dot(numpy.diag(v)).dot(Q.T)
|
|
233
456
|
dxb[j:j+6] = a[0,0], a[1,1], a[2,2], a[0,1], a[0,2], a[1,2]
|
|
234
|
-
|
|
457
|
+
if self.refine_occ:
|
|
458
|
+
dxq = dx[offset_q:]
|
|
459
|
+
logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
|
|
460
|
+
logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
|
|
461
|
+
logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
|
|
462
|
+
dxq[dxq > shift_max_allow_q] = shift_max_allow_q
|
|
463
|
+
dxq[dxq < shift_min_allow_q] = shift_min_allow_q
|
|
464
|
+
|
|
235
465
|
return dx
|
|
236
466
|
|
|
237
467
|
def n_params(self):
|
|
238
|
-
n_atoms =
|
|
468
|
+
n_atoms = self.geom.n_refine_atoms
|
|
239
469
|
n_params = 0
|
|
240
470
|
if self.refine_xyz: n_params += 3 * n_atoms
|
|
241
471
|
if self.adp_mode == 1:
|
|
242
472
|
n_params += n_atoms
|
|
243
473
|
elif self.adp_mode == 2:
|
|
244
474
|
n_params += 6 * n_atoms
|
|
475
|
+
if self.refine_occ:
|
|
476
|
+
n_params += n_atoms
|
|
245
477
|
return n_params
|
|
246
478
|
|
|
247
479
|
def set_x(self, x):
|
|
248
|
-
n_atoms =
|
|
480
|
+
n_atoms = self.geom.n_refine_atoms
|
|
249
481
|
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
250
|
-
|
|
482
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
483
|
+
max_occ = {}
|
|
484
|
+
if self.refine_occ and self.geom.specs:
|
|
485
|
+
max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
|
|
486
|
+
for i, j in enumerate(self.geom.atom_pos):
|
|
487
|
+
if j < 0: continue
|
|
251
488
|
if self.refine_xyz:
|
|
252
|
-
self.atoms[i].pos.fromlist(x[3*
|
|
489
|
+
self.atoms[i].pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
|
|
253
490
|
if self.adp_mode == 1:
|
|
254
|
-
self.atoms[i].b_iso = max(0.5, x[offset_b +
|
|
491
|
+
self.atoms[i].b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
|
|
255
492
|
elif self.adp_mode == 2:
|
|
256
|
-
a = x[offset_b + 6 *
|
|
493
|
+
a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
|
|
257
494
|
a = gemmi.SMat33d(*a)
|
|
258
495
|
M = numpy.array(a.as_mat33())
|
|
259
496
|
v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
|
|
@@ -262,6 +499,8 @@ class Refine:
|
|
|
262
499
|
self.atoms[i].b_iso = M2.trace() / 3
|
|
263
500
|
M2 *= b_to_u
|
|
264
501
|
self.atoms[i].aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
|
|
502
|
+
if self.refine_occ:
|
|
503
|
+
self.atoms[i].occ = min(max_occ.get(self.atoms[i], 1), max(1e-3, x[offset_q + j]))
|
|
265
504
|
|
|
266
505
|
# Copy B of hydrogen from parent
|
|
267
506
|
if self.h_inherit_parent_adp:
|
|
@@ -274,21 +513,26 @@ class Refine:
|
|
|
274
513
|
self.ll.update_fc()
|
|
275
514
|
|
|
276
515
|
self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
|
|
277
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
|
|
516
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
278
517
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
279
518
|
|
|
280
519
|
def get_x(self):
|
|
281
|
-
n_atoms =
|
|
520
|
+
n_atoms = self.geom.n_refine_atoms
|
|
282
521
|
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
522
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
283
523
|
x = numpy.zeros(self.n_params())
|
|
284
|
-
for i,
|
|
524
|
+
for i, j in enumerate(self.geom.atom_pos):
|
|
525
|
+
if j < 0: continue
|
|
526
|
+
a = self.atoms[i]
|
|
285
527
|
if self.refine_xyz:
|
|
286
|
-
x[3*
|
|
528
|
+
x[3*j:3*(j+1)] = a.pos.tolist()
|
|
287
529
|
if self.adp_mode == 1:
|
|
288
|
-
x[offset_b +
|
|
530
|
+
x[offset_b + j] = self.atoms[i].b_iso
|
|
289
531
|
elif self.adp_mode == 2:
|
|
290
|
-
x[offset_b + 6*
|
|
291
|
-
x[offset_b + 6*
|
|
532
|
+
x[offset_b + 6*j : offset_b + 6*(j+1)] = self.atoms[i].aniso.elements_pdb()
|
|
533
|
+
x[offset_b + 6*j : offset_b + 6*(j+1)] *= u_to_b
|
|
534
|
+
if self.refine_occ:
|
|
535
|
+
x[offset_q + j] = a.occ
|
|
292
536
|
|
|
293
537
|
return x
|
|
294
538
|
#@profile
|
|
@@ -301,7 +545,8 @@ class Refine:
|
|
|
301
545
|
ll = self.ll.calc_target()
|
|
302
546
|
logger.writeln(" ll= {}".format(ll))
|
|
303
547
|
if not target_only:
|
|
304
|
-
self.ll.calc_grad(self.
|
|
548
|
+
self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
|
|
549
|
+
self.refine_h, self.geom.geom.specials)
|
|
305
550
|
else:
|
|
306
551
|
ll = 0
|
|
307
552
|
|
|
@@ -356,19 +601,6 @@ class Refine:
|
|
|
356
601
|
M = scipy.sparse.diags(rdiag)
|
|
357
602
|
dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)
|
|
358
603
|
|
|
359
|
-
if self.refine_xyz:
|
|
360
|
-
dxx = dx[:len(self.atoms)*3]
|
|
361
|
-
#logger.writeln("dx = {}".format(dxx))
|
|
362
|
-
logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
|
|
363
|
-
logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
|
|
364
|
-
logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
|
|
365
|
-
if self.adp_mode > 0: # TODO for aniso
|
|
366
|
-
db = dx[len(self.atoms)*3 if self.refine_xyz else 0:]
|
|
367
|
-
#logger.writeln("dB = {}".format(db))
|
|
368
|
-
logger.writeln("min(dB) = {}".format(numpy.min(db)))
|
|
369
|
-
logger.writeln("max(dB) = {}".format(numpy.max(db)))
|
|
370
|
-
logger.writeln("mean(dB)= {}".format(numpy.mean(db)))
|
|
371
|
-
|
|
372
604
|
if 0: # to check hessian scale
|
|
373
605
|
with open("minimise_line.dat", "w") as ofs:
|
|
374
606
|
ofs.write("s f\n")
|
|
@@ -399,7 +631,7 @@ class Refine:
|
|
|
399
631
|
self.print_weights()
|
|
400
632
|
stats = [{"Ncyc": 0}]
|
|
401
633
|
self.geom.setup_nonbonded(self.refine_xyz)
|
|
402
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
|
|
634
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
403
635
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
404
636
|
if self.refine_xyz and not self.unrestrained:
|
|
405
637
|
stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=True)["summary"]
|
|
@@ -410,13 +642,18 @@ class Refine:
|
|
|
410
642
|
llstats = self.ll.calc_stats(bin_stats=True)
|
|
411
643
|
stats[-1]["data"] = {"summary": llstats["summary"],
|
|
412
644
|
"binned": llstats["bin_stats"].to_dict(orient="records")}
|
|
645
|
+
show_binstats(llstats["bin_stats"], 0)
|
|
413
646
|
if self.adp_mode > 0:
|
|
414
647
|
utils.model.adp_analysis(self.st)
|
|
648
|
+
occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
|
|
415
649
|
|
|
416
650
|
for i in range(ncycles):
|
|
417
651
|
logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
|
|
418
|
-
|
|
419
|
-
|
|
652
|
+
if self.refine_xyz or self.adp_mode > 0:
|
|
653
|
+
is_ok, shift_scale, fval = self.run_cycle(weight=weight)
|
|
654
|
+
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
|
|
655
|
+
if occ_refine_flag:
|
|
656
|
+
stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
|
|
420
657
|
if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
|
|
421
658
|
if self.refine_xyz and not self.unrestrained:
|
|
422
659
|
stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=(i==ncycles-1))["summary"]
|
|
@@ -430,21 +667,7 @@ class Refine:
|
|
|
430
667
|
"{} to {}".format(f0, llstats["summary"]["-LL"]))
|
|
431
668
|
stats[-1]["data"] = {"summary": llstats["summary"],
|
|
432
669
|
"binned": llstats["bin_stats"].to_dict(orient="records")}
|
|
433
|
-
|
|
434
|
-
df = llstats["bin_stats"]
|
|
435
|
-
forplot = []
|
|
436
|
-
rlabs = [x for x in df if x.startswith("R")]
|
|
437
|
-
cclabs = [x for x in df if x.startswith("CC")]
|
|
438
|
-
dlabs = [x for x in df if re.search("D[0-9]*", x)]
|
|
439
|
-
if "fsc_model" in df: forplot.append(["FSC", ["fsc_model"]])
|
|
440
|
-
if rlabs: forplot.append(["R", rlabs])
|
|
441
|
-
if cclabs: forplot.append(["CC", cclabs])
|
|
442
|
-
if dlabs: forplot.append(["ML parameters - D", dlabs])
|
|
443
|
-
if "S" in df: forplot.append(["ML parameters - Sigma", ["S"]])
|
|
444
|
-
lstr = utils.make_loggraph_str(df, "Data stats in cycle {}".format(i+1), forplot,
|
|
445
|
-
s2=1/df["d_min"]**2,
|
|
446
|
-
float_format="{:.4f}".format)
|
|
447
|
-
logger.writeln(lstr)
|
|
670
|
+
show_binstats(llstats["bin_stats"], i+1)
|
|
448
671
|
if self.adp_mode > 0:
|
|
449
672
|
utils.model.adp_analysis(self.st)
|
|
450
673
|
logger.writeln("")
|