servalcat 0.4.39__cp39-cp39-win_amd64.whl → 0.4.60__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cp39-win_amd64.pyd +0 -0
- servalcat/refine/refine.py +265 -45
- servalcat/refine/refine_geom.py +68 -40
- servalcat/refine/refine_spa.py +10 -2
- servalcat/refine/refine_xtal.py +25 -6
- servalcat/refine/spa.py +4 -3
- servalcat/refine/xtal.py +10 -8
- servalcat/refmac/exte.py +11 -7
- servalcat/refmac/refmac_keywords.py +106 -87
- servalcat/refmac/refmac_wrapper.py +76 -15
- servalcat/spa/fofc.py +7 -4
- servalcat/spa/run_refmac.py +19 -14
- servalcat/utils/commands.py +45 -22
- servalcat/utils/fileio.py +37 -36
- servalcat/utils/generate_operators.py +2 -2
- servalcat/utils/hkl.py +20 -5
- servalcat/utils/model.py +7 -10
- servalcat/utils/refmac.py +20 -7
- servalcat/utils/restraints.py +119 -9
- servalcat/xtal/run_refmac_small.py +55 -63
- servalcat/xtal/sigmaa.py +112 -64
- servalcat-0.4.60.dist-info/METADATA +56 -0
- servalcat-0.4.60.dist-info/RECORD +44 -0
- {servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info}/WHEEL +1 -1
- {servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info}/entry_points.txt +2 -1
- servalcat-0.4.39.dist-info/METADATA +0 -16
- servalcat-0.4.39.dist-info/RECORD +0 -45
- servalcat-0.4.39.dist-info/top_level.txt +0 -1
- /servalcat/{command_line.py → __main__.py} +0 -0
- {servalcat-0.4.39.dist-info → servalcat-0.4.60.dist-info/licenses}/LICENSE +0 -0
servalcat/__init__.py
CHANGED
servalcat/ext.cp39-win_amd64.pyd
CHANGED
|
Binary file
|
servalcat/refine/refine.py
CHANGED
|
@@ -29,12 +29,18 @@ b_to_u = utils.model.b_to_u
|
|
|
29
29
|
|
|
30
30
|
class Geom:
|
|
31
31
|
def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
|
|
32
|
-
refmac_keywords=None, unrestrained=False, use_nucleus=False
|
|
32
|
+
refmac_keywords=None, unrestrained=False, use_nucleus=False,
|
|
33
|
+
ncslist=None, atom_pos=None):
|
|
33
34
|
self.st = st
|
|
34
35
|
self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
|
|
35
36
|
for cra in self.st[0].all(): self.atoms[cra.atom.serial-1] = cra.atom
|
|
37
|
+
if atom_pos is not None:
|
|
38
|
+
self.atom_pos = atom_pos
|
|
39
|
+
else:
|
|
40
|
+
self.atom_pos = list(range(len(self.atoms)))
|
|
41
|
+
self.n_refine_atoms = max(self.atom_pos) + 1
|
|
36
42
|
self.lookup = {x.atom: x for x in self.st[0].all()}
|
|
37
|
-
self.geom = ext.Geometry(self.st, monlib.ener_lib)
|
|
43
|
+
self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
|
|
38
44
|
self.specs = utils.model.find_special_positions(self.st)
|
|
39
45
|
#cs_count = len(self.st.find_spacegroup().operations())
|
|
40
46
|
for atom, images, matp, mata in self.specs:
|
|
@@ -55,13 +61,17 @@ class Geom:
|
|
|
55
61
|
if refmac_keywords:
|
|
56
62
|
exte.read_external_restraints(refmac_keywords, self.st, self.geom)
|
|
57
63
|
kwds = parse_keywords(refmac_keywords)
|
|
58
|
-
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw"):
|
|
64
|
+
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
|
|
59
65
|
if k in kwds:
|
|
60
66
|
self.calc_kwds[k] = kwds[k]
|
|
61
67
|
logger.writeln("setting geometry weight {}= {}".format(k, kwds[k]))
|
|
68
|
+
self.group_occ = GroupOccupancy(self.st, kwds.get("occu"))
|
|
69
|
+
else:
|
|
70
|
+
self.group_occ = GroupOccupancy(self.st, None)
|
|
62
71
|
self.geom.finalize_restraints()
|
|
63
|
-
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
72
|
+
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
64
73
|
self.parents = {}
|
|
74
|
+
self.ncslist = ncslist
|
|
65
75
|
# __init__()
|
|
66
76
|
|
|
67
77
|
def check_chemtypes(self, enerlib_path, topo):
|
|
@@ -87,7 +97,9 @@ class Geom:
|
|
|
87
97
|
# set_h_parents()
|
|
88
98
|
def setup_nonbonded(self, refine_xyz):
|
|
89
99
|
skip_critical_dist = not refine_xyz or self.unrestrained
|
|
90
|
-
self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist)
|
|
100
|
+
self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist, group_idxes=self.group_occ.group_idxes)
|
|
101
|
+
if self.ncslist:
|
|
102
|
+
self.geom.setup_ncsr(self.ncslist)
|
|
91
103
|
def calc(self, target_only):
|
|
92
104
|
return self.geom.calc(check_only=target_only, **self.calc_kwds)
|
|
93
105
|
def calc_adp_restraint(self, target_only):
|
|
@@ -116,6 +128,7 @@ class Geom:
|
|
|
116
128
|
staca=self.geom.reporting.get_stacking_angle_outliers,
|
|
117
129
|
stacd=self.geom.reporting.get_stacking_dist_outliers,
|
|
118
130
|
vdw=self.geom.reporting.get_vdw_outliers,
|
|
131
|
+
#ncs=self.geom.reporting.get_ncsr_outliers, # not useful?
|
|
119
132
|
)
|
|
120
133
|
labs = dict(bond="Bond distances",
|
|
121
134
|
angle="Bond angles",
|
|
@@ -124,7 +137,8 @@ class Geom:
|
|
|
124
137
|
plane="Planar groups",
|
|
125
138
|
staca="Stacking plane angles",
|
|
126
139
|
stacd="Stacking plane distances",
|
|
127
|
-
vdw="VDW repulsions"
|
|
140
|
+
vdw="VDW repulsions",
|
|
141
|
+
ncs="Local NCS restraints")
|
|
128
142
|
|
|
129
143
|
for k in get_table:
|
|
130
144
|
kwgs = {"min_z": self.outlier_sigmas[k]}
|
|
@@ -132,7 +146,7 @@ class Geom:
|
|
|
132
146
|
table = get_table[k](**kwgs)
|
|
133
147
|
if table["z"]:
|
|
134
148
|
for kk in table:
|
|
135
|
-
if kk.startswith(("atom", "plane")):
|
|
149
|
+
if kk.startswith(("atom", "plane", "1_atom", "2_atom")):
|
|
136
150
|
table[kk] = [str(self.lookup[x]) for x in table[kk]]
|
|
137
151
|
df = pandas.DataFrame(table)
|
|
138
152
|
df = df.reindex(df.z.abs().sort_values(ascending=False).index)
|
|
@@ -183,8 +197,187 @@ def show_binstats(df, cycle_number):
|
|
|
183
197
|
logger.writeln(lstr)
|
|
184
198
|
# show_binstats()
|
|
185
199
|
|
|
200
|
+
class GroupOccupancy:
    """Grouped occupancy parameters with sum constraints.

    Each group is a set of atoms sharing one occupancy value.  Constraints
    are tuples ``(is_comp, group_indices)``: when ``is_comp`` is true the
    occupancies of the listed groups must sum to exactly 1, otherwise the
    sum is only penalised when it exceeds 1.

    Attributes set by ``__init__``:
      groups      -- list of ``[indexes, atoms]`` pairs, one per group
      consts      -- list of ``(is_comp, [group index, ...])``
      group_idxes -- per atom (by ``serial-1``): 1-based group number, 0 if none
      atom_pos    -- per atom (by ``serial-1``): running index of the atom
                     within the grouped atoms, -1 if the atom is in no group
      ncycle      -- number of occupancy refinement cycles (0 = disabled)
    """
    # TODO max may not be one. should check multiplicity
    def __init__(self, st, params):
        """Build groups and constraints from parsed occupancy keywords.

        st     -- structure; only ``st[0]`` (first model) is used
        params -- dict with "groups" (group id -> list of selection dicts),
                  optionally "const" and "ncycle"; falsy means no groups.
                  NOTE(review): the schema is inferred from the keys read
                  here; confirm against the keyword parser.
        """
        n_sites = st[0].count_atom_sites()
        self.groups = []
        self.consts = []
        self.group_idxes = [0 for _ in range(n_sites)]
        # Always defined (previously only set when groups were given), so
        # that grad() cannot hit a missing attribute.
        self.atom_pos = [-1 for _ in range(n_sites)]
        self.ncycle = 0
        if not params or not params.get("groups"):
            return
        logger.writeln("Occupancy groups:")
        count = 0
        for igr in params["groups"]:
            self.groups.append([[], []]) # list of [indexes, atoms]
            n_curr = count
            for sel in params["groups"][igr]:
                sel_chains = sel.get("chains")
                sel_from = sel.get("resi_from")
                sel_to = sel.get("resi_to")
                sel_seq = sel.get("resi")
                sel_atom = sel.get("atom")
                sel_alt = sel.get("alt")
                for chain in st[0]:
                    if sel_chains and chain.name not in sel_chains:
                        continue
                    # flag is True while inside the resi_from..resi_to range
                    flag = False
                    for res in chain:
                        if sel_seq and res.seqid != sel_seq:
                            continue
                        if sel_from and res.seqid == sel_from:
                            flag = True
                        if sel_from and not flag:
                            continue
                        for atom in res:
                            if sel_atom and atom.name != sel_atom:
                                continue
                            if sel_alt and atom.altloc != sel_alt:
                                continue
                            self.atom_pos[atom.serial-1] = count
                            self.groups[-1][0].append(count)
                            self.groups[-1][1].append(atom)
                            self.group_idxes[atom.serial-1] = len(self.groups)
                            count += 1
                        # the range end is inclusive: close it after the residue
                        if sel_to and res.seqid == sel_to:
                            flag = False
            logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))

        # translate user-facing group ids into 0-based positions in self.groups
        igr_idxes = {igr: i for i, igr in enumerate(params["groups"])}
        # groups without any constraint are allowed (no KeyError on "const")
        self.consts = [(is_comp, [igr_idxes[g] for g in gids])
                       for is_comp, gids in (params.get("const") or [])]
        self.ncycle = params.get("ncycle", 5)
    # __init__()

    def constraint(self, x):
        """Return the constraint values c_j for occupancy parameters ``x``.

        ``x`` must be a numpy array (it is indexed with a list of indices).
        c_j = sum(x[ids]) - 1 for complete constraints or when the sum
        exceeds 1; otherwise c_j = 0.
        """
        # x: occupancy parameters
        ret = []
        for is_comp, ids in self.consts:
            x_sum = numpy.sum(x[ids])
            if is_comp or x_sum > 1:
                ret.append(x_sum - 1)
            else:
                ret.append(0.)
        return numpy.array(ret)

    def ensure_constraints(self):
        """Make atom occupancies consistent with groups and constraints.

        Every group gets the mean occupancy of its atoms; then, per
        constraint, the group values are divided by their sum (incomplete
        constraints are left alone while the sum is below 1).
        """
        vals = []
        for _, atoms in self.groups:
            occ = numpy.mean([a.occ for a in atoms])
            vals.append(occ)
        for is_comp, idxes in self.consts:
            sum_occ = sum(vals[i] for i in idxes)
            if not is_comp and sum_occ < 1:
                sum_occ = 1. # do nothing
            if sum_occ <= 0:
                # cannot rescale all-zero occupancies; dividing would write
                # nan/inf onto the atoms
                continue
            for i in idxes:
                #logger.writeln("Imposing constraints: {} {}".format(vals[i], vals[i]/sum_occ))
                vals[i] /= sum_occ
        for occ, (_, atoms) in zip(vals, self.groups):
            for a in atoms: a.occ = occ

    def get_x(self):
        """Return one occupancy per group, taken from each group's first atom."""
        return numpy.array([atoms[0].occ for _, atoms in self.groups])

    def set_x(self, x):
        """Assign ``x[k]`` as the occupancy of every atom of group ``k``."""
        for p, (_, atoms) in zip(x, self.groups):
            for a in atoms:
                a.occ = p

    def target(self, x, ll, ls, u):
        """Evaluate the augmented Lagrangian at ``x``.

        Sets the occupancies, refreshes Fc through ``ll.update_fc()`` and
        returns ``ll.calc_target() - ls.c + u/2 * sum(c^2)``.
        """
        self.set_x(x)
        ll.update_fc()
        c = self.constraint(x)
        f = ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
        return f

    def grad(self, x, ll, ls, u, refine_h):
        """Return per-group gradient and diagonal second-derivative terms.

        The per-atom values that ``ll.calc_grad`` leaves in ``ll.ll.vn`` and
        ``ll.ll.am`` are summed over each group (hydrogens skipped unless
        ``refine_h``), then the constraint terms are added.
        """
        c = self.constraint(x)
        ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
        #print("grad=", ll.ll.vn)
        #print("diag=", ll.ll.am)
        assert len(ll.ll.vn) == len(ll.ll.am)
        # convert once instead of once per group
        ll_vn = numpy.array(ll.ll.vn)
        ll_am = numpy.array(ll.ll.am)
        vn = []
        diag = []
        for idxes, atoms in self.groups:
            if not refine_h:
                idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
            vn.append(numpy.sum(ll_vn[idxes]))
            diag.append(numpy.sum(ll_am[idxes]))
        vn, diag = numpy.array(vn), numpy.array(diag)
        for i, (is_comp, idxes) in enumerate(self.consts):
            dcdx = numpy.zeros(len(self.groups))
            dcdx[idxes] = 1.
            # inactive incomplete constraints (c == 0) contribute nothing
            if is_comp or c[i] != 0:
                vn -= (ls[i] - u * c[i]) * dcdx
                diag += u * dcdx**2

        return vn, diag

    def refine(self, ll, refine_h, alpha=1.1):
        """Refine group occupancies; return per-cycle statistics.

        Returns None when there are no groups, otherwise a list of dicts
        with keys "Ncyc", "f0", "f1", "shift_scale", "const_viol" and
        "lambda_new".  ``alpha`` multiplies the penalty parameter after
        every cycle.
        """
        # Refinement of grouped occupancies using augmented Lagrangian
        # f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
        # with c_j(x) = 0 constraints
        if not self.groups:
            return
        logger.writeln("\n== Group occupancy refinement ==")
        self.ensure_constraints() # make sure constrained groups have the same occupancies.
        ls = numpy.zeros(len(self.consts)) # Lagrange multiplier
        u = 10000. # penalty parameter. in Refmac 1/0.01**2
        x0 = self.get_x()
        #logger.writeln(" parameters: {}".format(len(x0)))
        f0 = self.target(x0, ll, ls, u)
        ret = []
        for cyc in range(self.ncycle):
            ret.append({"Ncyc": cyc+1, "f0": f0})
            logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
            vn, diag = self.grad(x0, ll, ls, u, refine_h)
            diag[diag < 1e-6] = 1.
            dx = -vn / diag

            # try full, half and quarter shifts; keep the first that lowers f
            scale = 1
            for i in range(3):
                scale = 1/2**i
                f1 = self.target(x0 + dx * scale, ll, ls, u)
                logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
                if f1 < f0: break
            else:
                logger.writeln("WARNING: function not minimised")
                #self.set_x(x0) # Refmac accepts it even when function increases
            c = self.constraint(x0 + dx * scale)
            ret[-1]["f1"] = f1
            ret[-1]["shift_scale"] = scale
            f0 = f1
            x0 = x0 + dx * scale
            ls -= u * c
            u = alpha * u
            ret[-1]["const_viol"] = list(c)
            ret[-1]["lambda_new"] = list(ls)
        self.ensure_constraints()
        ll.update_fc()
        f = ll.calc_target()
        logger.writeln("final -LL= {}".format(f))
        return ret
|
|
376
|
+
|
|
377
|
+
|
|
186
378
|
class Refine:
|
|
187
|
-
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False,
|
|
379
|
+
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
|
|
380
|
+
unrestrained=False, refmac_keywords=None):
|
|
188
381
|
assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
|
|
189
382
|
assert geom is not None
|
|
190
383
|
self.st = st # clone()?
|
|
@@ -194,11 +387,13 @@ class Refine:
|
|
|
194
387
|
self.gamma = 0
|
|
195
388
|
self.adp_mode = 0 if self.ll is None else adp_mode
|
|
196
389
|
self.refine_xyz = refine_xyz
|
|
390
|
+
self.refine_occ = refine_occ
|
|
197
391
|
self.unrestrained = unrestrained
|
|
198
392
|
self.refine_h = refine_h
|
|
199
393
|
self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
|
|
200
394
|
if self.h_inherit_parent_adp:
|
|
201
395
|
self.geom.set_h_parents()
|
|
396
|
+
assert self.geom.group_occ.groups or self.n_params() > 0
|
|
202
397
|
# __init__()
|
|
203
398
|
|
|
204
399
|
def print_weights(self): # TODO unfinished
|
|
@@ -216,7 +411,7 @@ class Refine:
|
|
|
216
411
|
raise LookupError("unknown adpr_mode")
|
|
217
412
|
|
|
218
413
|
def scale_shifts(self, dx, scale):
|
|
219
|
-
n_atoms =
|
|
414
|
+
n_atoms = self.geom.n_refine_atoms
|
|
220
415
|
#ave_shift = numpy.mean(dx)
|
|
221
416
|
#max_shift = numpy.maximum(dx)
|
|
222
417
|
#rms_shift = numpy.std(dx)
|
|
@@ -224,19 +419,31 @@ class Refine:
|
|
|
224
419
|
shift_allow_low = -1.0
|
|
225
420
|
shift_max_allow_B = 30.0
|
|
226
421
|
shift_min_allow_B = -30.0
|
|
422
|
+
shift_max_allow_q = 0.5
|
|
423
|
+
shift_min_allow_q = -0.5
|
|
227
424
|
dx = scale * dx
|
|
228
|
-
offset_b = 0
|
|
425
|
+
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
426
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
229
427
|
if self.refine_xyz:
|
|
230
|
-
dxx = dx[:
|
|
428
|
+
dxx = dx[:offset_b]
|
|
429
|
+
logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
|
|
430
|
+
logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
|
|
431
|
+
logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
|
|
231
432
|
dxx[dxx > shift_allow_high] = shift_allow_high
|
|
232
433
|
dxx[dxx < shift_allow_low] = shift_allow_low
|
|
233
|
-
offset_b = n_atoms*3
|
|
234
434
|
if self.adp_mode == 1:
|
|
235
|
-
dxb = dx[offset_b:]
|
|
435
|
+
dxb = dx[offset_b:offset_q]
|
|
436
|
+
logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
|
|
437
|
+
logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
|
|
438
|
+
logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
|
|
236
439
|
dxb[dxb > shift_max_allow_B] = shift_max_allow_B
|
|
237
440
|
dxb[dxb < shift_min_allow_B] = shift_min_allow_B
|
|
238
441
|
elif self.adp_mode == 2:
|
|
239
|
-
dxb = dx[offset_b:]
|
|
442
|
+
dxb = dx[offset_b:offset_q]
|
|
443
|
+
# TODO this is misleading
|
|
444
|
+
logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
|
|
445
|
+
logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
|
|
446
|
+
logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
|
|
240
447
|
for i in range(len(dxb)//6):
|
|
241
448
|
j = i * 6
|
|
242
449
|
a = numpy.array([[dxb[j], dxb[j+3], dxb[j+4]],
|
|
@@ -247,29 +454,43 @@ class Refine:
|
|
|
247
454
|
v[v < shift_min_allow_B] = shift_min_allow_B
|
|
248
455
|
a = Q.dot(numpy.diag(v)).dot(Q.T)
|
|
249
456
|
dxb[j:j+6] = a[0,0], a[1,1], a[2,2], a[0,1], a[0,2], a[1,2]
|
|
250
|
-
|
|
457
|
+
if self.refine_occ:
|
|
458
|
+
dxq = dx[offset_q:]
|
|
459
|
+
logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
|
|
460
|
+
logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
|
|
461
|
+
logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
|
|
462
|
+
dxq[dxq > shift_max_allow_q] = shift_max_allow_q
|
|
463
|
+
dxq[dxq < shift_min_allow_q] = shift_min_allow_q
|
|
464
|
+
|
|
251
465
|
return dx
|
|
252
466
|
|
|
253
467
|
def n_params(self):
    """Return the total number of refined parameters.

    Per atom: 3 when coordinates are refined, 1 (adp_mode 1) or 6
    (adp_mode 2) for ADPs, and 1 when occupancies are refined; the total
    is that width times ``self.geom.n_refine_atoms``.
    """
    per_atom = 0
    if self.refine_xyz:
        per_atom += 3
    # any other adp_mode contributes no ADP parameters
    per_atom += {1: 1, 2: 6}.get(self.adp_mode, 0)
    if self.refine_occ:
        per_atom += 1
    return per_atom * self.geom.n_refine_atoms
|
|
262
478
|
|
|
263
479
|
def set_x(self, x):
|
|
264
|
-
n_atoms =
|
|
480
|
+
n_atoms = self.geom.n_refine_atoms
|
|
265
481
|
offset_b = n_atoms * 3 if self.refine_xyz else 0
|
|
266
|
-
|
|
482
|
+
offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
|
|
483
|
+
max_occ = {}
|
|
484
|
+
if self.refine_occ and self.geom.specs:
|
|
485
|
+
max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
|
|
486
|
+
for i, j in enumerate(self.geom.atom_pos):
|
|
487
|
+
if j < 0: continue
|
|
267
488
|
if self.refine_xyz:
|
|
268
|
-
self.atoms[i].pos.fromlist(x[3*
|
|
489
|
+
self.atoms[i].pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
|
|
269
490
|
if self.adp_mode == 1:
|
|
270
|
-
self.atoms[i].b_iso = max(0.5, x[offset_b +
|
|
491
|
+
self.atoms[i].b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
|
|
271
492
|
elif self.adp_mode == 2:
|
|
272
|
-
a = x[offset_b + 6 *
|
|
493
|
+
a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
|
|
273
494
|
a = gemmi.SMat33d(*a)
|
|
274
495
|
M = numpy.array(a.as_mat33())
|
|
275
496
|
v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
|
|
@@ -278,6 +499,8 @@ class Refine:
|
|
|
278
499
|
self.atoms[i].b_iso = M2.trace() / 3
|
|
279
500
|
M2 *= b_to_u
|
|
280
501
|
self.atoms[i].aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
|
|
502
|
+
if self.refine_occ:
|
|
503
|
+
self.atoms[i].occ = min(max_occ.get(self.atoms[i], 1), max(1e-3, x[offset_q + j]))
|
|
281
504
|
|
|
282
505
|
# Copy B of hydrogen from parent
|
|
283
506
|
if self.h_inherit_parent_adp:
|
|
@@ -290,21 +513,26 @@ class Refine:
|
|
|
290
513
|
self.ll.update_fc()
|
|
291
514
|
|
|
292
515
|
self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
|
|
293
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
|
|
516
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
294
517
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
295
518
|
|
|
296
519
|
def get_x(self):
    """Collect the current model parameters into one flat numpy array.

    Layout: coordinates (3 per atom, if ``refine_xyz``), then ADPs
    (1 or 6 per atom depending on ``adp_mode``), then occupancies
    (if ``refine_occ``).  ``self.geom.atom_pos[i]`` gives the slot of atom
    ``i``; atoms with a negative slot are skipped.
    """
    n_atoms = self.geom.n_refine_atoms
    adp_width = {0: 0, 1: 1, 2: 6}[self.adp_mode]
    offset_b = 3 * n_atoms if self.refine_xyz else 0
    offset_q = offset_b + adp_width * n_atoms
    x = numpy.zeros(self.n_params())
    for i, j in enumerate(self.geom.atom_pos):
        if j < 0:
            continue
        atom = self.atoms[i]
        if self.refine_xyz:
            x[3*j:3*j+3] = atom.pos.tolist()
        if self.adp_mode == 1:
            x[offset_b + j] = atom.b_iso
        elif self.adp_mode == 2:
            start = offset_b + 6 * j
            x[start:start+6] = atom.aniso.elements_pdb()
            # scale the stored aniso values by u_to_b
            x[start:start+6] *= u_to_b
        if self.refine_occ:
            x[offset_q + j] = atom.occ

    return x
|
|
310
538
|
#@profile
|
|
@@ -317,7 +545,8 @@ class Refine:
|
|
|
317
545
|
ll = self.ll.calc_target()
|
|
318
546
|
logger.writeln(" ll= {}".format(ll))
|
|
319
547
|
if not target_only:
|
|
320
|
-
self.ll.calc_grad(self.
|
|
548
|
+
self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
|
|
549
|
+
self.refine_h, self.geom.geom.specials)
|
|
321
550
|
else:
|
|
322
551
|
ll = 0
|
|
323
552
|
|
|
@@ -372,19 +601,6 @@ class Refine:
|
|
|
372
601
|
M = scipy.sparse.diags(rdiag)
|
|
373
602
|
dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)
|
|
374
603
|
|
|
375
|
-
if self.refine_xyz:
|
|
376
|
-
dxx = dx[:len(self.atoms)*3]
|
|
377
|
-
#logger.writeln("dx = {}".format(dxx))
|
|
378
|
-
logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
|
|
379
|
-
logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
|
|
380
|
-
logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
|
|
381
|
-
if self.adp_mode > 0: # TODO for aniso
|
|
382
|
-
db = dx[len(self.atoms)*3 if self.refine_xyz else 0:]
|
|
383
|
-
#logger.writeln("dB = {}".format(db))
|
|
384
|
-
logger.writeln("min(dB) = {}".format(numpy.min(db)))
|
|
385
|
-
logger.writeln("max(dB) = {}".format(numpy.max(db)))
|
|
386
|
-
logger.writeln("mean(dB)= {}".format(numpy.mean(db)))
|
|
387
|
-
|
|
388
604
|
if 0: # to check hessian scale
|
|
389
605
|
with open("minimise_line.dat", "w") as ofs:
|
|
390
606
|
ofs.write("s f\n")
|
|
@@ -415,7 +631,7 @@ class Refine:
|
|
|
415
631
|
self.print_weights()
|
|
416
632
|
stats = [{"Ncyc": 0}]
|
|
417
633
|
self.geom.setup_nonbonded(self.refine_xyz)
|
|
418
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
|
|
634
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
419
635
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
420
636
|
if self.refine_xyz and not self.unrestrained:
|
|
421
637
|
stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=True)["summary"]
|
|
@@ -429,11 +645,15 @@ class Refine:
|
|
|
429
645
|
show_binstats(llstats["bin_stats"], 0)
|
|
430
646
|
if self.adp_mode > 0:
|
|
431
647
|
utils.model.adp_analysis(self.st)
|
|
648
|
+
occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
|
|
432
649
|
|
|
433
650
|
for i in range(ncycles):
|
|
434
651
|
logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
|
|
435
|
-
|
|
436
|
-
|
|
652
|
+
if self.refine_xyz or self.adp_mode > 0:
|
|
653
|
+
is_ok, shift_scale, fval = self.run_cycle(weight=weight)
|
|
654
|
+
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
|
|
655
|
+
if occ_refine_flag:
|
|
656
|
+
stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
|
|
437
657
|
if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
|
|
438
658
|
if self.refine_xyz and not self.unrestrained:
|
|
439
659
|
stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=(i==ncycles-1))["summary"]
|
servalcat/refine/refine_geom.py
CHANGED
|
@@ -35,11 +35,14 @@ def add_arguments(parser):
|
|
|
35
35
|
help='Automatically add links')
|
|
36
36
|
parser.add_argument('--randomize', type=float, default=0,
|
|
37
37
|
help='Shake coordinates with specified rmsd')
|
|
38
|
+
parser.add_argument('--ncsr', action='store_true',
|
|
39
|
+
help='Use local NCS restraints')
|
|
38
40
|
parser.add_argument('--keywords', nargs='+', action="append",
|
|
39
41
|
help="refmac keyword(s)")
|
|
40
42
|
parser.add_argument('--keyword_file', nargs='+', action="append",
|
|
41
43
|
help="refmac keyword file(s)")
|
|
42
|
-
parser.add_argument('-o','--output_prefix'
|
|
44
|
+
parser.add_argument('-o','--output_prefix',
|
|
45
|
+
help="Output prefix")
|
|
43
46
|
|
|
44
47
|
# add_arguments()
|
|
45
48
|
|
|
@@ -49,6 +52,29 @@ def parse_args(arg_list):
|
|
|
49
52
|
return parser.parse_args(arg_list)
|
|
50
53
|
# parse_args()
|
|
51
54
|
|
|
55
|
+
def add_program_info_to_dictionary(block, comp_id, program_name="servalcat", descriptor="optimization tool"):
    """Record this program in ``_pdbx_chem_comp_description_generator``.

    If a row with the same program name and descriptor already exists,
    only its program_version is overwritten.  Otherwise a row is appended
    to the existing loop, or to a newly initialised one with the columns
    comp_id, program_name, program_version and descriptor.
    """
    category = "_pdbx_chem_comp_description_generator."
    existing = block.find(category, ["program_name", "program_version", "descriptor"])
    # just overwrite version if it's there
    for entry in existing:
        if entry.str(0) == program_name and entry.str(2) == descriptor:
            entry[1] = gemmi.cif.quote(servalcat.__version__)
            return
    loop = existing.loop
    if not loop:
        loop = block.init_loop(category, ["comp_id",
                                          "program_name",
                                          "program_version",
                                          "descriptor"])
    values = {"comp_id": comp_id,
              "program_name": program_name,
              "program_version": servalcat.__version__,
              "descriptor": descriptor}
    # columns of the loop that we do not know about are left empty
    short_tags = [tag[tag.index(".")+1:] for tag in loop.tags]
    new_row = [values.get(tag, "") for tag in short_tags]
    loop.add_row(gemmi.cif.quote_list(new_row))
# add_program_info_to_dictionary()
|
|
77
|
+
|
|
52
78
|
def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0, ncycle1=10, ncycle2=30):
|
|
53
79
|
doc = gemmi.cif.read(cif_in)
|
|
54
80
|
for block in doc: # this block will be reused below
|
|
@@ -59,55 +85,52 @@ def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0
|
|
|
59
85
|
monlib = utils.restraints.load_monomer_library(st, monomer_dir=monomer_dir, # monlib is needed for ener_lib
|
|
60
86
|
cif_files=[cif_in],
|
|
61
87
|
stop_for_unknowns=True)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
88
|
+
all_stats = []
|
|
89
|
+
for i_macro in 0, 1:
|
|
90
|
+
try:
|
|
91
|
+
topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=[gemmi.HydrogenChange.Remove, gemmi.HydrogenChange.ReAdd][i_macro],
|
|
92
|
+
check_hydrogen=(i_macro == 1))
|
|
93
|
+
except RuntimeError as e:
|
|
94
|
+
raise SystemExit("Error: {}".format(e))
|
|
67
95
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
96
|
+
geom = Geom(st, topo, monlib, shake_rms=randomize)
|
|
97
|
+
refiner = Refine(st, geom)
|
|
98
|
+
logger.writeln("Running {} cycles with wchir=4 wvdw=2 {} hydrogen".format(ncycle1, ["without","with"][i_macro]))
|
|
99
|
+
geom.calc_kwds["wchir"] = 4
|
|
100
|
+
geom.calc_kwds["wvdw"] = 2
|
|
101
|
+
all_stats.append(refiner.run_cycles(ncycle1))
|
|
74
102
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
refiner = Refine(st, geom)
|
|
80
|
-
logger.writeln("Running {} cycles with wchir=1 wvdw=2".format(ncycle2))
|
|
81
|
-
geom.calc_kwds["wchir"] = 1
|
|
82
|
-
geom.calc_kwds["wvdw"] = 2
|
|
83
|
-
refiner.run_cycles(ncycle2)
|
|
103
|
+
logger.writeln("Running {} cycles with wchir=1 wvdw=2 {} hydrogen".format(ncycle2, ["without","with"][i_macro]))
|
|
104
|
+
geom.calc_kwds["wchir"] = 1
|
|
105
|
+
geom.calc_kwds["wvdw"] = 2
|
|
106
|
+
all_stats.append(refiner.run_cycles(ncycle2))
|
|
84
107
|
|
|
85
108
|
# replace xyz
|
|
86
109
|
pos = {cra.atom.name: cra.atom.pos.tolist() for cra in refiner.st[0].all()}
|
|
87
|
-
for row in block.find("_chem_comp_atom.", ["atom_id", "x", "y", "z"
|
|
110
|
+
for row in block.find("_chem_comp_atom.", ["atom_id", "?x", "?y", "?z",
|
|
111
|
+
"?pdbx_model_Cartn_x_ideal",
|
|
112
|
+
"?pdbx_model_Cartn_y_ideal",
|
|
113
|
+
"?pdbx_model_Cartn_z_ideal"]):
|
|
88
114
|
p = pos[row.str(0)]
|
|
89
115
|
for i in range(3):
|
|
90
|
-
row
|
|
116
|
+
if row.has(i+1):
|
|
117
|
+
row[i+1] = "{:.3f}".format(p[i])
|
|
118
|
+
if row.has(i+4):
|
|
119
|
+
row[i+4] = "{:.3f}".format(p[i])
|
|
91
120
|
# add description
|
|
92
|
-
|
|
93
|
-
if not loop:
|
|
94
|
-
loop = block.init_loop("_pdbx_chem_comp_description_generator.", ["comp_id",
|
|
95
|
-
"program_name",
|
|
96
|
-
"program_version",
|
|
97
|
-
"descriptor"])
|
|
98
|
-
tags = [x[x.index(".")+1:] for x in loop.tags]
|
|
99
|
-
row = ["" for _ in range(len(tags))]
|
|
100
|
-
for tag, val in (("comp_id", st[0][0][0].name),
|
|
101
|
-
("program_name", "servalcat"),
|
|
102
|
-
("program_version", servalcat.__version__),
|
|
103
|
-
("descriptor", "optimization tool")):
|
|
104
|
-
if tag in tags: row[tags.index(tag)] = val
|
|
105
|
-
loop.add_row(gemmi.cif.quote_list(row))
|
|
121
|
+
add_program_info_to_dictionary(block, st[0][0][0].name)
|
|
106
122
|
doc.write_file(output_prefix + "_updated.cif", style=gemmi.cif.Style.Aligned)
|
|
123
|
+
logger.writeln("Updated dictionary saved: {}".format(output_prefix + "_updated.cif"))
|
|
124
|
+
with open(output_prefix + "_stats.json", "w") as ofs:
|
|
125
|
+
for stats in all_stats:
|
|
126
|
+
for s in stats:
|
|
127
|
+
s["geom"] = s["geom"].to_dict()
|
|
128
|
+
json.dump(all_stats, ofs, indent=2)
|
|
129
|
+
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
107
130
|
# refine_and_update_dictionary()
|
|
108
131
|
|
|
109
132
|
def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize, refmac_keywords,
|
|
110
|
-
find_links=False):
|
|
133
|
+
find_links=False, use_ncsr=False):
|
|
111
134
|
st = utils.fileio.read_structure(model_in)
|
|
112
135
|
utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
|
|
113
136
|
if st.ncs:
|
|
@@ -126,7 +149,11 @@ def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefi
|
|
|
126
149
|
except RuntimeError as e:
|
|
127
150
|
raise SystemExit("Error: {}".format(e))
|
|
128
151
|
refmac_keywords = metal_kws + refmac_keywords
|
|
129
|
-
|
|
152
|
+
if use_ncsr:
|
|
153
|
+
ncslist = utils.restraints.prepare_ncs_restraints(st)
|
|
154
|
+
else:
|
|
155
|
+
ncslist = False
|
|
156
|
+
geom = Geom(st, topo, monlib, shake_rms=randomize, refmac_keywords=refmac_keywords, ncslist=ncslist)
|
|
130
157
|
refiner = Refine(st, geom)
|
|
131
158
|
stats = refiner.run_cycles(ncycle)
|
|
132
159
|
refiner.st.name = output_prefix
|
|
@@ -159,7 +186,8 @@ def main(args):
|
|
|
159
186
|
output_prefix=args.output_prefix,
|
|
160
187
|
randomize=args.randomize,
|
|
161
188
|
refmac_keywords=keywords,
|
|
162
|
-
find_links=args.find_links
|
|
189
|
+
find_links=args.find_links,
|
|
190
|
+
use_ncsr=args.ncsr)
|
|
163
191
|
else:
|
|
164
192
|
if not args.output_prefix:
|
|
165
193
|
args.output_prefix = decide_prefix(args.update_dictionary)
|