servalcat 0.4.39__cp39-cp39-win_amd64.whl → 0.4.60__cp39-cp39-win_amd64.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of servalcat might be problematic. Click here for more details.

servalcat/__init__.py CHANGED
@@ -6,5 +6,5 @@ This software is released under the
6
6
  Mozilla Public License, version 2.0; see LICENSE.
7
7
  """
8
8
 
9
- __version__ = '0.4.39'
10
- __date__ = '2023-11-02'
9
+ __version__ = '0.4.60'
10
+ __date__ = '2024-02-26'
Binary file
@@ -29,12 +29,18 @@ b_to_u = utils.model.b_to_u
29
29
 
30
30
  class Geom:
31
31
  def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
32
- refmac_keywords=None, unrestrained=False, use_nucleus=False):
32
+ refmac_keywords=None, unrestrained=False, use_nucleus=False,
33
+ ncslist=None, atom_pos=None):
33
34
  self.st = st
34
35
  self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
35
36
  for cra in self.st[0].all(): self.atoms[cra.atom.serial-1] = cra.atom
37
+ if atom_pos is not None:
38
+ self.atom_pos = atom_pos
39
+ else:
40
+ self.atom_pos = list(range(len(self.atoms)))
41
+ self.n_refine_atoms = max(self.atom_pos) + 1
36
42
  self.lookup = {x.atom: x for x in self.st[0].all()}
37
- self.geom = ext.Geometry(self.st, monlib.ener_lib)
43
+ self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
38
44
  self.specs = utils.model.find_special_positions(self.st)
39
45
  #cs_count = len(self.st.find_spacegroup().operations())
40
46
  for atom, images, matp, mata in self.specs:
@@ -55,13 +61,17 @@ class Geom:
55
61
  if refmac_keywords:
56
62
  exte.read_external_restraints(refmac_keywords, self.st, self.geom)
57
63
  kwds = parse_keywords(refmac_keywords)
58
- for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw"):
64
+ for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
59
65
  if k in kwds:
60
66
  self.calc_kwds[k] = kwds[k]
61
67
  logger.writeln("setting geometry weight {}= {}".format(k, kwds[k]))
68
+ self.group_occ = GroupOccupancy(self.st, kwds.get("occu"))
69
+ else:
70
+ self.group_occ = GroupOccupancy(self.st, None)
62
71
  self.geom.finalize_restraints()
63
- self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
72
+ self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
64
73
  self.parents = {}
74
+ self.ncslist = ncslist
65
75
  # __init__()
66
76
 
67
77
  def check_chemtypes(self, enerlib_path, topo):
@@ -87,7 +97,9 @@ class Geom:
87
97
  # set_h_parents()
88
98
  def setup_nonbonded(self, refine_xyz):
89
99
  skip_critical_dist = not refine_xyz or self.unrestrained
90
- self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist)
100
+ self.geom.setup_nonbonded(skip_critical_dist=skip_critical_dist, group_idxes=self.group_occ.group_idxes)
101
+ if self.ncslist:
102
+ self.geom.setup_ncsr(self.ncslist)
91
103
  def calc(self, target_only):
92
104
  return self.geom.calc(check_only=target_only, **self.calc_kwds)
93
105
  def calc_adp_restraint(self, target_only):
@@ -116,6 +128,7 @@ class Geom:
116
128
  staca=self.geom.reporting.get_stacking_angle_outliers,
117
129
  stacd=self.geom.reporting.get_stacking_dist_outliers,
118
130
  vdw=self.geom.reporting.get_vdw_outliers,
131
+ #ncs=self.geom.reporting.get_ncsr_outliers, # not useful?
119
132
  )
120
133
  labs = dict(bond="Bond distances",
121
134
  angle="Bond angles",
@@ -124,7 +137,8 @@ class Geom:
124
137
  plane="Planar groups",
125
138
  staca="Stacking plane angles",
126
139
  stacd="Stacking plane distances",
127
- vdw="VDW repulsions")
140
+ vdw="VDW repulsions",
141
+ ncs="Local NCS restraints")
128
142
 
129
143
  for k in get_table:
130
144
  kwgs = {"min_z": self.outlier_sigmas[k]}
@@ -132,7 +146,7 @@ class Geom:
132
146
  table = get_table[k](**kwgs)
133
147
  if table["z"]:
134
148
  for kk in table:
135
- if kk.startswith(("atom", "plane")):
149
+ if kk.startswith(("atom", "plane", "1_atom", "2_atom")):
136
150
  table[kk] = [str(self.lookup[x]) for x in table[kk]]
137
151
  df = pandas.DataFrame(table)
138
152
  df = df.reindex(df.z.abs().sort_values(ascending=False).index)
@@ -183,8 +197,187 @@ def show_binstats(df, cycle_number):
183
197
  logger.writeln(lstr)
184
198
  # show_binstats()
185
199
 
200
+ class GroupOccupancy:
201
+ # TODO max may not be one. should check multiplicity
202
+ def __init__(self, st, params):
203
+ self.groups = []
204
+ self.consts = []
205
+ self.group_idxes = [0 for _ in range(st[0].count_atom_sites())]
206
+ self.ncycle = 0
207
+ if not params or not params.get("groups"):
208
+ return
209
+ logger.writeln("Occupancy groups:")
210
+ self.atom_pos = [-1 for _ in range(st[0].count_atom_sites())]
211
+ count = 0
212
+ for igr in params["groups"]:
213
+ self.groups.append([[], []]) # list of [indexes, atoms]
214
+ n_curr = count
215
+ for sel in params["groups"][igr]:
216
+ sel_chains = sel.get("chains")
217
+ sel_from = sel.get("resi_from")
218
+ sel_to = sel.get("resi_to")
219
+ sel_seq = sel.get("resi")
220
+ sel_atom = sel.get("atom")
221
+ sel_alt = sel.get("alt")
222
+ for chain in st[0]:
223
+ if sel_chains and chain.name not in sel_chains:
224
+ continue
225
+ flag = False
226
+ for res in chain:
227
+ if sel_seq and res.seqid != sel_seq:
228
+ continue
229
+ if sel_from and res.seqid == sel_from:
230
+ flag = True
231
+ if sel_from and not flag:
232
+ continue
233
+ for atom in res:
234
+ if sel_atom and atom.name != sel_atom:
235
+ continue
236
+ if sel_alt and atom.altloc != sel_alt:
237
+ continue
238
+ self.atom_pos[atom.serial-1] = count
239
+ self.groups[-1][0].append(count)
240
+ self.groups[-1][1].append(atom)
241
+ self.group_idxes[atom.serial-1] = len(self.groups)
242
+ count += 1
243
+ if sel_to and res.seqid == sel_to:
244
+ flag = False
245
+ logger.writeln(" id= {} atoms= {}".format(igr, count - n_curr))
246
+
247
+ igr_idxes = {igr:i for i, igr in enumerate(params["groups"])}
248
+ self.consts = [(is_comp, [igr_idxes[g] for g in gids])
249
+ for is_comp, gids in params["const"]]
250
+ self.ncycle = params.get("ncycle", 5)
251
+ # __init__()
252
+
253
+ def constraint(self, x):
254
+ # x: occupancy parameters
255
+ ret = []
256
+ for is_comp, ids in self.consts:
257
+ x_sum = numpy.sum(x[ids])
258
+ if is_comp or x_sum > 1:
259
+ ret.append(x_sum - 1)
260
+ else:
261
+ ret.append(0.)
262
+ return numpy.array(ret)
263
+
264
+ def ensure_constraints(self):
265
+ vals = []
266
+ for _, atoms in self.groups:
267
+ occ = numpy.mean([a.occ for a in atoms])
268
+ vals.append(occ)
269
+ for is_comp, idxes in self.consts:
270
+ sum_occ = sum(vals[i] for i in idxes)
271
+ if not is_comp and sum_occ < 1:
272
+ sum_occ = 1. # do nothing
273
+ for i in idxes:
274
+ #logger.writeln("Imposing constraints: {} {}".format(vals[i], vals[i]/sum_occ))
275
+ vals[i] /= sum_occ
276
+ for occ, (_, atoms) in zip(vals, self.groups):
277
+ for a in atoms: a.occ = occ
278
+
279
+ def get_x(self):
280
+ return numpy.array([atoms[0].occ for _, atoms in self.groups])
281
+
282
+ def set_x(self, x):
283
+ for p, (_, atoms) in zip(x, self.groups):
284
+ for a in atoms:
285
+ a.occ = p
286
+
287
+ def target(self, x, ll, ls, u):
288
+ self.set_x(x)
289
+ ll.update_fc()
290
+ c = self.constraint(x)
291
+ f = ll.calc_target() - numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
292
+ return f
293
+
294
+ def grad(self, x, ll, ls, u, refine_h):
295
+ c = self.constraint(x)
296
+ ll.calc_grad(self.atom_pos, refine_xyz=False, adp_mode=0, refine_occ=True, refine_h=refine_h, specs=None)
297
+ #print("grad=", ll.ll.vn)
298
+ #print("diag=", ll.ll.am)
299
+ assert len(ll.ll.vn) == len(ll.ll.am)
300
+ vn = []
301
+ diag = []
302
+ for idxes, atoms in self.groups:
303
+ if not refine_h:
304
+ idxes = [i for i, a in zip(idxes, atoms) if not a.is_hydrogen()]
305
+ vn.append(numpy.sum(numpy.array(ll.ll.vn)[idxes]))
306
+ diag.append(numpy.sum(numpy.array(ll.ll.am)[idxes]))
307
+ vn, diag = numpy.array(vn), numpy.array(diag)
308
+ for i, (is_comp, idxes) in enumerate(self.consts):
309
+ dcdx = numpy.zeros(len(self.groups))
310
+ dcdx[idxes] = 1.
311
+ if is_comp or c[i] != 0:
312
+ vn -= (ls[i] - u * c[i]) * dcdx
313
+ diag += u * dcdx**2
314
+
315
+ return vn, diag
316
+
317
+ def refine(self, ll, refine_h, alpha=1.1):
318
+ # Refinement of grouped occupancies using augmented Lagrangian
319
+ # f(x) = LL(x) - sum_j (lambda_j c_j(x)) + u/2 sum_j (c_j(x))^2
320
+ # with c_j(x) = 0 constraints
321
+ if not self.groups:
322
+ return
323
+ logger.writeln("\n== Group occupancy refinement ==")
324
+ self.ensure_constraints() # make sure constrained groups have the same occupancies.
325
+ ls = 0 * numpy.ones(len(self.consts)) # Lagrange multiplier
326
+ u = 10000. # penalty parameter. in Refmac 1/0.01**2
327
+ x0 = self.get_x()
328
+ #logger.writeln(" parameters: {}".format(len(x0)))
329
+ f0 = self.target(x0, ll, ls, u)
330
+ ret = []
331
+ for cyc in range(self.ncycle):
332
+ ret.append({"Ncyc": cyc+1, "f0": f0})
333
+ logger.writeln("occ_{}_f0= {:.4e}".format(cyc, f0))
334
+ vn, diag = self.grad(x0, ll, ls, u, refine_h)
335
+ diag[diag < 1e-6] = 1.
336
+ dx = -vn / diag
337
+ if 0:
338
+ ofs = open("debug.dat", "w")
339
+ for scale in (-1, -0.5, 0, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2):
340
+ self.set_x(x0 + scale * dx)
341
+ ll.update_fc()
342
+ c = self.constraint(x0 + dx)
343
+ f = ll.calc_target() + numpy.dot(ls, c) + 0.5 * u * numpy.sum(c**2)
344
+ ofs.write("{} {}\n".format(scale, f))
345
+ ofs.close()
346
+ import scipy.optimize
347
+ print(scipy.optimize.line_search(f=lambda x: self.target(x, ll, ls, u),
348
+ myfprime= lambda x: self.grad(ll, ls, u, refine_h)[0],
349
+ xk= x0,
350
+ pk= dx))
351
+ quit()
352
+
353
+ scale = 1
354
+ for i in range(3):
355
+ scale = 1/2**i
356
+ f1 = self.target(x0 + dx * scale, ll, ls, u)
357
+ logger.writeln("occ_{}_f1, {}= {:.4e}".format(cyc, i, f1))
358
+ if f1 < f0: break
359
+ else:
360
+ logger.writeln("WARNING: function not minimised")
361
+ #self.set_x(x0) # Refmac accepts it even when function increases
362
+ c = self.constraint(x0 + dx * scale)
363
+ ret[-1]["f1"] = f1
364
+ ret[-1]["shift_scale"] = scale
365
+ f0 = f1
366
+ x0 = x0 + dx * scale
367
+ ls -= u * c
368
+ u = alpha * u
369
+ ret[-1]["const_viol"] = list(c)
370
+ ret[-1]["lambda_new"] = list(ls)
371
+ self.ensure_constraints()
372
+ ll.update_fc()
373
+ f = ll.calc_target()
374
+ logger.writeln("final -LL= {}".format(f))
375
+ return ret
376
+
377
+
186
378
  class Refine:
187
- def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, unrestrained=False):
379
+ def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
380
+ unrestrained=False, refmac_keywords=None):
188
381
  assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
189
382
  assert geom is not None
190
383
  self.st = st # clone()?
@@ -194,11 +387,13 @@ class Refine:
194
387
  self.gamma = 0
195
388
  self.adp_mode = 0 if self.ll is None else adp_mode
196
389
  self.refine_xyz = refine_xyz
390
+ self.refine_occ = refine_occ
197
391
  self.unrestrained = unrestrained
198
392
  self.refine_h = refine_h
199
393
  self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
200
394
  if self.h_inherit_parent_adp:
201
395
  self.geom.set_h_parents()
396
+ assert self.geom.group_occ.groups or self.n_params() > 0
202
397
  # __init__()
203
398
 
204
399
  def print_weights(self): # TODO unfinished
@@ -216,7 +411,7 @@ class Refine:
216
411
  raise LookupError("unknown adpr_mode")
217
412
 
218
413
  def scale_shifts(self, dx, scale):
219
- n_atoms = len(self.atoms)
414
+ n_atoms = self.geom.n_refine_atoms
220
415
  #ave_shift = numpy.mean(dx)
221
416
  #max_shift = numpy.maximum(dx)
222
417
  #rms_shift = numpy.std(dx)
@@ -224,19 +419,31 @@ class Refine:
224
419
  shift_allow_low = -1.0
225
420
  shift_max_allow_B = 30.0
226
421
  shift_min_allow_B = -30.0
422
+ shift_max_allow_q = 0.5
423
+ shift_min_allow_q = -0.5
227
424
  dx = scale * dx
228
- offset_b = 0
425
+ offset_b = n_atoms * 3 if self.refine_xyz else 0
426
+ offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
229
427
  if self.refine_xyz:
230
- dxx = dx[:n_atoms*3]
428
+ dxx = dx[:offset_b]
429
+ logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
430
+ logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
431
+ logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
231
432
  dxx[dxx > shift_allow_high] = shift_allow_high
232
433
  dxx[dxx < shift_allow_low] = shift_allow_low
233
- offset_b = n_atoms*3
234
434
  if self.adp_mode == 1:
235
- dxb = dx[offset_b:]
435
+ dxb = dx[offset_b:offset_q]
436
+ logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
437
+ logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
438
+ logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
236
439
  dxb[dxb > shift_max_allow_B] = shift_max_allow_B
237
440
  dxb[dxb < shift_min_allow_B] = shift_min_allow_B
238
441
  elif self.adp_mode == 2:
239
- dxb = dx[offset_b:]
442
+ dxb = dx[offset_b:offset_q]
443
+ # TODO this is misleading
444
+ logger.writeln("min(dB) = {}".format(numpy.min(dxb)))
445
+ logger.writeln("max(dB) = {}".format(numpy.max(dxb)))
446
+ logger.writeln("mean(dB)= {}".format(numpy.mean(dxb)))
240
447
  for i in range(len(dxb)//6):
241
448
  j = i * 6
242
449
  a = numpy.array([[dxb[j], dxb[j+3], dxb[j+4]],
@@ -247,29 +454,43 @@ class Refine:
247
454
  v[v < shift_min_allow_B] = shift_min_allow_B
248
455
  a = Q.dot(numpy.diag(v)).dot(Q.T)
249
456
  dxb[j:j+6] = a[0,0], a[1,1], a[2,2], a[0,1], a[0,2], a[1,2]
250
-
457
+ if self.refine_occ:
458
+ dxq = dx[offset_q:]
459
+ logger.writeln("min(dq) = {}".format(numpy.min(dxq)))
460
+ logger.writeln("max(dq) = {}".format(numpy.max(dxq)))
461
+ logger.writeln("mean(dq)= {}".format(numpy.mean(dxq)))
462
+ dxq[dxq > shift_max_allow_q] = shift_max_allow_q
463
+ dxq[dxq < shift_min_allow_q] = shift_min_allow_q
464
+
251
465
  return dx
252
466
 
253
467
  def n_params(self):
254
- n_atoms = len(self.atoms)
468
+ n_atoms = self.geom.n_refine_atoms
255
469
  n_params = 0
256
470
  if self.refine_xyz: n_params += 3 * n_atoms
257
471
  if self.adp_mode == 1:
258
472
  n_params += n_atoms
259
473
  elif self.adp_mode == 2:
260
474
  n_params += 6 * n_atoms
475
+ if self.refine_occ:
476
+ n_params += n_atoms
261
477
  return n_params
262
478
 
263
479
  def set_x(self, x):
264
- n_atoms = len(self.atoms)
480
+ n_atoms = self.geom.n_refine_atoms
265
481
  offset_b = n_atoms * 3 if self.refine_xyz else 0
266
- for i in range(len(self.atoms)):
482
+ offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
483
+ max_occ = {}
484
+ if self.refine_occ and self.geom.specs:
485
+ max_occ = {atom: 1./(len(images)+1) for atom, images, _, _ in self.geom.specs}
486
+ for i, j in enumerate(self.geom.atom_pos):
487
+ if j < 0: continue
267
488
  if self.refine_xyz:
268
- self.atoms[i].pos.fromlist(x[3*i:3*i+3]) # faster than substituting pos.x,pos.y,pos.z
489
+ self.atoms[i].pos.fromlist(x[3*j:3*j+3]) # faster than substituting pos.x,pos.y,pos.z
269
490
  if self.adp_mode == 1:
270
- self.atoms[i].b_iso = max(0.5, x[offset_b + i]) # minimum B = 0.5
491
+ self.atoms[i].b_iso = max(0.5, x[offset_b + j]) # minimum B = 0.5
271
492
  elif self.adp_mode == 2:
272
- a = x[offset_b + 6 * i: offset_b + 6 * (i+1)]
493
+ a = x[offset_b + 6 * j: offset_b + 6 * (j+1)]
273
494
  a = gemmi.SMat33d(*a)
274
495
  M = numpy.array(a.as_mat33())
275
496
  v, Q = numpy.linalg.eigh(M) # eig() may return complex due to numerical precision?
@@ -278,6 +499,8 @@ class Refine:
278
499
  self.atoms[i].b_iso = M2.trace() / 3
279
500
  M2 *= b_to_u
280
501
  self.atoms[i].aniso = gemmi.SMat33f(M2[0,0], M2[1,1], M2[2,2], M2[0,1], M2[0,2], M2[1,2])
502
+ if self.refine_occ:
503
+ self.atoms[i].occ = min(max_occ.get(self.atoms[i], 1), max(1e-3, x[offset_q + j]))
281
504
 
282
505
  # Copy B of hydrogen from parent
283
506
  if self.h_inherit_parent_adp:
@@ -290,21 +513,26 @@ class Refine:
290
513
  self.ll.update_fc()
291
514
 
292
515
  self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
293
- self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
516
+ self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
294
517
  logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
295
518
 
296
519
  def get_x(self):
297
- n_atoms = len(self.atoms)
520
+ n_atoms = self.geom.n_refine_atoms
298
521
  offset_b = n_atoms * 3 if self.refine_xyz else 0
522
+ offset_q = offset_b + n_atoms * {0: 0, 1: 1, 2: 6}[self.adp_mode]
299
523
  x = numpy.zeros(self.n_params())
300
- for i, a in enumerate(self.atoms):
524
+ for i, j in enumerate(self.geom.atom_pos):
525
+ if j < 0: continue
526
+ a = self.atoms[i]
301
527
  if self.refine_xyz:
302
- x[3*i:3*(i+1)] = a.pos.tolist()
528
+ x[3*j:3*(j+1)] = a.pos.tolist()
303
529
  if self.adp_mode == 1:
304
- x[offset_b + i] = self.atoms[i].b_iso
530
+ x[offset_b + j] = self.atoms[i].b_iso
305
531
  elif self.adp_mode == 2:
306
- x[offset_b + 6*i : offset_b + 6*(i+1)] = self.atoms[i].aniso.elements_pdb()
307
- x[offset_b + 6*i : offset_b + 6*(i+1)] *= u_to_b
532
+ x[offset_b + 6*j : offset_b + 6*(j+1)] = self.atoms[i].aniso.elements_pdb()
533
+ x[offset_b + 6*j : offset_b + 6*(j+1)] *= u_to_b
534
+ if self.refine_occ:
535
+ x[offset_q + j] = a.occ
308
536
 
309
537
  return x
310
538
  #@profile
@@ -317,7 +545,8 @@ class Refine:
317
545
  ll = self.ll.calc_target()
318
546
  logger.writeln(" ll= {}".format(ll))
319
547
  if not target_only:
320
- self.ll.calc_grad(self.refine_xyz, self.adp_mode, self.refine_h, self.geom.geom.specials)
548
+ self.ll.calc_grad(self.geom.atom_pos, self.refine_xyz, self.adp_mode, self.refine_occ,
549
+ self.refine_h, self.geom.geom.specials)
321
550
  else:
322
551
  ll = 0
323
552
 
@@ -372,19 +601,6 @@ class Refine:
372
601
  M = scipy.sparse.diags(rdiag)
373
602
  dx, self.gamma = cgsolve.cgsolve_rm(A=am, v=vn, M=M, gamma=self.gamma)
374
603
 
375
- if self.refine_xyz:
376
- dxx = dx[:len(self.atoms)*3]
377
- #logger.writeln("dx = {}".format(dxx))
378
- logger.writeln("min(dx) = {}".format(numpy.min(dxx)))
379
- logger.writeln("max(dx) = {}".format(numpy.max(dxx)))
380
- logger.writeln("mean(dx)= {}".format(numpy.mean(dxx)))
381
- if self.adp_mode > 0: # TODO for aniso
382
- db = dx[len(self.atoms)*3 if self.refine_xyz else 0:]
383
- #logger.writeln("dB = {}".format(db))
384
- logger.writeln("min(dB) = {}".format(numpy.min(db)))
385
- logger.writeln("max(dB) = {}".format(numpy.max(db)))
386
- logger.writeln("mean(dB)= {}".format(numpy.mean(db)))
387
-
388
604
  if 0: # to check hessian scale
389
605
  with open("minimise_line.dat", "w") as ofs:
390
606
  ofs.write("s f\n")
@@ -415,7 +631,7 @@ class Refine:
415
631
  self.print_weights()
416
632
  stats = [{"Ncyc": 0}]
417
633
  self.geom.setup_nonbonded(self.refine_xyz)
418
- self.geom.geom.setup_target(self.refine_xyz, self.adp_mode)
634
+ self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
419
635
  logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
420
636
  if self.refine_xyz and not self.unrestrained:
421
637
  stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=True)["summary"]
@@ -429,11 +645,15 @@ class Refine:
429
645
  show_binstats(llstats["bin_stats"], 0)
430
646
  if self.adp_mode > 0:
431
647
  utils.model.adp_analysis(self.st)
648
+ occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
432
649
 
433
650
  for i in range(ncycles):
434
651
  logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
435
- is_ok, shift_scale, fval = self.run_cycle(weight=weight)
436
- stats.append({"Ncyc": i+1, "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
652
+ if self.refine_xyz or self.adp_mode > 0:
653
+ is_ok, shift_scale, fval = self.run_cycle(weight=weight)
654
+ stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok})
655
+ if occ_refine_flag:
656
+ stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
437
657
  if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
438
658
  if self.refine_xyz and not self.unrestrained:
439
659
  stats[-1]["geom"] = self.geom.show_model_stats(show_outliers=(i==ncycles-1))["summary"]
@@ -35,11 +35,14 @@ def add_arguments(parser):
35
35
  help='Automatically add links')
36
36
  parser.add_argument('--randomize', type=float, default=0,
37
37
  help='Shake coordinates with specified rmsd')
38
+ parser.add_argument('--ncsr', action='store_true',
39
+ help='Use local NCS restraints')
38
40
  parser.add_argument('--keywords', nargs='+', action="append",
39
41
  help="refmac keyword(s)")
40
42
  parser.add_argument('--keyword_file', nargs='+', action="append",
41
43
  help="refmac keyword file(s)")
42
- parser.add_argument('-o','--output_prefix')
44
+ parser.add_argument('-o','--output_prefix',
45
+ help="Output prefix")
43
46
 
44
47
  # add_arguments()
45
48
 
@@ -49,6 +52,29 @@ def parse_args(arg_list):
49
52
  return parser.parse_args(arg_list)
50
53
  # parse_args()
51
54
 
55
+ def add_program_info_to_dictionary(block, comp_id, program_name="servalcat", descriptor="optimization tool"):
56
+ tab = block.find("_pdbx_chem_comp_description_generator.", ["program_name", "program_version", "descriptor"])
57
+ # just overwrite version if it's there
58
+ for row in tab:
59
+ if row.str(0) == program_name and row.str(2) == descriptor:
60
+ row[1] = gemmi.cif.quote(servalcat.__version__)
61
+ return
62
+ loop = tab.loop
63
+ if not loop:
64
+ loop = block.init_loop("_pdbx_chem_comp_description_generator.", ["comp_id",
65
+ "program_name",
66
+ "program_version",
67
+ "descriptor"])
68
+ tags = [x[x.index(".")+1:] for x in loop.tags]
69
+ row = ["" for _ in range(len(tags))]
70
+ for tag, val in (("comp_id", comp_id),
71
+ ("program_name", program_name),
72
+ ("program_version", servalcat.__version__),
73
+ ("descriptor", descriptor)):
74
+ if tag in tags: row[tags.index(tag)] = val
75
+ loop.add_row(gemmi.cif.quote_list(row))
76
+ # add_program_info_to_dictionary()
77
+
52
78
  def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0, ncycle1=10, ncycle2=30):
53
79
  doc = gemmi.cif.read(cif_in)
54
80
  for block in doc: # this block will be reused below
@@ -59,55 +85,52 @@ def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0
59
85
  monlib = utils.restraints.load_monomer_library(st, monomer_dir=monomer_dir, # monlib is needed for ener_lib
60
86
  cif_files=[cif_in],
61
87
  stop_for_unknowns=True)
62
- try:
63
- topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAdd,
64
- check_hydrogen=False)
65
- except RuntimeError as e:
66
- raise SystemExit("Error: {}".format(e))
88
+ all_stats = []
89
+ for i_macro in 0, 1:
90
+ try:
91
+ topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=[gemmi.HydrogenChange.Remove, gemmi.HydrogenChange.ReAdd][i_macro],
92
+ check_hydrogen=(i_macro == 1))
93
+ except RuntimeError as e:
94
+ raise SystemExit("Error: {}".format(e))
67
95
 
68
- geom = Geom(st, topo, monlib, shake_rms=randomize)
69
- refiner = Refine(st, geom)
70
- logger.writeln("Running {} cycles with wchir=4 wvdw=2".format(ncycle1))
71
- geom.calc_kwds["wchir"] = 4
72
- geom.calc_kwds["wvdw"] = 2
73
- refiner.run_cycles(ncycle1)
96
+ geom = Geom(st, topo, monlib, shake_rms=randomize)
97
+ refiner = Refine(st, geom)
98
+ logger.writeln("Running {} cycles with wchir=4 wvdw=2 {} hydrogen".format(ncycle1, ["without","with"][i_macro]))
99
+ geom.calc_kwds["wchir"] = 4
100
+ geom.calc_kwds["wvdw"] = 2
101
+ all_stats.append(refiner.run_cycles(ncycle1))
74
102
 
75
- # re-add hydrogen may help
76
- topo = gemmi.prepare_topology(st, monlib, h_change=gemmi.HydrogenChange.ReAdd,
77
- warnings=logger)
78
- geom = Geom(st, topo, monlib)
79
- refiner = Refine(st, geom)
80
- logger.writeln("Running {} cycles with wchir=1 wvdw=2".format(ncycle2))
81
- geom.calc_kwds["wchir"] = 1
82
- geom.calc_kwds["wvdw"] = 2
83
- refiner.run_cycles(ncycle2)
103
+ logger.writeln("Running {} cycles with wchir=1 wvdw=2 {} hydrogen".format(ncycle2, ["without","with"][i_macro]))
104
+ geom.calc_kwds["wchir"] = 1
105
+ geom.calc_kwds["wvdw"] = 2
106
+ all_stats.append(refiner.run_cycles(ncycle2))
84
107
 
85
108
  # replace xyz
86
109
  pos = {cra.atom.name: cra.atom.pos.tolist() for cra in refiner.st[0].all()}
87
- for row in block.find("_chem_comp_atom.", ["atom_id", "x", "y", "z"]):
110
+ for row in block.find("_chem_comp_atom.", ["atom_id", "?x", "?y", "?z",
111
+ "?pdbx_model_Cartn_x_ideal",
112
+ "?pdbx_model_Cartn_y_ideal",
113
+ "?pdbx_model_Cartn_z_ideal"]):
88
114
  p = pos[row.str(0)]
89
115
  for i in range(3):
90
- row[i+1] = "{:.3f}".format(p[i])
116
+ if row.has(i+1):
117
+ row[i+1] = "{:.3f}".format(p[i])
118
+ if row.has(i+4):
119
+ row[i+4] = "{:.3f}".format(p[i])
91
120
  # add description
92
- loop = block.find_loop("_pdbx_chem_comp_description_generator.comp_id").get_loop()
93
- if not loop:
94
- loop = block.init_loop("_pdbx_chem_comp_description_generator.", ["comp_id",
95
- "program_name",
96
- "program_version",
97
- "descriptor"])
98
- tags = [x[x.index(".")+1:] for x in loop.tags]
99
- row = ["" for _ in range(len(tags))]
100
- for tag, val in (("comp_id", st[0][0][0].name),
101
- ("program_name", "servalcat"),
102
- ("program_version", servalcat.__version__),
103
- ("descriptor", "optimization tool")):
104
- if tag in tags: row[tags.index(tag)] = val
105
- loop.add_row(gemmi.cif.quote_list(row))
121
+ add_program_info_to_dictionary(block, st[0][0][0].name)
106
122
  doc.write_file(output_prefix + "_updated.cif", style=gemmi.cif.Style.Aligned)
123
+ logger.writeln("Updated dictionary saved: {}".format(output_prefix + "_updated.cif"))
124
+ with open(output_prefix + "_stats.json", "w") as ofs:
125
+ for stats in all_stats:
126
+ for s in stats:
127
+ s["geom"] = s["geom"].to_dict()
128
+ json.dump(all_stats, ofs, indent=2)
129
+ logger.writeln("Refinement statistics saved: {}".format(ofs.name))
107
130
  # refine_and_update_dictionary()
108
131
 
109
132
  def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize, refmac_keywords,
110
- find_links=False):
133
+ find_links=False, use_ncsr=False):
111
134
  st = utils.fileio.read_structure(model_in)
112
135
  utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
113
136
  if st.ncs:
@@ -126,7 +149,11 @@ def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefi
126
149
  except RuntimeError as e:
127
150
  raise SystemExit("Error: {}".format(e))
128
151
  refmac_keywords = metal_kws + refmac_keywords
129
- geom = Geom(st, topo, monlib, shake_rms=randomize, refmac_keywords=refmac_keywords)
152
+ if use_ncsr:
153
+ ncslist = utils.restraints.prepare_ncs_restraints(st)
154
+ else:
155
+ ncslist = False
156
+ geom = Geom(st, topo, monlib, shake_rms=randomize, refmac_keywords=refmac_keywords, ncslist=ncslist)
130
157
  refiner = Refine(st, geom)
131
158
  stats = refiner.run_cycles(ncycle)
132
159
  refiner.st.name = output_prefix
@@ -159,7 +186,8 @@ def main(args):
159
186
  output_prefix=args.output_prefix,
160
187
  randomize=args.randomize,
161
188
  refmac_keywords=keywords,
162
- find_links=args.find_links)
189
+ find_links=args.find_links,
190
+ use_ncsr=args.ncsr)
163
191
  else:
164
192
  if not args.output_prefix:
165
193
  args.output_prefix = decide_prefix(args.update_dictionary)