servalcat 0.4.60__cp38-cp38-win_amd64.whl → 0.4.72__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cp38-win_amd64.pyd +0 -0
- servalcat/refine/refine.py +130 -42
- servalcat/refine/refine_geom.py +38 -31
- servalcat/refine/refine_spa.py +33 -24
- servalcat/refine/refine_xtal.py +25 -18
- servalcat/refine/spa.py +3 -2
- servalcat/refine/xtal.py +1 -1
- servalcat/refmac/exte.py +4 -6
- servalcat/refmac/refmac_keywords.py +123 -22
- servalcat/refmac/refmac_wrapper.py +22 -14
- servalcat/spa/fofc.py +1 -1
- servalcat/spa/run_refmac.py +8 -7
- servalcat/utils/commands.py +126 -6
- servalcat/utils/fileio.py +14 -25
- servalcat/utils/hkl.py +5 -4
- servalcat/utils/maps.py +10 -5
- servalcat/utils/model.py +12 -17
- servalcat/utils/refmac.py +9 -15
- servalcat/utils/restraints.py +173 -6
- servalcat/xtal/sigmaa.py +22 -11
- {servalcat-0.4.60.dist-info → servalcat-0.4.72.dist-info}/METADATA +3 -4
- servalcat-0.4.72.dist-info/RECORD +44 -0
- {servalcat-0.4.60.dist-info → servalcat-0.4.72.dist-info}/WHEEL +1 -1
- servalcat-0.4.60.dist-info/RECORD +0 -44
- {servalcat-0.4.60.dist-info → servalcat-0.4.72.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.60.dist-info → servalcat-0.4.72.dist-info}/licenses/LICENSE +0 -0
servalcat/__init__.py
CHANGED
servalcat/ext.cp38-win_amd64.pyd
CHANGED
|
Binary file
|
servalcat/refine/refine.py
CHANGED
|
@@ -10,13 +10,13 @@ import os
|
|
|
10
10
|
import re
|
|
11
11
|
import gemmi
|
|
12
12
|
import numpy
|
|
13
|
+
import json
|
|
13
14
|
import pandas
|
|
14
15
|
import scipy.sparse
|
|
15
16
|
import servalcat # for version
|
|
16
17
|
from servalcat.utils import logger
|
|
17
18
|
from servalcat import utils
|
|
18
19
|
from servalcat.refmac import exte
|
|
19
|
-
from servalcat.refmac.refmac_keywords import parse_keywords
|
|
20
20
|
from servalcat import ext
|
|
21
21
|
from . import cgsolve
|
|
22
22
|
u_to_b = utils.model.u_to_b
|
|
@@ -29,7 +29,7 @@ b_to_u = utils.model.b_to_u
|
|
|
29
29
|
|
|
30
30
|
class Geom:
|
|
31
31
|
def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
|
|
32
|
-
|
|
32
|
+
params=None, unrestrained=False, use_nucleus=False,
|
|
33
33
|
ncslist=None, atom_pos=None):
|
|
34
34
|
self.st = st
|
|
35
35
|
self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
|
|
@@ -48,26 +48,29 @@ class Geom:
|
|
|
48
48
|
n_sym = len(images) + 1
|
|
49
49
|
self.geom.specials.append(ext.Geometry.Special(atom, matp, mata, n_sym))
|
|
50
50
|
self.adpr_w = adpr_w
|
|
51
|
+
self.occr_w = 1.
|
|
51
52
|
self.unrestrained = unrestrained
|
|
52
53
|
if shake_rms > 0:
|
|
53
54
|
numpy.random.seed(0)
|
|
54
55
|
utils.model.shake_structure(self.st, shake_rms, copy=False)
|
|
55
|
-
utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
|
|
56
|
+
#utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
|
|
57
|
+
self.use_nucleus = use_nucleus
|
|
58
|
+
self.calc_kwds = {"use_nucleus": self.use_nucleus}
|
|
59
|
+
if params is None:
|
|
60
|
+
params = {}
|
|
61
|
+
exte.read_external_restraints(params.get("exte", []), self.st, self.geom)
|
|
62
|
+
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
|
|
63
|
+
if k in params:
|
|
64
|
+
self.calc_kwds[k] = params[k]
|
|
65
|
+
logger.writeln("setting geometry weight {}= {}".format(k, params[k]))
|
|
66
|
+
inc_tors, exc_tors = utils.restraints.make_torsion_rules(params.get("restr", {}))
|
|
67
|
+
rtors = utils.restraints.select_restrained_torsions(monlib, inc_tors, exc_tors)
|
|
68
|
+
self.geom.mon_tors_names = rtors["monomer"]
|
|
69
|
+
self.geom.link_tors_names = rtors["link"]
|
|
70
|
+
self.group_occ = GroupOccupancy(self.st, params.get("occu"))
|
|
56
71
|
if not self.unrestrained:
|
|
57
72
|
self.geom.load_topo(topo)
|
|
58
73
|
self.check_chemtypes(os.path.join(monlib.path(), "ener_lib.cif"), topo)
|
|
59
|
-
self.use_nucleus = use_nucleus
|
|
60
|
-
self.calc_kwds = {"use_nucleus": self.use_nucleus}
|
|
61
|
-
if refmac_keywords:
|
|
62
|
-
exte.read_external_restraints(refmac_keywords, self.st, self.geom)
|
|
63
|
-
kwds = parse_keywords(refmac_keywords)
|
|
64
|
-
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
|
|
65
|
-
if k in kwds:
|
|
66
|
-
self.calc_kwds[k] = kwds[k]
|
|
67
|
-
logger.writeln("setting geometry weight {}= {}".format(k, kwds[k]))
|
|
68
|
-
self.group_occ = GroupOccupancy(self.st, kwds.get("occu"))
|
|
69
|
-
else:
|
|
70
|
-
self.group_occ = GroupOccupancy(self.st, None)
|
|
71
74
|
self.geom.finalize_restraints()
|
|
72
75
|
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
73
76
|
self.parents = {}
|
|
@@ -104,20 +107,28 @@ class Geom:
|
|
|
104
107
|
return self.geom.calc(check_only=target_only, **self.calc_kwds)
|
|
105
108
|
def calc_adp_restraint(self, target_only):
|
|
106
109
|
return self.geom.calc_adp_restraint(target_only, self.adpr_w)
|
|
107
|
-
def
|
|
110
|
+
def calc_occ_restraint(self, target_only):
|
|
111
|
+
return self.geom.calc_occ_restraint(target_only, self.occr_w)
|
|
112
|
+
def calc_target(self, target_only, refine_xyz, adp_mode, use_occr):
|
|
108
113
|
self.geom.clear_target()
|
|
109
114
|
geom_x = self.calc(target_only) if refine_xyz else 0
|
|
110
115
|
geom_a = self.calc_adp_restraint(target_only) if adp_mode > 0 else 0
|
|
116
|
+
geom_q = self.calc_occ_restraint(target_only) if use_occr > 0 else 0
|
|
111
117
|
logger.writeln(" geom_x = {}".format(geom_x))
|
|
112
118
|
logger.writeln(" geom_a = {}".format(geom_a))
|
|
113
|
-
|
|
119
|
+
logger.writeln(" geom_q = {}".format(geom_q))
|
|
120
|
+
geom = geom_x + geom_a + geom_q
|
|
114
121
|
if not target_only:
|
|
115
122
|
self.geom.spec_correction()
|
|
116
123
|
return geom
|
|
117
124
|
|
|
118
|
-
def show_model_stats(self, show_outliers=True):
|
|
119
|
-
|
|
120
|
-
|
|
125
|
+
def show_model_stats(self, refine_xyz=True, adp_mode=1, use_occr=False, show_outliers=True):
|
|
126
|
+
if refine_xyz:
|
|
127
|
+
self.calc(True)
|
|
128
|
+
if adp_mode > 0:
|
|
129
|
+
self.calc_adp_restraint(True)
|
|
130
|
+
if use_occr:
|
|
131
|
+
self.calc_occ_restraint(True)
|
|
121
132
|
ret = {"outliers": {}}
|
|
122
133
|
if show_outliers:
|
|
123
134
|
get_table = dict(bond=self.geom.reporting.get_bond_outliers,
|
|
@@ -184,10 +195,12 @@ class Geom:
|
|
|
184
195
|
def show_binstats(df, cycle_number):
|
|
185
196
|
forplot = []
|
|
186
197
|
rlabs = [x for x in df if x.startswith("R")]
|
|
198
|
+
fsclabs = [x for x in df if x.startswith("fsc")]
|
|
187
199
|
cclabs = [x for x in df if x.startswith("CC")]
|
|
188
200
|
dlabs = [x for x in df if re.search("^D[0-9]*", x)]
|
|
189
201
|
if "fsc_model" in df: forplot.append(["FSC", ["fsc_model"]])
|
|
190
202
|
if rlabs: forplot.append(["R", rlabs])
|
|
203
|
+
if fsclabs: forplot.append(["FSC", fsclabs])
|
|
191
204
|
if cclabs: forplot.append(["CC", cclabs])
|
|
192
205
|
if dlabs: forplot.append(["ML parameters - D", dlabs])
|
|
193
206
|
if "S" in df: forplot.append(["ML parameters - Sigma", ["S"]])
|
|
@@ -197,6 +210,29 @@ def show_binstats(df, cycle_number):
|
|
|
197
210
|
logger.writeln(lstr)
|
|
198
211
|
# show_binstats()
|
|
199
212
|
|
|
213
|
+
def convert_stats_to_dicts(stats):
|
|
214
|
+
tmp = []
|
|
215
|
+
for s in stats: # stats must be a list of dict
|
|
216
|
+
tmp.append({})
|
|
217
|
+
for k in s:
|
|
218
|
+
if k == "geom":
|
|
219
|
+
tmp[-1]["geom"] = {"summary": s["geom"]["summary"].to_dict()}
|
|
220
|
+
for kk in s["geom"]["outliers"]:
|
|
221
|
+
tmp[-1]["geom"].setdefault("outliers", {})[kk] = s["geom"]["outliers"][kk].to_dict(orient="records")
|
|
222
|
+
else:
|
|
223
|
+
tmp[-1][k] = s[k]
|
|
224
|
+
return tmp
|
|
225
|
+
# convert_stats_to_dicts()
|
|
226
|
+
|
|
227
|
+
def write_stats_json_safe(stats, json_out):
|
|
228
|
+
tmp = convert_stats_to_dicts(stats)
|
|
229
|
+
out_tmp = json_out + ".part"
|
|
230
|
+
with open(out_tmp, "w") as ofs:
|
|
231
|
+
json.dump(tmp, ofs, indent=2)
|
|
232
|
+
os.replace(out_tmp, json_out)
|
|
233
|
+
logger.writeln(f"Refinement statistics saved: {json_out}")
|
|
234
|
+
# write_stats_json_safe()
|
|
235
|
+
|
|
200
236
|
class GroupOccupancy:
|
|
201
237
|
# TODO max may not be one. should check multiplicity
|
|
202
238
|
def __init__(self, st, params):
|
|
@@ -377,10 +413,11 @@ class GroupOccupancy:
|
|
|
377
413
|
|
|
378
414
|
class Refine:
|
|
379
415
|
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
|
|
380
|
-
unrestrained=False,
|
|
416
|
+
unrestrained=False, params=None):
|
|
381
417
|
assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
|
|
382
418
|
assert geom is not None
|
|
383
419
|
self.st = st # clone()?
|
|
420
|
+
self.st_traj = None
|
|
384
421
|
self.atoms = geom.atoms # not a copy
|
|
385
422
|
self.geom = geom
|
|
386
423
|
self.ll = ll
|
|
@@ -388,11 +425,15 @@ class Refine:
|
|
|
388
425
|
self.adp_mode = 0 if self.ll is None else adp_mode
|
|
389
426
|
self.refine_xyz = refine_xyz
|
|
390
427
|
self.refine_occ = refine_occ
|
|
428
|
+
self.use_occr = self.refine_occ # for now?
|
|
391
429
|
self.unrestrained = unrestrained
|
|
392
430
|
self.refine_h = refine_h
|
|
393
431
|
self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
|
|
394
432
|
if self.h_inherit_parent_adp:
|
|
395
433
|
self.geom.set_h_parents()
|
|
434
|
+
if params and params.get("write_trajectory"):
|
|
435
|
+
self.st_traj = self.st.clone()
|
|
436
|
+
self.st_traj[-1].name = "0"
|
|
396
437
|
assert self.geom.group_occ.groups or self.n_params() > 0
|
|
397
438
|
# __init__()
|
|
398
439
|
|
|
@@ -513,7 +554,7 @@ class Refine:
|
|
|
513
554
|
self.ll.update_fc()
|
|
514
555
|
|
|
515
556
|
self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
|
|
516
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
557
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
|
|
517
558
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
518
559
|
|
|
519
560
|
def get_x(self):
|
|
@@ -540,7 +581,7 @@ class Refine:
|
|
|
540
581
|
N = self.n_params()
|
|
541
582
|
geom = self.geom.calc_target(target_only,
|
|
542
583
|
not self.unrestrained and self.refine_xyz,
|
|
543
|
-
self.adp_mode)
|
|
584
|
+
self.adp_mode, self.use_occr)
|
|
544
585
|
if self.ll is not None:
|
|
545
586
|
ll = self.ll.calc_target()
|
|
546
587
|
logger.writeln(" ll= {}".format(ll))
|
|
@@ -627,14 +668,17 @@ class Refine:
|
|
|
627
668
|
|
|
628
669
|
return ret, shift_scale, f1
|
|
629
670
|
|
|
630
|
-
def run_cycles(self, ncycles, weight=1, debug=False
|
|
671
|
+
def run_cycles(self, ncycles, weight=1, weight_adjust=False, debug=False,
|
|
672
|
+
weight_adjust_bond_rmsz_range=(0.5, 1.), stats_json_out=None):
|
|
631
673
|
self.print_weights()
|
|
632
674
|
stats = [{"Ncyc": 0}]
|
|
633
675
|
self.geom.setup_nonbonded(self.refine_xyz)
|
|
634
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
676
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
|
|
635
677
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
636
|
-
|
|
637
|
-
|
|
678
|
+
stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
|
|
679
|
+
adp_mode=self.adp_mode,
|
|
680
|
+
use_occr=self.refine_occ,
|
|
681
|
+
show_outliers=True)
|
|
638
682
|
if self.ll is not None:
|
|
639
683
|
self.ll.update_fc()
|
|
640
684
|
self.ll.overall_scale()
|
|
@@ -645,18 +689,26 @@ class Refine:
|
|
|
645
689
|
show_binstats(llstats["bin_stats"], 0)
|
|
646
690
|
if self.adp_mode > 0:
|
|
647
691
|
utils.model.adp_analysis(self.st)
|
|
692
|
+
if stats_json_out:
|
|
693
|
+
write_stats_json_safe(stats, stats_json_out)
|
|
648
694
|
occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
|
|
649
695
|
|
|
650
696
|
for i in range(ncycles):
|
|
651
697
|
logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
|
|
652
|
-
|
|
698
|
+
logger.writeln(f" weight = {weight:.4e}")
|
|
699
|
+
if self.refine_xyz or self.adp_mode > 0 or self.refine_occ:
|
|
653
700
|
is_ok, shift_scale, fval = self.run_cycle(weight=weight)
|
|
654
|
-
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok
|
|
701
|
+
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok,
|
|
702
|
+
"weight": weight})
|
|
703
|
+
elif occ_refine_flag:
|
|
704
|
+
stats.append({"Ncyc": len(stats)})
|
|
655
705
|
if occ_refine_flag:
|
|
656
706
|
stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
|
|
657
707
|
if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
|
|
658
|
-
|
|
659
|
-
|
|
708
|
+
stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
|
|
709
|
+
adp_mode=self.adp_mode,
|
|
710
|
+
use_occr=self.refine_occ,
|
|
711
|
+
show_outliers=(i==ncycles-1))
|
|
660
712
|
if self.ll is not None:
|
|
661
713
|
self.ll.overall_scale()
|
|
662
714
|
f0 = self.ll.calc_target()
|
|
@@ -670,6 +722,22 @@ class Refine:
|
|
|
670
722
|
show_binstats(llstats["bin_stats"], i+1)
|
|
671
723
|
if self.adp_mode > 0:
|
|
672
724
|
utils.model.adp_analysis(self.st)
|
|
725
|
+
if (weight_adjust and self.refine_xyz and not self.unrestrained and self.ll is not None and
|
|
726
|
+
len(stats) > 2 and "Bond distances, non H" in stats[-1]["geom"]["summary"].index):
|
|
727
|
+
rmsz = stats[-1]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
|
|
728
|
+
rmsz0 = stats[-2]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
|
|
729
|
+
if rmsz > weight_adjust_bond_rmsz_range[1] and rmsz > rmsz0:
|
|
730
|
+
weight /= 1.1
|
|
731
|
+
elif rmsz < weight_adjust_bond_rmsz_range[0] and rmsz0 < weight_adjust_bond_rmsz_range[0] and rmsz < rmsz0:
|
|
732
|
+
weight *= 1.3
|
|
733
|
+
elif rmsz > 1.5 * rmsz0:
|
|
734
|
+
weight /= 1.1
|
|
735
|
+
if self.st_traj is not None:
|
|
736
|
+
self.st_traj.add_model(self.st[0])
|
|
737
|
+
self.st_traj[-1].name = str(i+1)
|
|
738
|
+
if stats_json_out:
|
|
739
|
+
write_stats_json_safe(stats, stats_json_out)
|
|
740
|
+
|
|
673
741
|
logger.writeln("")
|
|
674
742
|
|
|
675
743
|
# Make table
|
|
@@ -685,8 +753,8 @@ class Refine:
|
|
|
685
753
|
("r.m.s.Z", "Bond distances, non H", "zBOND"),
|
|
686
754
|
("r.m.s.d.", "Bond angles, non H", "rmsANGL"),
|
|
687
755
|
("r.m.s.Z", "Bond angles, non H", "zANGL")):
|
|
688
|
-
if k in d["geom"] and n in d["geom"][k]:
|
|
689
|
-
x[l] = d["geom"][k].get(n)
|
|
756
|
+
if k in d["geom"]["summary"] and n in d["geom"]["summary"][k]:
|
|
757
|
+
x[l] = d["geom"]["summary"][k].get(n)
|
|
690
758
|
geom_keys.add(l)
|
|
691
759
|
tmp.append(x)
|
|
692
760
|
df = pandas.DataFrame(tmp)
|
|
@@ -711,12 +779,20 @@ class Refine:
|
|
|
711
779
|
lstr = utils.make_loggraph_str(df, "stats vs cycle", forplot,
|
|
712
780
|
float_format="{:.4f}".format)
|
|
713
781
|
logger.writeln(lstr)
|
|
714
|
-
self.update_meta()
|
|
782
|
+
self.update_meta(stats[-1])
|
|
715
783
|
return stats
|
|
716
784
|
|
|
717
|
-
def update_meta(self):
|
|
785
|
+
def update_meta(self, stats):
|
|
718
786
|
# TODO write stats. probably geom.reporting.get_summary_table should return with _refine_ls_restr.type names
|
|
719
|
-
|
|
787
|
+
# should remove st.mod_residues?
|
|
788
|
+
self.st.helices.clear()
|
|
789
|
+
self.st.sheets.clear()
|
|
790
|
+
raw_remarks = [f'REMARK 3',
|
|
791
|
+
f'REMARK 3 REFINEMENT.',
|
|
792
|
+
f'REMARK 3 PROGRAM : SERVALCAT {servalcat.__version__}',
|
|
793
|
+
f'REMARK 3 AUTHORS : YAMASHITA,MURSHUDOV',
|
|
794
|
+
f'REMARK 3',
|
|
795
|
+
]
|
|
720
796
|
si = gemmi.SoftwareItem()
|
|
721
797
|
si.classification = gemmi.SoftwareItem.Classification.Refinement
|
|
722
798
|
si.name = "Servalcat"
|
|
@@ -724,10 +800,22 @@ class Refine:
|
|
|
724
800
|
si.date = servalcat.__date__
|
|
725
801
|
self.st.meta.software = [si]
|
|
726
802
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
803
|
+
ri = gemmi.RefinementInfo()
|
|
804
|
+
if "geom" in stats:
|
|
805
|
+
restr_stats = []
|
|
806
|
+
raw_remarks.append("REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT")
|
|
807
|
+
for k, n, l, pl in (("r.m.s.d.", "Bond distances, non H", "s_bond_nonh_d", "BOND LENGTHS REFINED ATOMS (A)"),
|
|
808
|
+
("r.m.s.d.", "Bond angles, non H", "s_angle_nonh_d", "BOND ANGLES REFINED ATOMS (DEGREES)")):
|
|
809
|
+
if k in stats["geom"]["summary"] and n in stats["geom"]["summary"][k]:
|
|
810
|
+
rr = gemmi.RefinementInfo.Restr(l)
|
|
811
|
+
rr.dev_ideal = stats["geom"]["summary"][k].get(n)
|
|
812
|
+
rr.count = stats["geom"]["summary"]["N restraints"].get(n)
|
|
813
|
+
rr.weight = stats["geom"]["summary"]["Mn(sigma)"].get(n)
|
|
814
|
+
restr_stats.append(rr)
|
|
815
|
+
raw_remarks.append(f"REMARK 3 {pl}:{rr.count:6d} ;{rr.dev_ideal:6.3f} ;{rr.weight:6.3f}")
|
|
816
|
+
ri.restr_stats = restr_stats
|
|
817
|
+
raw_remarks.append("REMARK 3")
|
|
818
|
+
self.st.meta.refinement = [ri]
|
|
819
|
+
self.st.raw_remarks = raw_remarks
|
|
820
|
+
|
|
733
821
|
# class Refine
|
servalcat/refine/refine_geom.py
CHANGED
|
@@ -14,7 +14,8 @@ import json
|
|
|
14
14
|
import servalcat # for version
|
|
15
15
|
from servalcat.utils import logger
|
|
16
16
|
from servalcat import utils
|
|
17
|
-
from servalcat.refine.refine import Geom, Refine
|
|
17
|
+
from servalcat.refine.refine import Geom, Refine, convert_stats_to_dicts
|
|
18
|
+
from servalcat.refmac import refmac_keywords
|
|
18
19
|
|
|
19
20
|
def add_arguments(parser):
|
|
20
21
|
group = parser.add_mutually_exclusive_group(required=True)
|
|
@@ -53,24 +54,32 @@ def parse_args(arg_list):
|
|
|
53
54
|
# parse_args()
|
|
54
55
|
|
|
55
56
|
def add_program_info_to_dictionary(block, comp_id, program_name="servalcat", descriptor="optimization tool"):
|
|
56
|
-
|
|
57
|
-
#
|
|
58
|
-
for
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
57
|
+
# old acedrg used _pdbx_chem_comp_description_generator. and descriptor
|
|
58
|
+
# new acedrg (>280?) uses _acedrg_chem_comp_descriptor. and type
|
|
59
|
+
for tag, name in (("_acedrg_chem_comp_descriptor.", "type"),
|
|
60
|
+
("_pdbx_chem_comp_description_generator.", "descriptor")):
|
|
61
|
+
tab = block.find(tag, ["program_name", "program_version", name])
|
|
62
|
+
if tab:
|
|
63
|
+
loop = tab.loop
|
|
64
|
+
# just overwrite version if it's there
|
|
65
|
+
for row in tab:
|
|
66
|
+
if row.str(0) == program_name and row.str(2) == descriptor:
|
|
67
|
+
row[1] = gemmi.cif.quote(servalcat.__version__)
|
|
68
|
+
return
|
|
69
|
+
break
|
|
70
|
+
else:
|
|
71
|
+
# it may be strange to say _acedrg in this case..
|
|
72
|
+
name = "type"
|
|
73
|
+
loop = block.init_loop("_acedrg_chem_comp_descriptor.", ["comp_id",
|
|
74
|
+
"program_name",
|
|
75
|
+
"program_version",
|
|
76
|
+
name])
|
|
68
77
|
tags = [x[x.index(".")+1:] for x in loop.tags]
|
|
69
78
|
row = ["" for _ in range(len(tags))]
|
|
70
79
|
for tag, val in (("comp_id", comp_id),
|
|
71
80
|
("program_name", program_name),
|
|
72
81
|
("program_version", servalcat.__version__),
|
|
73
|
-
(
|
|
82
|
+
(name, descriptor)):
|
|
74
83
|
if tag in tags: row[tags.index(tag)] = val
|
|
75
84
|
loop.add_row(gemmi.cif.quote_list(row))
|
|
76
85
|
# add_program_info_to_dictionary()
|
|
@@ -122,18 +131,16 @@ def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0
|
|
|
122
131
|
doc.write_file(output_prefix + "_updated.cif", style=gemmi.cif.Style.Aligned)
|
|
123
132
|
logger.writeln("Updated dictionary saved: {}".format(output_prefix + "_updated.cif"))
|
|
124
133
|
with open(output_prefix + "_stats.json", "w") as ofs:
|
|
125
|
-
for
|
|
126
|
-
|
|
127
|
-
s["geom"] = s["geom"].to_dict()
|
|
128
|
-
json.dump(all_stats, ofs, indent=2)
|
|
134
|
+
json.dump([convert_stats_to_dicts(x) for x in all_stats],
|
|
135
|
+
ofs, indent=2)
|
|
129
136
|
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
130
137
|
# refine_and_update_dictionary()
|
|
131
138
|
|
|
132
|
-
def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize,
|
|
139
|
+
def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize, params,
|
|
133
140
|
find_links=False, use_ncsr=False):
|
|
134
141
|
st = utils.fileio.read_structure(model_in)
|
|
135
142
|
utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
|
|
136
|
-
if st.ncs:
|
|
143
|
+
if not all(op.given for op in st.ncs):
|
|
137
144
|
st2 = st.clone()
|
|
138
145
|
logger.writeln("Take NCS constraints into account.")
|
|
139
146
|
st2.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
|
|
@@ -141,27 +148,26 @@ def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefi
|
|
|
141
148
|
|
|
142
149
|
monlib = utils.restraints.load_monomer_library(st, monomer_dir=monomer_dir,
|
|
143
150
|
cif_files=cif_files,
|
|
144
|
-
stop_for_unknowns=True
|
|
151
|
+
stop_for_unknowns=True,
|
|
152
|
+
params=params)
|
|
145
153
|
utils.restraints.find_and_fix_links(st, monlib, add_found=find_links) # should remove unknown id here?
|
|
146
154
|
try:
|
|
147
|
-
topo,
|
|
148
|
-
|
|
155
|
+
topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=h_change,
|
|
156
|
+
check_hydrogen=(h_change==gemmi.HydrogenChange.NoChange),
|
|
157
|
+
params=params)
|
|
149
158
|
except RuntimeError as e:
|
|
150
159
|
raise SystemExit("Error: {}".format(e))
|
|
151
|
-
|
|
160
|
+
|
|
152
161
|
if use_ncsr:
|
|
153
162
|
ncslist = utils.restraints.prepare_ncs_restraints(st)
|
|
154
163
|
else:
|
|
155
164
|
ncslist = False
|
|
156
|
-
geom = Geom(st, topo, monlib, shake_rms=randomize,
|
|
165
|
+
geom = Geom(st, topo, monlib, shake_rms=randomize, params=params, ncslist=ncslist)
|
|
157
166
|
refiner = Refine(st, geom)
|
|
158
|
-
stats = refiner.run_cycles(ncycle
|
|
167
|
+
stats = refiner.run_cycles(ncycle,
|
|
168
|
+
stats_json_out=output_prefix + "_stats.json")
|
|
159
169
|
refiner.st.name = output_prefix
|
|
160
170
|
utils.fileio.write_model(refiner.st, output_prefix, pdb=True, cif=True)
|
|
161
|
-
with open(output_prefix + "_stats.json", "w") as ofs:
|
|
162
|
-
for s in stats: s["geom"] = s["geom"].to_dict()
|
|
163
|
-
json.dump(stats, ofs, indent=2)
|
|
164
|
-
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
165
171
|
# refine_geom()
|
|
166
172
|
|
|
167
173
|
def main(args):
|
|
@@ -170,6 +176,7 @@ def main(args):
|
|
|
170
176
|
if args.keyword_file: keywords.extend(l for f in sum(args.keyword_file, []) for l in open(f))
|
|
171
177
|
decide_prefix = lambda f: utils.fileio.splitext(os.path.basename(f))[0] + "_refined"
|
|
172
178
|
if args.model:
|
|
179
|
+
params = refmac_keywords.parse_keywords(keywords)
|
|
173
180
|
if not args.output_prefix:
|
|
174
181
|
args.output_prefix = decide_prefix(args.model)
|
|
175
182
|
if args.ligand:
|
|
@@ -185,7 +192,7 @@ def main(args):
|
|
|
185
192
|
ncycle=args.ncycle,
|
|
186
193
|
output_prefix=args.output_prefix,
|
|
187
194
|
randomize=args.randomize,
|
|
188
|
-
|
|
195
|
+
params=params,
|
|
189
196
|
find_links=args.find_links,
|
|
190
197
|
use_ncsr=args.ncsr)
|
|
191
198
|
else:
|
servalcat/refine/refine_spa.py
CHANGED
|
@@ -8,7 +8,6 @@ Mozilla Public License, version 2.0; see LICENSE.
|
|
|
8
8
|
from __future__ import absolute_import, division, print_function, generators
|
|
9
9
|
import gemmi
|
|
10
10
|
import argparse
|
|
11
|
-
import json
|
|
12
11
|
import numpy
|
|
13
12
|
from servalcat.utils import logger
|
|
14
13
|
from servalcat import utils
|
|
@@ -16,6 +15,7 @@ from servalcat.spa.run_refmac import check_args, process_input, calc_fsc, calc_f
|
|
|
16
15
|
from servalcat.spa import fofc
|
|
17
16
|
from servalcat.refine import spa
|
|
18
17
|
from servalcat.refine.refine import Geom, Refine
|
|
18
|
+
from servalcat.refmac import refmac_keywords
|
|
19
19
|
b_to_u = utils.model.b_to_u
|
|
20
20
|
|
|
21
21
|
def add_arguments(parser):
|
|
@@ -56,6 +56,7 @@ def add_arguments(parser):
|
|
|
56
56
|
parser.add_argument('--hydrogen', default="all", choices=["all", "yes", "no"],
|
|
57
57
|
help="all: add riding hydrogen atoms, yes: use hydrogen atoms if present, no: remove hydrogen atoms in input. "
|
|
58
58
|
"Default: %(default)s")
|
|
59
|
+
parser.add_argument('--hout', action='store_true', help="write hydrogen atoms in the output model")
|
|
59
60
|
parser.add_argument('--jellybody', action='store_true',
|
|
60
61
|
help="Use jelly body restraints")
|
|
61
62
|
parser.add_argument('--jellybody_params', nargs=2, type=float,
|
|
@@ -85,6 +86,10 @@ def add_arguments(parser):
|
|
|
85
86
|
help="number of CG cycles (default: %(default)d)")
|
|
86
87
|
parser.add_argument('--weight', type=float,
|
|
87
88
|
help="refinement weight. default: automatic")
|
|
89
|
+
parser.add_argument('--no_weight_adjust', action='store_true',
|
|
90
|
+
help='Do not adjust weight during refinement')
|
|
91
|
+
parser.add_argument('--target_bond_rmsz_range', nargs=2, type=float, default=[0.5, 1.],
|
|
92
|
+
help='Bond rmsz range for weight adjustment (default: %(default)s)')
|
|
88
93
|
parser.add_argument('--adpr_weight', type=float, default=1.,
|
|
89
94
|
help="ADP restraint weight in B (default: %(default)f)")
|
|
90
95
|
parser.add_argument('--ncsr', action='store_true',
|
|
@@ -93,6 +98,7 @@ def add_arguments(parser):
|
|
|
93
98
|
help="reset all atomic B values to specified value")
|
|
94
99
|
parser.add_argument('--fix_xyz', action="store_true")
|
|
95
100
|
parser.add_argument('--adp', choices=["fix", "iso", "aniso"], default="iso")
|
|
101
|
+
parser.add_argument('--refine_all_occ', action="store_true")
|
|
96
102
|
parser.add_argument('--max_dist_for_adp_restraint', type=float, default=4.)
|
|
97
103
|
parser.add_argument('--adp_restraint_power', type=float)
|
|
98
104
|
parser.add_argument('--adp_restraint_exp_fac', type=float)
|
|
@@ -112,6 +118,8 @@ def add_arguments(parser):
|
|
|
112
118
|
help="Use scattering factor for charged atoms. Use it with care.")
|
|
113
119
|
parser.add_argument("--keep_entities", action='store_true',
|
|
114
120
|
help="Do not override entities")
|
|
121
|
+
parser.add_argument("--write_trajectory", action='store_true',
|
|
122
|
+
help="Write all output from cycles")
|
|
115
123
|
# add_arguments()
|
|
116
124
|
|
|
117
125
|
def parse_args(arg_list):
|
|
@@ -125,13 +133,14 @@ def main(args):
|
|
|
125
133
|
args.invert_mask = False
|
|
126
134
|
args.trim_fofc_mtz = args.mask_for_fofc is not None
|
|
127
135
|
args.cross_validation_method = "throughout"
|
|
128
|
-
check_args(args)
|
|
129
|
-
|
|
136
|
+
check_args(args)
|
|
137
|
+
params = refmac_keywords.parse_keywords(args.keywords + [l for f in args.keyword_file for l in open(f)])
|
|
138
|
+
params["write_trajectory"] = args.write_trajectory
|
|
130
139
|
|
|
131
140
|
st = utils.fileio.read_structure(args.model)
|
|
132
141
|
try:
|
|
133
142
|
monlib = utils.restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
|
|
134
|
-
stop_for_unknowns=True)
|
|
143
|
+
stop_for_unknowns=True, params=params)
|
|
135
144
|
except RuntimeError as e:
|
|
136
145
|
raise SystemExit("Error: {}".format(e))
|
|
137
146
|
if not args.keep_entities:
|
|
@@ -165,14 +174,14 @@ def main(args):
|
|
|
165
174
|
"yes":gemmi.HydrogenChange.NoChange,
|
|
166
175
|
"no":gemmi.HydrogenChange.Remove}[args.hydrogen]
|
|
167
176
|
try:
|
|
168
|
-
topo,
|
|
169
|
-
|
|
177
|
+
topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=h_change,
|
|
178
|
+
check_hydrogen=(args.hydrogen=="yes"),
|
|
179
|
+
params=params)
|
|
170
180
|
except RuntimeError as e:
|
|
171
181
|
raise SystemExit("Error: {}".format(e))
|
|
172
|
-
|
|
182
|
+
|
|
173
183
|
# initialize ADP
|
|
174
|
-
|
|
175
|
-
utils.model.reset_adp(st[0], args.bfactor, args.adp == "aniso")
|
|
184
|
+
utils.model.reset_adp(st[0], args.bfactor, args.adp)
|
|
176
185
|
|
|
177
186
|
# auto weight
|
|
178
187
|
if args.weight is None:
|
|
@@ -200,7 +209,7 @@ def main(args):
|
|
|
200
209
|
else:
|
|
201
210
|
ncslist = False
|
|
202
211
|
geom = Geom(st, topo, monlib, shake_rms=args.randomize, adpr_w=args.adpr_weight,
|
|
203
|
-
|
|
212
|
+
params=params, unrestrained=args.jellyonly,
|
|
204
213
|
ncslist=ncslist)
|
|
205
214
|
ll = spa.LL_SPA(hkldata, st, monlib,
|
|
206
215
|
lab_obs="F_map1" if args.cross_validation else "FP",
|
|
@@ -209,7 +218,8 @@ def main(args):
|
|
|
209
218
|
refine_xyz=not args.fix_xyz,
|
|
210
219
|
adp_mode=dict(fix=0, iso=1, aniso=2)[args.adp],
|
|
211
220
|
refine_h=args.refine_h,
|
|
212
|
-
|
|
221
|
+
params=params,
|
|
222
|
+
refine_occ=args.refine_all_occ)
|
|
213
223
|
|
|
214
224
|
geom.geom.adpr_max_dist = args.max_dist_for_adp_restraint
|
|
215
225
|
if args.adp_restraint_power is not None: geom.geom.adpr_d_power = args.adp_restraint_power
|
|
@@ -223,19 +233,18 @@ def main(args):
|
|
|
223
233
|
#for cra in st[0].all():
|
|
224
234
|
# cra.atom.pos += gemmi.Position(0.3,0,0)
|
|
225
235
|
|
|
226
|
-
stats = refiner.run_cycles(args.ncycle, weight=args.weight
|
|
236
|
+
stats = refiner.run_cycles(args.ncycle, weight=args.weight,
|
|
237
|
+
weight_adjust=not args.no_weight_adjust,
|
|
238
|
+
weight_adjust_bond_rmsz_range=args.target_bond_rmsz_range,
|
|
239
|
+
stats_json_out=args.output_prefix + "_stats.json")
|
|
227
240
|
if not args.hklin and not args.no_trim:
|
|
228
241
|
refiner.st.cell = maps[0][0].unit_cell
|
|
229
242
|
refiner.st.setup_cell_images()
|
|
230
243
|
|
|
231
244
|
refiner.st.name = args.output_prefix
|
|
232
|
-
utils.fileio.write_model(refiner.st, args.output_prefix, pdb=True, cif=True)
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
if "geom" in s: s["geom"] = s["geom"].to_dict()
|
|
236
|
-
json.dump(stats, ofs, indent=2)
|
|
237
|
-
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
238
|
-
|
|
245
|
+
utils.fileio.write_model(refiner.st, args.output_prefix, pdb=True, cif=True, hout=args.hout)
|
|
246
|
+
if params["write_trajectory"]:
|
|
247
|
+
utils.fileio.write_model(refiner.st_traj, args.output_prefix + "_traj", cif=True)
|
|
239
248
|
if args.hklin:
|
|
240
249
|
return
|
|
241
250
|
|
|
@@ -243,7 +252,7 @@ def main(args):
|
|
|
243
252
|
st_expanded = refiner.st.clone()
|
|
244
253
|
if not all(op.given for op in st.ncs):
|
|
245
254
|
utils.model.expand_ncs(st_expanded)
|
|
246
|
-
utils.fileio.write_model(st_expanded, args.output_prefix+"_expanded", pdb=True, cif=True)
|
|
255
|
+
utils.fileio.write_model(st_expanded, args.output_prefix+"_expanded", pdb=True, cif=True, hout=args.hout)
|
|
247
256
|
|
|
248
257
|
# Calc FSC
|
|
249
258
|
mask = utils.fileio.read_ccp4_map(args.mask)[0] if args.mask else None
|
|
@@ -252,7 +261,7 @@ def main(args):
|
|
|
252
261
|
soft_edge=args.mask_soft_edge,
|
|
253
262
|
b_before_mask=args.b_before_mask,
|
|
254
263
|
no_sharpen_before_mask=args.no_sharpen_before_mask,
|
|
255
|
-
make_hydrogen=
|
|
264
|
+
make_hydrogen="yes", # no change needed in the model
|
|
256
265
|
monlib=monlib,
|
|
257
266
|
blur=args.blur,
|
|
258
267
|
d_min_fsc=args.fsc_resolution,
|
|
@@ -273,9 +282,9 @@ def main(args):
|
|
|
273
282
|
adpstats_txt += " Chain {0:{1}s}".format(chain, max_chain_len) if chain!="*" else " {0:{1}s}".format("All", max_chain_len+6)
|
|
274
283
|
adpstats_txt += " ({0:{1}d} atoms) min={2:5.1f} median={3:5.1f} max={4:5.1f} A^2\n".format(natoms, max_num_len, qs[0],qs[2],qs[4])
|
|
275
284
|
|
|
276
|
-
if "geom" in stats[-1]:
|
|
277
|
-
rmsbond = stats[-1]["geom"]["r.m.s.d."]["Bond distances, non H"]
|
|
278
|
-
rmsangle = stats[-1]["geom"]["r.m.s.d."]["Bond angles, non H"]
|
|
285
|
+
if "geom" in stats[-1] and "Bond distances, non H" in stats[-1]["geom"]["summary"].index:
|
|
286
|
+
rmsbond = stats[-1]["geom"]["summary"]["r.m.s.d."]["Bond distances, non H"]
|
|
287
|
+
rmsangle = stats[-1]["geom"]["summary"]["r.m.s.d."]["Bond angles, non H"]
|
|
279
288
|
else:
|
|
280
289
|
rmsbond, rmsangle = numpy.nan, numpy.nan
|
|
281
290
|
if args.mask_for_fofc:
|