servalcat 0.4.60__cp38-cp38-win_amd64.whl → 0.4.88__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +2 -2
- servalcat/ext.cp38-win_amd64.pyd +0 -0
- servalcat/refine/refine.py +156 -66
- servalcat/refine/refine_geom.py +46 -33
- servalcat/refine/refine_spa.py +53 -35
- servalcat/refine/refine_xtal.py +51 -25
- servalcat/refine/spa.py +6 -2
- servalcat/refine/xtal.py +143 -97
- servalcat/refmac/exte.py +11 -11
- servalcat/refmac/refmac_keywords.py +134 -31
- servalcat/refmac/refmac_wrapper.py +107 -64
- servalcat/spa/fofc.py +12 -1
- servalcat/spa/fsc.py +3 -1
- servalcat/spa/run_refmac.py +19 -8
- servalcat/utils/commands.py +126 -6
- servalcat/utils/fileio.py +16 -24
- servalcat/utils/hkl.py +25 -12
- servalcat/utils/maps.py +10 -5
- servalcat/utils/model.py +25 -17
- servalcat/utils/refmac.py +28 -15
- servalcat/utils/restraints.py +192 -15
- servalcat/xtal/french_wilson.py +34 -28
- servalcat/xtal/sigmaa.py +358 -139
- servalcat/xtal/twin.py +115 -0
- {servalcat-0.4.60.dist-info → servalcat-0.4.88.dist-info}/METADATA +3 -4
- servalcat-0.4.88.dist-info/RECORD +45 -0
- {servalcat-0.4.60.dist-info → servalcat-0.4.88.dist-info}/WHEEL +1 -1
- servalcat-0.4.60.dist-info/RECORD +0 -44
- {servalcat-0.4.60.dist-info → servalcat-0.4.88.dist-info}/entry_points.txt +0 -0
- {servalcat-0.4.60.dist-info → servalcat-0.4.88.dist-info}/licenses/LICENSE +0 -0
servalcat/__init__.py
CHANGED
servalcat/ext.cp38-win_amd64.pyd
CHANGED
|
Binary file
|
servalcat/refine/refine.py
CHANGED
|
@@ -10,13 +10,13 @@ import os
|
|
|
10
10
|
import re
|
|
11
11
|
import gemmi
|
|
12
12
|
import numpy
|
|
13
|
+
import json
|
|
13
14
|
import pandas
|
|
14
15
|
import scipy.sparse
|
|
15
16
|
import servalcat # for version
|
|
16
17
|
from servalcat.utils import logger
|
|
17
18
|
from servalcat import utils
|
|
18
19
|
from servalcat.refmac import exte
|
|
19
|
-
from servalcat.refmac.refmac_keywords import parse_keywords
|
|
20
20
|
from servalcat import ext
|
|
21
21
|
from . import cgsolve
|
|
22
22
|
u_to_b = utils.model.u_to_b
|
|
@@ -29,7 +29,7 @@ b_to_u = utils.model.b_to_u
|
|
|
29
29
|
|
|
30
30
|
class Geom:
|
|
31
31
|
def __init__(self, st, topo, monlib, adpr_w=1, shake_rms=0,
|
|
32
|
-
|
|
32
|
+
params=None, unrestrained=False, use_nucleus=False,
|
|
33
33
|
ncslist=None, atom_pos=None):
|
|
34
34
|
self.st = st
|
|
35
35
|
self.atoms = [None for _ in range(self.st[0].count_atom_sites())]
|
|
@@ -40,7 +40,12 @@ class Geom:
|
|
|
40
40
|
self.atom_pos = list(range(len(self.atoms)))
|
|
41
41
|
self.n_refine_atoms = max(self.atom_pos) + 1
|
|
42
42
|
self.lookup = {x.atom: x for x in self.st[0].all()}
|
|
43
|
-
|
|
43
|
+
try:
|
|
44
|
+
self.geom = ext.Geometry(self.st, self.atom_pos, monlib.ener_lib)
|
|
45
|
+
except TypeError as e:
|
|
46
|
+
raise SystemExit(f"An error occurred while creating the Geometry object:\n{e}\n\n"
|
|
47
|
+
"This likely indicates an installation issue. "
|
|
48
|
+
"Please verify that you have the correct version of gemmi installed and that both gemmi and servalcat were compiled in the same environment.")
|
|
44
49
|
self.specs = utils.model.find_special_positions(self.st)
|
|
45
50
|
#cs_count = len(self.st.find_spacegroup().operations())
|
|
46
51
|
for atom, images, matp, mata in self.specs:
|
|
@@ -48,45 +53,34 @@ class Geom:
|
|
|
48
53
|
n_sym = len(images) + 1
|
|
49
54
|
self.geom.specials.append(ext.Geometry.Special(atom, matp, mata, n_sym))
|
|
50
55
|
self.adpr_w = adpr_w
|
|
56
|
+
self.occr_w = 1.
|
|
51
57
|
self.unrestrained = unrestrained
|
|
52
58
|
if shake_rms > 0:
|
|
53
59
|
numpy.random.seed(0)
|
|
54
60
|
utils.model.shake_structure(self.st, shake_rms, copy=False)
|
|
55
|
-
utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
|
|
56
|
-
if not self.unrestrained:
|
|
57
|
-
self.geom.load_topo(topo)
|
|
58
|
-
self.check_chemtypes(os.path.join(monlib.path(), "ener_lib.cif"), topo)
|
|
61
|
+
#utils.fileio.write_model(self.st, "shaken", pdb=True, cif=True)
|
|
59
62
|
self.use_nucleus = use_nucleus
|
|
60
63
|
self.calc_kwds = {"use_nucleus": self.use_nucleus}
|
|
61
|
-
if
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
64
|
+
if params is None:
|
|
65
|
+
params = {}
|
|
66
|
+
for k in ("wbond", "wangle", "wtors", "wplane", "wchir", "wvdw", "wncs"):
|
|
67
|
+
if k in params:
|
|
68
|
+
self.calc_kwds[k] = params[k]
|
|
69
|
+
logger.writeln("setting geometry weight {}= {}".format(k, params[k]))
|
|
70
|
+
inc_tors, exc_tors = utils.restraints.make_torsion_rules(params.get("restr", {}))
|
|
71
|
+
rtors = utils.restraints.select_restrained_torsions(monlib, inc_tors, exc_tors)
|
|
72
|
+
self.geom.mon_tors_names = rtors["monomer"]
|
|
73
|
+
self.geom.link_tors_names = rtors["link"]
|
|
74
|
+
self.group_occ = GroupOccupancy(self.st, params.get("occu"))
|
|
75
|
+
if not self.unrestrained:
|
|
76
|
+
self.geom.load_topo(topo)
|
|
77
|
+
exte.read_external_restraints(params.get("exte", []), self.st, self.geom)
|
|
71
78
|
self.geom.finalize_restraints()
|
|
72
79
|
self.outlier_sigmas = dict(bond=5, angle=5, torsion=5, vdw=5, ncs=5, chir=5, plane=5, staca=5, stacd=5, per_atom=5)
|
|
73
80
|
self.parents = {}
|
|
74
81
|
self.ncslist = ncslist
|
|
75
82
|
# __init__()
|
|
76
83
|
|
|
77
|
-
def check_chemtypes(self, enerlib_path, topo):
|
|
78
|
-
block = gemmi.cif.read(enerlib_path).sole_block()
|
|
79
|
-
all_types = set(block.find_values("_lib_atom.type"))
|
|
80
|
-
for ci in topo.chain_infos:
|
|
81
|
-
for ri in ci.res_infos:
|
|
82
|
-
cc_all = {x: ri.get_final_chemcomp(x) for x in set(a.altloc for a in ri.res)}
|
|
83
|
-
for a in ri.res:
|
|
84
|
-
cca = cc_all[a.altloc].find_atom(a.name)
|
|
85
|
-
if cca is None: # I believe it won't happen..
|
|
86
|
-
logger.writeln("WARNING: restraint for {} not found.".format(self.lookup[a]))
|
|
87
|
-
elif cca.chem_type not in all_types:
|
|
88
|
-
raise RuntimeError("Energy type {} of {} not found in ener_lib.".format(cca.chem_type,
|
|
89
|
-
self.lookup[a]))
|
|
90
84
|
def set_h_parents(self):
|
|
91
85
|
self.parents = {}
|
|
92
86
|
for bond in self.geom.bonds:
|
|
@@ -104,20 +98,28 @@ class Geom:
|
|
|
104
98
|
return self.geom.calc(check_only=target_only, **self.calc_kwds)
|
|
105
99
|
def calc_adp_restraint(self, target_only):
|
|
106
100
|
return self.geom.calc_adp_restraint(target_only, self.adpr_w)
|
|
107
|
-
def
|
|
101
|
+
def calc_occ_restraint(self, target_only):
|
|
102
|
+
return self.geom.calc_occ_restraint(target_only, self.occr_w)
|
|
103
|
+
def calc_target(self, target_only, refine_xyz, adp_mode, use_occr):
|
|
108
104
|
self.geom.clear_target()
|
|
109
105
|
geom_x = self.calc(target_only) if refine_xyz else 0
|
|
110
106
|
geom_a = self.calc_adp_restraint(target_only) if adp_mode > 0 else 0
|
|
107
|
+
geom_q = self.calc_occ_restraint(target_only) if use_occr > 0 else 0
|
|
111
108
|
logger.writeln(" geom_x = {}".format(geom_x))
|
|
112
109
|
logger.writeln(" geom_a = {}".format(geom_a))
|
|
113
|
-
|
|
110
|
+
logger.writeln(" geom_q = {}".format(geom_q))
|
|
111
|
+
geom = geom_x + geom_a + geom_q
|
|
114
112
|
if not target_only:
|
|
115
113
|
self.geom.spec_correction()
|
|
116
114
|
return geom
|
|
117
115
|
|
|
118
|
-
def show_model_stats(self, show_outliers=True):
|
|
119
|
-
|
|
120
|
-
|
|
116
|
+
def show_model_stats(self, refine_xyz=True, adp_mode=1, use_occr=False, show_outliers=True):
|
|
117
|
+
if refine_xyz:
|
|
118
|
+
self.calc(True)
|
|
119
|
+
if adp_mode > 0:
|
|
120
|
+
self.calc_adp_restraint(True)
|
|
121
|
+
if use_occr:
|
|
122
|
+
self.calc_occ_restraint(True)
|
|
121
123
|
ret = {"outliers": {}}
|
|
122
124
|
if show_outliers:
|
|
123
125
|
get_table = dict(bond=self.geom.reporting.get_bond_outliers,
|
|
@@ -165,15 +167,16 @@ class Geom:
|
|
|
165
167
|
logger.writeln(df.to_string(float_format="{:.3f}".format, index=False) + "\n")
|
|
166
168
|
|
|
167
169
|
# Per-atom score
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
df.
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
170
|
+
if 0:
|
|
171
|
+
peratom = self.geom.reporting.per_atom_score(len(self.atoms), self.use_nucleus, "mean")
|
|
172
|
+
df = pandas.DataFrame(peratom)
|
|
173
|
+
df.insert(0, "atom", [str(self.lookup[x]) for x in self.atoms])
|
|
174
|
+
df = df[df["total"] >= self.outlier_sigmas["per_atom"]]
|
|
175
|
+
if show_outliers and len(df.index) > 0:
|
|
176
|
+
df.sort_values("total", ascending=False, inplace=True)
|
|
177
|
+
ret["outliers"]["per_atom"] = df
|
|
178
|
+
logger.writeln(" *** Per-atom violations (Z >= {}) ***\n".format(self.outlier_sigmas["per_atom"]))
|
|
179
|
+
logger.writeln(df.to_string(float_format="{:.2f}".format, index=False) + "\n")
|
|
177
180
|
|
|
178
181
|
df = pandas.DataFrame(self.geom.reporting.get_summary_table(self.use_nucleus))
|
|
179
182
|
df = df.set_index("Restraint type").rename_axis(index=None)
|
|
@@ -184,10 +187,12 @@ class Geom:
|
|
|
184
187
|
def show_binstats(df, cycle_number):
|
|
185
188
|
forplot = []
|
|
186
189
|
rlabs = [x for x in df if x.startswith("R")]
|
|
190
|
+
fsclabs = [x for x in df if x.startswith("fsc")]
|
|
187
191
|
cclabs = [x for x in df if x.startswith("CC")]
|
|
188
192
|
dlabs = [x for x in df if re.search("^D[0-9]*", x)]
|
|
189
193
|
if "fsc_model" in df: forplot.append(["FSC", ["fsc_model"]])
|
|
190
194
|
if rlabs: forplot.append(["R", rlabs])
|
|
195
|
+
if fsclabs: forplot.append(["FSC", fsclabs])
|
|
191
196
|
if cclabs: forplot.append(["CC", cclabs])
|
|
192
197
|
if dlabs: forplot.append(["ML parameters - D", dlabs])
|
|
193
198
|
if "S" in df: forplot.append(["ML parameters - Sigma", ["S"]])
|
|
@@ -197,6 +202,29 @@ def show_binstats(df, cycle_number):
|
|
|
197
202
|
logger.writeln(lstr)
|
|
198
203
|
# show_binstats()
|
|
199
204
|
|
|
205
|
+
def convert_stats_to_dicts(stats):
    """Return a copy of *stats* in which the "geom" tables are plain Python containers.

    stats must be a list of dict.  For every entry, the value stored under
    "geom" is expected to provide a "summary" object and an "outliers"
    mapping whose members expose ``to_dict`` (elsewhere in this module they
    are pandas DataFrames).  The summary is converted with ``to_dict()`` and
    each outlier table with ``to_dict(orient="records")``.  All other keys are
    carried over by reference, unchanged.

    The input list and its dictionaries are not modified; new ones are built.
    The "outliers" key is only present in the result when there is at least
    one outlier table.
    """
    converted = []
    for entry in stats:
        plain = {}
        for key, value in entry.items():
            if key != "geom":
                plain[key] = value
                continue
            geom = {"summary": value["summary"].to_dict()}
            for label, table in value["outliers"].items():
                # created lazily so that an empty "outliers" mapping leaves no key behind
                geom.setdefault("outliers", {})[label] = table.to_dict(orient="records")
            plain["geom"] = geom
        converted.append(plain)
    return converted
# convert_stats_to_dicts()
|
|
218
|
+
|
|
219
|
+
def write_stats_json_safe(stats, json_out):
    """Write refinement statistics to *json_out* as indented JSON.

    stats is first passed through convert_stats_to_dicts().  The JSON text is
    written to a sibling file named ``json_out + ".part"`` and that file is
    then moved over *json_out* with os.replace(), so json_out itself is only
    touched once the dump has finished.  A log line is emitted afterwards.
    """
    serializable = convert_stats_to_dicts(stats)
    partial_path = json_out + ".part"
    with open(partial_path, "w") as handle:
        json.dump(serializable, handle, indent=2)
    # swap the finished file into place in a single step
    os.replace(partial_path, json_out)
    logger.writeln(f"Refinement statistics saved: {json_out}")
# write_stats_json_safe()
|
|
227
|
+
|
|
200
228
|
class GroupOccupancy:
|
|
201
229
|
# TODO max may not be one. should check multiplicity
|
|
202
230
|
def __init__(self, st, params):
|
|
@@ -377,10 +405,11 @@ class GroupOccupancy:
|
|
|
377
405
|
|
|
378
406
|
class Refine:
|
|
379
407
|
def __init__(self, st, geom, ll=None, refine_xyz=True, adp_mode=1, refine_h=False, refine_occ=False,
|
|
380
|
-
unrestrained=False,
|
|
408
|
+
unrestrained=False, params=None):
|
|
381
409
|
assert adp_mode in (0, 1, 2) # 0=fix, 1=iso, 2=aniso
|
|
382
410
|
assert geom is not None
|
|
383
411
|
self.st = st # clone()?
|
|
412
|
+
self.st_traj = None
|
|
384
413
|
self.atoms = geom.atoms # not a copy
|
|
385
414
|
self.geom = geom
|
|
386
415
|
self.ll = ll
|
|
@@ -388,11 +417,15 @@ class Refine:
|
|
|
388
417
|
self.adp_mode = 0 if self.ll is None else adp_mode
|
|
389
418
|
self.refine_xyz = refine_xyz
|
|
390
419
|
self.refine_occ = refine_occ
|
|
420
|
+
self.use_occr = self.refine_occ # for now?
|
|
391
421
|
self.unrestrained = unrestrained
|
|
392
422
|
self.refine_h = refine_h
|
|
393
423
|
self.h_inherit_parent_adp = self.adp_mode > 0 and not self.refine_h and self.st[0].has_hydrogen()
|
|
394
424
|
if self.h_inherit_parent_adp:
|
|
395
425
|
self.geom.set_h_parents()
|
|
426
|
+
if params and params.get("write_trajectory"):
|
|
427
|
+
self.st_traj = self.st.clone()
|
|
428
|
+
self.st_traj[-1].name = "0"
|
|
396
429
|
assert self.geom.group_occ.groups or self.n_params() > 0
|
|
397
430
|
# __init__()
|
|
398
431
|
|
|
@@ -513,8 +546,10 @@ class Refine:
|
|
|
513
546
|
self.ll.update_fc()
|
|
514
547
|
|
|
515
548
|
self.geom.setup_nonbonded(self.refine_xyz) # if refine_xyz=False, no need to do it every time
|
|
516
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
549
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
|
|
517
550
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
551
|
+
logger.writeln(f"atoms = {self.geom.geom.target.n_atoms()}")
|
|
552
|
+
logger.writeln(f"pairs = {self.geom.geom.target.n_pairs()}")
|
|
518
553
|
|
|
519
554
|
def get_x(self):
|
|
520
555
|
n_atoms = self.geom.n_refine_atoms
|
|
@@ -540,7 +575,7 @@ class Refine:
|
|
|
540
575
|
N = self.n_params()
|
|
541
576
|
geom = self.geom.calc_target(target_only,
|
|
542
577
|
not self.unrestrained and self.refine_xyz,
|
|
543
|
-
self.adp_mode)
|
|
578
|
+
self.adp_mode, self.use_occr)
|
|
544
579
|
if self.ll is not None:
|
|
545
580
|
ll = self.ll.calc_target()
|
|
546
581
|
logger.writeln(" ll= {}".format(ll))
|
|
@@ -627,49 +662,84 @@ class Refine:
|
|
|
627
662
|
|
|
628
663
|
return ret, shift_scale, f1
|
|
629
664
|
|
|
630
|
-
def run_cycles(self, ncycles, weight=1, debug=False
|
|
665
|
+
def run_cycles(self, ncycles, weight=1, weight_adjust=False, debug=False,
|
|
666
|
+
weight_adjust_bond_rmsz_range=(0.5, 1.), stats_json_out=None):
|
|
631
667
|
self.print_weights()
|
|
632
668
|
stats = [{"Ncyc": 0}]
|
|
633
669
|
self.geom.setup_nonbonded(self.refine_xyz)
|
|
634
|
-
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ)
|
|
670
|
+
self.geom.geom.setup_target(self.refine_xyz, self.adp_mode, self.refine_occ, self.use_occr)
|
|
635
671
|
logger.writeln("vdws = {}".format(len(self.geom.geom.vdws)))
|
|
636
|
-
|
|
637
|
-
|
|
672
|
+
logger.writeln(f"atoms = {self.geom.geom.target.n_atoms()}")
|
|
673
|
+
logger.writeln(f"pairs = {self.geom.geom.target.n_pairs()}")
|
|
674
|
+
stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
|
|
675
|
+
adp_mode=self.adp_mode,
|
|
676
|
+
use_occr=self.refine_occ,
|
|
677
|
+
show_outliers=True)
|
|
638
678
|
if self.ll is not None:
|
|
639
679
|
self.ll.update_fc()
|
|
640
680
|
self.ll.overall_scale()
|
|
641
681
|
self.ll.update_ml_params()
|
|
682
|
+
self.ll.prepare_target()
|
|
642
683
|
llstats = self.ll.calc_stats(bin_stats=True)
|
|
643
684
|
stats[-1]["data"] = {"summary": llstats["summary"],
|
|
644
685
|
"binned": llstats["bin_stats"].to_dict(orient="records")}
|
|
686
|
+
if "twin_alpha" in llstats:
|
|
687
|
+
stats[-1]["twin_alpha"] = llstats["twin_alpha"]
|
|
645
688
|
show_binstats(llstats["bin_stats"], 0)
|
|
646
689
|
if self.adp_mode > 0:
|
|
647
690
|
utils.model.adp_analysis(self.st)
|
|
691
|
+
if stats_json_out:
|
|
692
|
+
write_stats_json_safe(stats, stats_json_out)
|
|
648
693
|
occ_refine_flag = self.ll is not None and self.geom.group_occ.groups and self.geom.group_occ.ncycle > 0
|
|
649
694
|
|
|
650
695
|
for i in range(ncycles):
|
|
651
696
|
logger.writeln("\n====== CYCLE {:2d} ======\n".format(i+1))
|
|
652
|
-
|
|
697
|
+
logger.writeln(f" weight = {weight:.4e}")
|
|
698
|
+
if self.refine_xyz or self.adp_mode > 0 or self.refine_occ:
|
|
653
699
|
is_ok, shift_scale, fval = self.run_cycle(weight=weight)
|
|
654
|
-
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok
|
|
700
|
+
stats.append({"Ncyc": len(stats), "shift_scale": shift_scale, "fval": fval, "fval_decreased": is_ok,
|
|
701
|
+
"weight": weight})
|
|
702
|
+
elif occ_refine_flag:
|
|
703
|
+
stats.append({"Ncyc": len(stats)})
|
|
655
704
|
if occ_refine_flag:
|
|
656
705
|
stats[-1]["occ_refine"] = self.geom.group_occ.refine(self.ll, self.refine_h)
|
|
657
706
|
if debug: utils.fileio.write_model(self.st, "refined_{:02d}".format(i+1), pdb=True)#, cif=True)
|
|
658
|
-
|
|
659
|
-
|
|
707
|
+
stats[-1]["geom"] = self.geom.show_model_stats(refine_xyz=self.refine_xyz and not self.unrestrained,
|
|
708
|
+
adp_mode=self.adp_mode,
|
|
709
|
+
use_occr=self.refine_occ,
|
|
710
|
+
show_outliers=(i==ncycles-1))
|
|
660
711
|
if self.ll is not None:
|
|
661
712
|
self.ll.overall_scale()
|
|
662
713
|
f0 = self.ll.calc_target()
|
|
663
714
|
self.ll.update_ml_params()
|
|
715
|
+
self.ll.prepare_target()
|
|
664
716
|
llstats = self.ll.calc_stats(bin_stats=True)#(i==ncycles-1))
|
|
665
717
|
if llstats["summary"]["-LL"] > f0:
|
|
666
718
|
logger.writeln("WARNING: -LL has increased after ML parameter optimization:"
|
|
667
719
|
"{} to {}".format(f0, llstats["summary"]["-LL"]))
|
|
668
720
|
stats[-1]["data"] = {"summary": llstats["summary"],
|
|
669
721
|
"binned": llstats["bin_stats"].to_dict(orient="records")}
|
|
722
|
+
if "twin_alpha" in llstats:
|
|
723
|
+
stats[-1]["twin_alpha"] = llstats["twin_alpha"]
|
|
670
724
|
show_binstats(llstats["bin_stats"], i+1)
|
|
671
725
|
if self.adp_mode > 0:
|
|
672
726
|
utils.model.adp_analysis(self.st)
|
|
727
|
+
if (weight_adjust and self.refine_xyz and not self.unrestrained and self.ll is not None and
|
|
728
|
+
len(stats) > 2 and "Bond distances, non H" in stats[-1]["geom"]["summary"].index):
|
|
729
|
+
rmsz = stats[-1]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
|
|
730
|
+
rmsz0 = stats[-2]["geom"]["summary"]["r.m.s.Z"]["Bond distances, non H"]
|
|
731
|
+
if rmsz > weight_adjust_bond_rmsz_range[1] and rmsz > rmsz0:
|
|
732
|
+
weight /= 1.1
|
|
733
|
+
elif rmsz < weight_adjust_bond_rmsz_range[0] and rmsz0 < weight_adjust_bond_rmsz_range[0] and rmsz < rmsz0:
|
|
734
|
+
weight *= 1.3
|
|
735
|
+
elif rmsz > 1.5 * rmsz0:
|
|
736
|
+
weight /= 1.1
|
|
737
|
+
if self.st_traj is not None:
|
|
738
|
+
self.st_traj.add_model(self.st[0])
|
|
739
|
+
self.st_traj[-1].name = str(len(self.st_traj))
|
|
740
|
+
if stats_json_out:
|
|
741
|
+
write_stats_json_safe(stats, stats_json_out)
|
|
742
|
+
|
|
673
743
|
logger.writeln("")
|
|
674
744
|
|
|
675
745
|
# Make table
|
|
@@ -685,8 +755,8 @@ class Refine:
|
|
|
685
755
|
("r.m.s.Z", "Bond distances, non H", "zBOND"),
|
|
686
756
|
("r.m.s.d.", "Bond angles, non H", "rmsANGL"),
|
|
687
757
|
("r.m.s.Z", "Bond angles, non H", "zANGL")):
|
|
688
|
-
if k in d["geom"] and n in d["geom"][k]:
|
|
689
|
-
x[l] = d["geom"][k].get(n)
|
|
758
|
+
if k in d["geom"]["summary"] and n in d["geom"]["summary"][k]:
|
|
759
|
+
x[l] = d["geom"]["summary"][k].get(n)
|
|
690
760
|
geom_keys.add(l)
|
|
691
761
|
tmp.append(x)
|
|
692
762
|
df = pandas.DataFrame(tmp)
|
|
@@ -711,12 +781,20 @@ class Refine:
|
|
|
711
781
|
lstr = utils.make_loggraph_str(df, "stats vs cycle", forplot,
|
|
712
782
|
float_format="{:.4f}".format)
|
|
713
783
|
logger.writeln(lstr)
|
|
714
|
-
self.update_meta()
|
|
784
|
+
self.update_meta(stats[-1])
|
|
715
785
|
return stats
|
|
716
786
|
|
|
717
|
-
def update_meta(self):
|
|
787
|
+
def update_meta(self, stats):
|
|
718
788
|
# TODO write stats. probably geom.reporting.get_summary_table should return with _refine_ls_restr.type names
|
|
719
|
-
|
|
789
|
+
# should remove st.mod_residues?
|
|
790
|
+
self.st.helices.clear()
|
|
791
|
+
self.st.sheets.clear()
|
|
792
|
+
raw_remarks = [f'REMARK 3',
|
|
793
|
+
f'REMARK 3 REFINEMENT.',
|
|
794
|
+
f'REMARK 3 PROGRAM : SERVALCAT {servalcat.__version__}',
|
|
795
|
+
f'REMARK 3 AUTHORS : YAMASHITA,MURSHUDOV',
|
|
796
|
+
f'REMARK 3',
|
|
797
|
+
]
|
|
720
798
|
si = gemmi.SoftwareItem()
|
|
721
799
|
si.classification = gemmi.SoftwareItem.Classification.Refinement
|
|
722
800
|
si.name = "Servalcat"
|
|
@@ -724,10 +802,22 @@ class Refine:
|
|
|
724
802
|
si.date = servalcat.__date__
|
|
725
803
|
self.st.meta.software = [si]
|
|
726
804
|
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
805
|
+
ri = gemmi.RefinementInfo()
|
|
806
|
+
if "geom" in stats:
|
|
807
|
+
restr_stats = []
|
|
808
|
+
raw_remarks.append("REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES COUNT RMS WEIGHT")
|
|
809
|
+
for k, n, l, pl in (("r.m.s.d.", "Bond distances, non H", "s_bond_nonh_d", "BOND LENGTHS REFINED ATOMS (A)"),
|
|
810
|
+
("r.m.s.d.", "Bond angles, non H", "s_angle_nonh_d", "BOND ANGLES REFINED ATOMS (DEGREES)")):
|
|
811
|
+
if k in stats["geom"]["summary"] and n in stats["geom"]["summary"][k]:
|
|
812
|
+
rr = gemmi.RefinementInfo.Restr(l)
|
|
813
|
+
rr.dev_ideal = stats["geom"]["summary"][k].get(n)
|
|
814
|
+
rr.count = stats["geom"]["summary"]["N restraints"].get(n)
|
|
815
|
+
rr.weight = stats["geom"]["summary"]["Mn(sigma)"].get(n)
|
|
816
|
+
restr_stats.append(rr)
|
|
817
|
+
raw_remarks.append(f"REMARK 3 {pl}:{rr.count:6d} ;{rr.dev_ideal:6.3f} ;{rr.weight:6.3f}")
|
|
818
|
+
ri.restr_stats = restr_stats
|
|
819
|
+
raw_remarks.append("REMARK 3")
|
|
820
|
+
self.st.meta.refinement = [ri]
|
|
821
|
+
self.st.raw_remarks = raw_remarks
|
|
822
|
+
|
|
733
823
|
# class Refine
|
servalcat/refine/refine_geom.py
CHANGED
|
@@ -14,7 +14,8 @@ import json
|
|
|
14
14
|
import servalcat # for version
|
|
15
15
|
from servalcat.utils import logger
|
|
16
16
|
from servalcat import utils
|
|
17
|
-
from servalcat.refine.refine import Geom, Refine
|
|
17
|
+
from servalcat.refine.refine import Geom, Refine, convert_stats_to_dicts
|
|
18
|
+
from servalcat.refmac import refmac_keywords
|
|
18
19
|
|
|
19
20
|
def add_arguments(parser):
|
|
20
21
|
group = parser.add_mutually_exclusive_group(required=True)
|
|
@@ -43,6 +44,8 @@ def add_arguments(parser):
|
|
|
43
44
|
help="refmac keyword file(s)")
|
|
44
45
|
parser.add_argument('-o','--output_prefix',
|
|
45
46
|
help="Output prefix")
|
|
47
|
+
parser.add_argument("--write_trajectory", action='store_true',
|
|
48
|
+
help="Write all output from cycles")
|
|
46
49
|
|
|
47
50
|
# add_arguments()
|
|
48
51
|
|
|
@@ -53,24 +56,32 @@ def parse_args(arg_list):
|
|
|
53
56
|
# parse_args()
|
|
54
57
|
|
|
55
58
|
def add_program_info_to_dictionary(block, comp_id, program_name="servalcat", descriptor="optimization tool"):
|
|
56
|
-
|
|
57
|
-
#
|
|
58
|
-
for
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
59
|
+
# old acedrg used _pdbx_chem_comp_description_generator. and descriptor
|
|
60
|
+
# new acedrg (>280?) uses _acedrg_chem_comp_descriptor. and type
|
|
61
|
+
for tag, name in (("_acedrg_chem_comp_descriptor.", "type"),
|
|
62
|
+
("_pdbx_chem_comp_description_generator.", "descriptor")):
|
|
63
|
+
tab = block.find(tag, ["program_name", "program_version", name])
|
|
64
|
+
if tab:
|
|
65
|
+
loop = tab.loop
|
|
66
|
+
# just overwrite version if it's there
|
|
67
|
+
for row in tab:
|
|
68
|
+
if row.str(0) == program_name and row.str(2) == descriptor:
|
|
69
|
+
row[1] = gemmi.cif.quote(servalcat.__version__)
|
|
70
|
+
return
|
|
71
|
+
break
|
|
72
|
+
else:
|
|
73
|
+
# it may be strange to say _acedrg in this case..
|
|
74
|
+
name = "type"
|
|
75
|
+
loop = block.init_loop("_acedrg_chem_comp_descriptor.", ["comp_id",
|
|
76
|
+
"program_name",
|
|
77
|
+
"program_version",
|
|
78
|
+
name])
|
|
68
79
|
tags = [x[x.index(".")+1:] for x in loop.tags]
|
|
69
80
|
row = ["" for _ in range(len(tags))]
|
|
70
81
|
for tag, val in (("comp_id", comp_id),
|
|
71
82
|
("program_name", program_name),
|
|
72
83
|
("program_version", servalcat.__version__),
|
|
73
|
-
(
|
|
84
|
+
(name, descriptor)):
|
|
74
85
|
if tag in tags: row[tags.index(tag)] = val
|
|
75
86
|
loop.add_row(gemmi.cif.quote_list(row))
|
|
76
87
|
# add_program_info_to_dictionary()
|
|
@@ -122,18 +133,16 @@ def refine_and_update_dictionary(cif_in, monomer_dir, output_prefix, randomize=0
|
|
|
122
133
|
doc.write_file(output_prefix + "_updated.cif", style=gemmi.cif.Style.Aligned)
|
|
123
134
|
logger.writeln("Updated dictionary saved: {}".format(output_prefix + "_updated.cif"))
|
|
124
135
|
with open(output_prefix + "_stats.json", "w") as ofs:
|
|
125
|
-
for
|
|
126
|
-
|
|
127
|
-
s["geom"] = s["geom"].to_dict()
|
|
128
|
-
json.dump(all_stats, ofs, indent=2)
|
|
136
|
+
json.dump([convert_stats_to_dicts(x) for x in all_stats],
|
|
137
|
+
ofs, indent=2)
|
|
129
138
|
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
130
139
|
# refine_and_update_dictionary()
|
|
131
140
|
|
|
132
|
-
def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize,
|
|
141
|
+
def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefix, randomize, params,
|
|
133
142
|
find_links=False, use_ncsr=False):
|
|
134
143
|
st = utils.fileio.read_structure(model_in)
|
|
135
144
|
utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
|
|
136
|
-
if st.ncs:
|
|
145
|
+
if not all(op.given for op in st.ncs):
|
|
137
146
|
st2 = st.clone()
|
|
138
147
|
logger.writeln("Take NCS constraints into account.")
|
|
139
148
|
st2.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
|
|
@@ -141,27 +150,29 @@ def refine_geom(model_in, monomer_dir, cif_files, h_change, ncycle, output_prefi
|
|
|
141
150
|
|
|
142
151
|
monlib = utils.restraints.load_monomer_library(st, monomer_dir=monomer_dir,
|
|
143
152
|
cif_files=cif_files,
|
|
144
|
-
stop_for_unknowns=True
|
|
145
|
-
|
|
153
|
+
stop_for_unknowns=True,
|
|
154
|
+
params=params)
|
|
155
|
+
utils.restraints.find_and_fix_links(st, monlib, find_metal_links=find_links,
|
|
156
|
+
add_found=find_links) # should remove unknown id here?
|
|
146
157
|
try:
|
|
147
|
-
topo,
|
|
148
|
-
|
|
158
|
+
topo, _ = utils.restraints.prepare_topology(st, monlib, h_change=h_change,
|
|
159
|
+
check_hydrogen=(h_change==gemmi.HydrogenChange.NoChange),
|
|
160
|
+
params=params)
|
|
149
161
|
except RuntimeError as e:
|
|
150
162
|
raise SystemExit("Error: {}".format(e))
|
|
151
|
-
|
|
163
|
+
|
|
152
164
|
if use_ncsr:
|
|
153
165
|
ncslist = utils.restraints.prepare_ncs_restraints(st)
|
|
154
166
|
else:
|
|
155
167
|
ncslist = False
|
|
156
|
-
geom = Geom(st, topo, monlib, shake_rms=randomize,
|
|
157
|
-
refiner = Refine(st, geom)
|
|
158
|
-
stats = refiner.run_cycles(ncycle
|
|
168
|
+
geom = Geom(st, topo, monlib, shake_rms=randomize, params=params, ncslist=ncslist)
|
|
169
|
+
refiner = Refine(st, geom, params=params)
|
|
170
|
+
stats = refiner.run_cycles(ncycle,
|
|
171
|
+
stats_json_out=output_prefix + "_stats.json")
|
|
159
172
|
refiner.st.name = output_prefix
|
|
160
173
|
utils.fileio.write_model(refiner.st, output_prefix, pdb=True, cif=True)
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
json.dump(stats, ofs, indent=2)
|
|
164
|
-
logger.writeln("Refinement statistics saved: {}".format(ofs.name))
|
|
174
|
+
if params["write_trajectory"]:
|
|
175
|
+
utils.fileio.write_model(refiner.st_traj, output_prefix + "_traj", cif=True)
|
|
165
176
|
# refine_geom()
|
|
166
177
|
|
|
167
178
|
def main(args):
|
|
@@ -170,6 +181,8 @@ def main(args):
|
|
|
170
181
|
if args.keyword_file: keywords.extend(l for f in sum(args.keyword_file, []) for l in open(f))
|
|
171
182
|
decide_prefix = lambda f: utils.fileio.splitext(os.path.basename(f))[0] + "_refined"
|
|
172
183
|
if args.model:
|
|
184
|
+
params = refmac_keywords.parse_keywords(keywords)
|
|
185
|
+
params["write_trajectory"] = args.write_trajectory
|
|
173
186
|
if not args.output_prefix:
|
|
174
187
|
args.output_prefix = decide_prefix(args.model)
|
|
175
188
|
if args.ligand:
|
|
@@ -185,7 +198,7 @@ def main(args):
|
|
|
185
198
|
ncycle=args.ncycle,
|
|
186
199
|
output_prefix=args.output_prefix,
|
|
187
200
|
randomize=args.randomize,
|
|
188
|
-
|
|
201
|
+
params=params,
|
|
189
202
|
find_links=args.find_links,
|
|
190
203
|
use_ncsr=args.ncsr)
|
|
191
204
|
else:
|