servalcat 0.4.60__cp312-cp312-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of servalcat might be problematic. Click here for more details.
- servalcat/__init__.py +10 -0
- servalcat/__main__.py +120 -0
- servalcat/ext.cp312-win_amd64.pyd +0 -0
- servalcat/refine/__init__.py +0 -0
- servalcat/refine/cgsolve.py +100 -0
- servalcat/refine/refine.py +733 -0
- servalcat/refine/refine_geom.py +207 -0
- servalcat/refine/refine_spa.py +327 -0
- servalcat/refine/refine_xtal.py +242 -0
- servalcat/refine/spa.py +132 -0
- servalcat/refine/xtal.py +227 -0
- servalcat/refmac/__init__.py +0 -0
- servalcat/refmac/exte.py +182 -0
- servalcat/refmac/refmac_keywords.py +536 -0
- servalcat/refmac/refmac_wrapper.py +360 -0
- servalcat/spa/__init__.py +0 -0
- servalcat/spa/fofc.py +462 -0
- servalcat/spa/fsc.py +385 -0
- servalcat/spa/localcc.py +188 -0
- servalcat/spa/realspcc_from_var.py +128 -0
- servalcat/spa/run_refmac.py +961 -0
- servalcat/spa/shift_maps.py +293 -0
- servalcat/spa/shiftback.py +137 -0
- servalcat/spa/translate.py +129 -0
- servalcat/utils/__init__.py +35 -0
- servalcat/utils/commands.py +1277 -0
- servalcat/utils/fileio.py +745 -0
- servalcat/utils/generate_operators.py +296 -0
- servalcat/utils/hkl.py +699 -0
- servalcat/utils/logger.py +116 -0
- servalcat/utils/maps.py +340 -0
- servalcat/utils/model.py +774 -0
- servalcat/utils/refmac.py +747 -0
- servalcat/utils/restraints.py +605 -0
- servalcat/utils/symmetry.py +295 -0
- servalcat/xtal/__init__.py +0 -0
- servalcat/xtal/french_wilson.py +250 -0
- servalcat/xtal/run_refmac_small.py +240 -0
- servalcat/xtal/sigmaa.py +1403 -0
- servalcat-0.4.60.dist-info/METADATA +56 -0
- servalcat-0.4.60.dist-info/RECORD +44 -0
- servalcat-0.4.60.dist-info/WHEEL +5 -0
- servalcat-0.4.60.dist-info/entry_points.txt +4 -0
- servalcat-0.4.60.dist-info/licenses/LICENSE +373 -0
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import gemmi
|
|
10
|
+
import numpy
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import shutil
|
|
14
|
+
import argparse
|
|
15
|
+
from servalcat.utils import logger
|
|
16
|
+
from servalcat import utils
|
|
17
|
+
from servalcat.xtal.sigmaa import decide_mtz_labels, process_input, calculate_maps, calculate_maps_int
|
|
18
|
+
from servalcat.refine.xtal import LL_Xtal
|
|
19
|
+
from servalcat.refine.refine import Geom, Refine
|
|
20
|
+
b_to_u = utils.model.b_to_u
|
|
21
|
+
|
|
22
|
+
def add_arguments(parser):
    """Attach the options of the crystallographic refinement program to *parser*.

    The parser's description is overwritten, then every option is registered
    in a fixed order.  ``--hklin``, ``--model`` and ``-s/--source`` are the
    only required options.  Nothing is returned; *parser* is modified in place.
    """
    add = parser.add_argument  # every option goes through the same bound method
    parser.description = "EXPERIMENTAL program to refine crystallographic structures"

    # Reflection data and resolution limits
    add("--hklin", required=True)
    add("-d", "--d_min", type=float)
    add("--d_max", type=float)
    add("--nbins", type=int, help="Number of bins (default: auto)")
    add("--labin", help="F,SIGF,FREE input")
    add("--free", type=int, help="flag number for test set")

    # Model and restraint dictionaries
    add("--model", required=True, help="Input atomic model file")
    add("--monlib", help="Monomer library path. Default: $CLIBD_MON")
    add("--ligand", nargs="*", action="append", help="restraint dictionary cif file(s)")
    add("--hydrogen", default="all", choices=["all", "yes", "no"],
        help="all: add riding hydrogen atoms, yes: use hydrogen atoms if present, no: remove hydrogen atoms in input. Default: %(default)s")

    # Jelly-body restraints and link detection
    add("--jellybody", action="store_true", help="Use jelly body restraints")
    add("--jellybody_params", nargs=2, type=float, metavar=("sigma", "dmax"), default=[0.01, 4.2],
        help="Jelly body sigma and dmax (default: %(default)s)")
    add("--jellyonly", action="store_true", help="Jelly body only (experimental, may not be useful)")
    add("--find_links", action="store_true", help="Automatically add links")

    # Refmac-style keywords (each occurrence appends one list of values)
    add("--keywords", nargs="+", action="append", help="refmac keyword(s)")
    add("--keyword_file", nargs="+", action="append", help="refmac keyword file(s)")

    # Refinement protocol
    add("--randomize", type=float, default=0, help="Shake coordinates with specified rmsd")
    add("--ncycle", type=int, default=10, help="number of CG cycles (default: %(default)d)")
    add("--weight", type=float, help="refinement weight (default: auto)")
    add("--ncsr", action="store_true", help="Use local NCS restraints")

    # ADP (B-factor) handling
    add("--adpr_weight", type=float, default=1., help="ADP restraint weight (default: %(default)f)")
    add("--bfactor", type=float, help="reset all atomic B values to specified value")
    add("--fix_xyz", action="store_true")
    add("--adp", choices=["fix", "iso", "aniso"], default="iso")
    add("--max_dist_for_adp_restraint", type=float, default=4.)
    add("--adp_restraint_power", type=float)
    add("--adp_restraint_exp_fac", type=float)
    add("--adp_restraint_no_long_range", action="store_true")
    add("--adp_restraint_mode", choices=["diff", "kldiv"], default="kldiv")

    # Target function and scattering model
    add("--unrestrained", action="store_true", help="No positional restraints")
    add("--refine_h", action="store_true", help="Refine hydrogen (default: restraints only)")
    add("-s", "--source", choices=["electron", "xray", "neutron"], required=True)
    add("--no_solvent", action="store_true", help="Do not consider bulk solvent contribution")
    add("--use_work_in_est", action="store_true", help="Use work reflections in ML parameter estimates")
    add("--keep_charges", action="store_true",
        help="Use scattering factor for charged atoms. Use it with care.")

    # Output
    add("-o", "--output_prefix")
# add_arguments()
|
|
84
|
+
|
|
85
|
+
def parse_args(arg_list):
    """Parse *arg_list* with a fresh parser populated by add_arguments().

    Returns the namespace produced by ``ArgumentParser.parse_args``.
    """
    arg_parser = argparse.ArgumentParser()
    add_arguments(arg_parser)
    parsed = arg_parser.parse_args(arg_list)
    return parsed
# parse_args()
|
|
90
|
+
|
|
91
|
+
def main(args):
    """Run one crystallographic refinement job from a parsed argument namespace.

    Steps, in order: collect refmac keywords, read reflections and the model
    through process_input(), build restraints (unless ``--unrestrained``),
    run ``args.ncycle`` refinement cycles, then write the outputs.

    Outputs (all named from ``args.output_prefix``):
      * the refined model via utils.fileio.write_model (pdb and cif requested)
      * ``<prefix>_stats.json`` with the per-cycle statistics
      * ``<prefix>.mtz`` with observations, map coefficients and ML parameters

    *args* is modified in place (``ligand``, ``output_prefix``, ``free`` and
    ``weight`` may be filled in).

    Raises:
        SystemExit: when process_input(), monomer library loading or topology
            preparation raises RuntimeError.
        AssertionError: for neutron data combined with ``--refine_h``.
    """
    if args.source == "neutron": assert not args.refine_h # we need deuterium fraction handling in LL
    if args.ligand: args.ligand = sum(args.ligand, [])  # flatten list of lists from action="append"
    if not args.output_prefix:
        args.output_prefix = utils.fileio.splitext(os.path.basename(args.model))[0] + "_refined"

    # Collect keywords from the command line first, then from keyword files.
    keywords = []
    if args.keywords:
        keywords = sum(args.keywords, [])
    if args.keyword_file:
        for keyword_path in sum(args.keyword_file, []):
            # Close each file deterministically (it was previously left open).
            with open(keyword_path) as ifs:
                keywords.extend(ifs)

    hklin = args.hklin
    labin = args.labin
    if labin is not None:
        labin = labin.split(",")
    elif utils.fileio.is_mmhkl_file(hklin):
        # No labels given: read the file now and pick the labels automatically.
        hklin = utils.fileio.read_mmhkl(hklin)
        labin = decide_mtz_labels(hklin)

    if labin and len(labin) == 3: # with test flags
        use_in_target = "work"
        if args.use_work_in_est:
            use_in_est = "work"
            n_per_bin = 100
        else:
            use_in_est = "test"
            n_per_bin = 50
    else:
        use_in_est = "all"
        use_in_target = "all"
        n_per_bin = 100

    try:
        hkldata, sts, fc_labs, centric_and_selections, args.free = process_input(hklin=hklin,
                                                                                 labin=labin,
                                                                                 n_bins=args.nbins,
                                                                                 free=args.free,
                                                                                 xyzins=[args.model],
                                                                                 source=args.source,
                                                                                 d_max=args.d_max,
                                                                                 d_min=args.d_min,
                                                                                 n_per_bin=n_per_bin,
                                                                                 use=use_in_est,
                                                                                 max_bins=30,
                                                                                 keep_charges=args.keep_charges)
    except RuntimeError as e:
        raise SystemExit("Error: {}".format(e))

    is_int = "I" in hkldata.df  # intensity data if an "I" column is present
    st = sts[0]
    utils.model.fix_deuterium_residues(st)
    if args.unrestrained:
        monlib = gemmi.MonLib()
        topo = None
        if args.hydrogen == "all":
            logger.writeln("WARNING: in unrestrained refinement hydrogen atoms are not generated.")
        elif args.hydrogen == "no":
            st.remove_hydrogens()
        # NOTE(review): indentation was lost in the listing this was recovered from;
        # renumbering is assumed to apply to every unrestrained run - confirm.
        for i, cra in enumerate(st[0].all()):
            cra.atom.serial = i + 1
    else:
        try:
            monlib = utils.restraints.load_monomer_library(st, monomer_dir=args.monlib, cif_files=args.ligand,
                                                           stop_for_unknowns=True)
        except RuntimeError as e:
            raise SystemExit("Error: {}".format(e))
        utils.model.setup_entities(st, clear=True, force_subchain_names=True, overwrite_entity_type=True)
        utils.restraints.find_and_fix_links(st, monlib, add_found=args.find_links)
        h_change = {"all":gemmi.HydrogenChange.ReAddKnown,
                    "yes":gemmi.HydrogenChange.NoChange,
                    "no":gemmi.HydrogenChange.Remove}[args.hydrogen]
        try:
            topo, metal_kws = utils.restraints.prepare_topology(st, monlib, h_change=h_change,
                                                                check_hydrogen=(args.hydrogen=="yes"))
        except RuntimeError as e:
            raise SystemExit("Error: {}".format(e))
        # Keywords from topology preparation go first, user keywords after them.
        keywords = metal_kws + keywords
    # initialize ADP
    if args.adp != "fix":
        utils.model.reset_adp(st[0], args.bfactor, args.adp == "aniso")

    # auto weight
    if args.weight is None:
        logger.writeln("Estimating weight using resolution")
        reso = hkldata.d_min_max()[0]
        args.weight = numpy.exp(reso * 0.9104 + 0.2162)
        logger.writeln(" Will use weight= {:.2f}".format(args.weight))

    if args.ncsr:
        ncslist = utils.restraints.prepare_ncs_restraints(st)
    else:
        ncslist = False
    geom = Geom(st, topo, monlib, shake_rms=args.randomize, adpr_w=args.adpr_weight, refmac_keywords=keywords,
                unrestrained=args.unrestrained or args.jellyonly, use_nucleus=(args.source=="neutron"),
                ncslist=ncslist)
    geom.geom.adpr_max_dist = args.max_dist_for_adp_restraint
    if args.adp_restraint_power is not None: geom.geom.adpr_d_power = args.adp_restraint_power
    if args.adp_restraint_exp_fac is not None: geom.geom.adpr_exp_fac = args.adp_restraint_exp_fac
    if args.adp_restraint_no_long_range: geom.geom.adpr_long_range = False
    geom.geom.adpr_mode = args.adp_restraint_mode
    if args.jellybody or args.jellyonly:
        geom.geom.ridge_sigma, geom.geom.ridge_dmax = args.jellybody_params
    if args.jellyonly: geom.geom.ridge_exclude_short_dist = False

    ll = LL_Xtal(hkldata, centric_and_selections, args.free, st, monlib, source=args.source,
                 use_solvent=not args.no_solvent, use_in_est=use_in_est, use_in_target=use_in_target)
    refiner = Refine(st, geom, ll=ll,
                     refine_xyz=not args.fix_xyz,
                     adp_mode=dict(fix=0, iso=1, aniso=2)[args.adp],
                     #refine_occ=True,
                     refine_h=args.refine_h,
                     unrestrained=args.unrestrained,
                     refmac_keywords=keywords)

    stats = refiner.run_cycles(args.ncycle, weight=args.weight)
    refiner.st.name = args.output_prefix
    utils.fileio.write_model(refiner.st, args.output_prefix, pdb=True, cif=True)
    with open(args.output_prefix + "_stats.json", "w") as ofs:
        # Geometry statistics objects are converted to plain dicts before dumping.
        for s in stats:
            if "geom" in s: s["geom"] = s["geom"].to_dict()
        json.dump(stats, ofs, indent=2)
        logger.writeln("Refinement statistics saved: {}".format(ofs.name))

    if is_int:
        calculate_maps_int(ll.hkldata, ll.b_aniso, ll.fc_labs, ll.D_labs, centric_and_selections,
                           use=use_in_target)
    else:
        calculate_maps(ll.hkldata, ll.b_aniso, centric_and_selections, ll.fc_labs, ll.D_labs, args.output_prefix + "_stats.log",
                       use=use_in_target)

    # Write mtz file
    if is_int:
        labs = ["I", "SIGI", "FOM"]
    else:
        labs = ["FP", "SIGFP", "FOM"]
    labs.extend(["FWT", "DELFWT", "FC"])
    if "FAN" in hkldata.df:
        labs.append("FAN")
    if not args.no_solvent:
        labs.append("FCbulk")
    if "FREE" in hkldata.df:
        labs.append("FREE")
    labs += ll.D_labs + ["S"] # for debugging, for now
    mtz_out = args.output_prefix+".mtz"
    hkldata.write_mtz(mtz_out, labs=labs, types={"FOM": "W", "FP":"F", "SIGFP":"Q", "I":"J", "SIGI":"Q"})

# main()
|
|
238
|
+
|
|
239
|
+
if __name__ == "__main__":
    # Script entry point: parse the command line (program name dropped) and refine.
    import sys
    cli_arguments = sys.argv[1:]
    args = parse_args(cli_arguments)
    main(args)
|
servalcat/refine/spa.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import gemmi
|
|
10
|
+
import numpy
|
|
11
|
+
import json
|
|
12
|
+
import scipy.sparse
|
|
13
|
+
from servalcat.utils import logger
|
|
14
|
+
from servalcat import utils
|
|
15
|
+
from servalcat.spa import fofc
|
|
16
|
+
from servalcat.spa import fsc
|
|
17
|
+
from servalcat import ext
|
|
18
|
+
b_to_u = utils.model.b_to_u
|
|
19
|
+
u_to_b = utils.model.u_to_b
|
|
20
|
+
|
|
21
|
+
def calc_D_and_S(hkldata, lab_obs): # simplified version of fofc.calc_D_and_S()
    """Fill per-bin ``D`` and ``S`` columns of ``hkldata.binned_df`` in place.

    For each bin yielded by ``hkldata.binned()``:
      * D = nansum(Re(Fo * conj(Fc))) / sum(|Fc|^2)
      * S = nanmean(|Fo - D * Fc|^2)
    where Fo is column *lab_obs* and Fc is column ``FC`` of ``hkldata.df``.
    Both columns are reset to 0 before the loop.  Nothing is returned.
    """
    per_bin = hkldata.binned_df
    for column in ("D", "S"):
        per_bin[column] = 0.
    for i_bin, idxes in hkldata.binned():
        f_obs = hkldata.df[lab_obs].to_numpy()[idxes]
        f_calc = hkldata.df.FC.to_numpy()[idxes]
        cross_term = numpy.nansum(numpy.real(f_obs * numpy.conj(f_calc)))
        calc_power = numpy.sum(numpy.abs(f_calc)**2)
        per_bin.loc[i_bin, "D"] = cross_term / calc_power
        # S uses the D value just stored for this bin
        residual = f_obs - per_bin.D[i_bin] * f_calc
        per_bin.loc[i_bin, "S"] = numpy.nanmean(numpy.abs(residual)**2)
# calc_D_and_S()
|
|
31
|
+
|
|
32
|
+
class LL_SPA:
    """Data term (-log-likelihood) for refinement against complex Fourier coefficients.

    The object keeps references to the reflection table (*hkldata*), the model
    (*st*) and the monomer library, and provides the steps a refinement cycle
    needs: recompute Fc, overall scaling, per-bin D/S estimation, target value,
    statistics, and gradient / diagonal Fisher information through ``ext.LL``.

    Attributes:
        source: "electron" or "xray".
        mott_bethe: only ever True for electron source.
        lab_obs: column of ``hkldata.df`` holding the observed coefficients.
        d_min: first element of ``hkldata.d_min_max()``.
        ll: the ``ext.LL`` object of the most recent calc_grad() call, or None.
    """
    def __init__(self, hkldata, st, monlib, lab_obs, source="electron", mott_bethe=True):
        assert source in ("electron", "xray")
        self.source = source
        # Mott-Bethe is meaningful only for electron scattering; force it off otherwise.
        self.mott_bethe = False if source != "electron" else mott_bethe
        self.hkldata = hkldata
        self.lab_obs = lab_obs
        self.st = st
        self.monlib = monlib
        self.d_min = hkldata.d_min_max()[0]
        self.ll = None

    def update_ml_params(self):
        """Re-estimate per-bin D and S and log the resulting table."""
        # FIXME make sure D > 0
        calc_D_and_S(self.hkldata, self.lab_obs)
        logger.writeln(self.hkldata.binned_df.to_string(columns=["d_max", "d_min", "D", "S"]))

    def update_fc(self):
        """Recompute the ``FC`` column from the current model.

        When the structure carries NCS operators, Fc is computed from an
        NCS-expanded clone so that ``self.st`` itself is left untouched.
        """
        if self.st.ncs:
            st = self.st.clone()
            st.expand_ncs(gemmi.HowToNameCopiedChain.Short, merge_dist=0)
        else:
            st = self.st

        self.hkldata.df["FC"] = utils.model.calc_fc_fft(st, self.d_min - 1e-6,
                                                        monlib=self.monlib,
                                                        source=self.source,
                                                        mott_bethe=self.mott_bethe,
                                                        miller_array=self.hkldata.miller_array())

    def overall_scale(self, min_b=0.5):
        """Scale Fc against the observations and fold the overall B into the model.

        The fitted B is shifted, if needed, so that the smallest model B plus
        the overall B does not fall below *min_b*.  The B shift is applied to
        the model and to ``FC``; the observations are divided by the scale k.
        """
        k, b = self.hkldata.scale_k_and_b(lab_ref=self.lab_obs, lab_scaled="FC")
        min_b_iso = utils.model.minimum_b(self.st[0]) # actually min of aniso too
        tmp = min_b_iso + b
        if tmp < min_b: # perhaps better only adjust b_iso that went too small, but we need to recalculate Fc
            logger.writeln("Adjusting overall B to avoid too small value")
            b += min_b - tmp
        logger.writeln("Applying overall B to model: {:.2f}".format(b))
        utils.model.shift_b(self.st[0], b)
        # adjust Fc
        k_iso = self.hkldata.debye_waller_factors(b_iso=b)
        self.hkldata.df["FC"] *= k_iso
        # adjust Fo
        self.hkldata.df[self.lab_obs] /= k
    # overall_scale()

    def calc_target(self): # -LL target for SPA
        """Return the target: 2 * sum over bins of (nansum|Fo - D*Fc|^2 / S + n * log S)."""
        ret = 0
        for i_bin, idxes in self.hkldata.binned():
            Fo = self.hkldata.df[self.lab_obs].to_numpy()[idxes]
            DFc = self.hkldata.df.FC.to_numpy()[idxes] * self.hkldata.binned_df.D[i_bin]
            S = self.hkldata.binned_df.S[i_bin]
            ret += numpy.nansum(numpy.abs(Fo - DFc)**2) / S + numpy.log(S) * len(idxes)
        return ret * 2 # friedel mates
    # calc_target()

    def calc_stats(self, bin_stats=False):
        """Return ``{"bin_stats": ..., "summary": {"FSCaverage": ..., "-LL": ...}}``.

        *bin_stats* is accepted for interface parity but currently ignored:
        the binned table is always included.
        """
        # ignore bin_stats for now. better stats are calculated after refinement
        stats = fsc.calc_fsc_all(self.hkldata, labs_fc=["FC"], lab_f=self.lab_obs)
        fsca = fsc.fsc_average(stats.ncoeffs, stats.fsc_FC_full)
        logger.writeln("FSCaverage = {:.4f}".format(fsca))
        # XXX in fsc object, _full is misleading - it's not full in cross validation mode
        return {"bin_stats": stats, "summary": {"FSCaverage": fsca, "-LL": self.calc_target()}}

    def calc_grad(self, atom_pos, refine_xyz, adp_mode, refine_occ, refine_h, specs=None):
        """Compute gradient and diagonal Fisher information; results live in ``self.ll``.

        *specs* now defaults to None (matching LL_Xtal.calc_grad); when given,
        ``spec_correction(specs, use_rr=False)`` is applied at the end.
        """
        # Zero-initialised so that reflections not covered by any bin contribute
        # nothing to the gradient map instead of carrying uninitialised memory.
        dll_dab = numpy.zeros_like(self.hkldata.df[self.lab_obs])
        d2ll_dab2 = numpy.zeros(len(self.hkldata.df.index))
        blur = utils.model.determine_blur_for_dencalc(self.st, self.d_min / 3) # TODO need more work
        logger.writeln("blur for deriv= {:.2f}".format(blur))
        for i_bin, idxes in self.hkldata.binned():
            D = self.hkldata.binned_df.D[i_bin]
            S = self.hkldata.binned_df.S[i_bin]
            Fc = self.hkldata.df.FC.to_numpy()[idxes]
            Fo = self.hkldata.df[self.lab_obs].to_numpy()[idxes]
            dll_dab[idxes] = -2 * D / S * (Fo - D * Fc)#.conj()
            d2ll_dab2[idxes] = 2 * D**2 / S

        if self.mott_bethe:
            dll_dab *= self.hkldata.d_spacings()**2 * gemmi.mott_bethe_const()
            d2ll_dab2 *= gemmi.mott_bethe_const()**2

        # we need V for Hessian and V**2/n for gradient.
        d2ll_dab2 *= self.hkldata.cell.volume
        dll_dab_den = self.hkldata.fft_map(data=dll_dab * self.hkldata.debye_waller_factors(b_iso=-blur))
        dll_dab_den.array[:] *= self.hkldata.cell.volume**2 / dll_dab_den.point_count
        self.ll = ext.LL(self.st, atom_pos, self.mott_bethe, refine_xyz, adp_mode, refine_occ, refine_h)
        # Only NCS operators not flagged as "given" are passed on.
        self.ll.set_ncs([x.tr for x in self.st.ncs if not x.given])
        self.ll.calc_grad_it92(dll_dab_den, blur)

        # second derivative
        d2dfw_table = ext.TableS3(*self.hkldata.d_min_max())
        d2dfw_table.make_table(1./self.hkldata.d_spacings(), d2ll_dab2)
        self.ll.make_fisher_table_diag_fast_it92(d2dfw_table)
        self.ll.fisher_diag_from_table_it92()
        if specs is not None:
            self.ll.spec_correction(specs, use_rr=False)
|
servalcat/refine/xtal.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: "Keitaro Yamashita, Garib N. Murshudov"
|
|
3
|
+
MRC Laboratory of Molecular Biology
|
|
4
|
+
|
|
5
|
+
This software is released under the
|
|
6
|
+
Mozilla Public License, version 2.0; see LICENSE.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import absolute_import, division, print_function, generators
|
|
9
|
+
import gemmi
|
|
10
|
+
import numpy
|
|
11
|
+
import json
|
|
12
|
+
import scipy.sparse
|
|
13
|
+
from servalcat.utils import logger
|
|
14
|
+
from servalcat.xtal import sigmaa
|
|
15
|
+
from servalcat import utils
|
|
16
|
+
from servalcat import ext
|
|
17
|
+
b_to_u = utils.model.b_to_u
|
|
18
|
+
u_to_b = utils.model.u_to_b
|
|
19
|
+
integr = sigmaa.integr
|
|
20
|
+
|
|
21
|
+
class LL_Xtal:
    """Data term (-log-likelihood) for refinement against crystallographic observations.

    Works on amplitudes (``FP``/``SIGFP``) or, when ``hkldata.df`` has an
    ``I`` column, on intensities (``I``/``SIGI``).  Fc is kept as one column
    per component (``fc_labs``: the atomic model, plus ``FCbulk`` when bulk
    solvent is enabled), each paired with a D column (``D_labs``).
    """
    def __init__(self, hkldata, centric_and_selections, free, st, monlib, source="xray", mott_bethe=True,
                 use_solvent=False, use_in_est="all", use_in_target="all"):
        assert source in ("electron", "xray", "neutron")
        self.source = source
        # Mott-Bethe can only be active for electron scattering.
        self.mott_bethe = mott_bethe if source == "electron" else False
        self.hkldata = hkldata
        self.is_int = "I" in self.hkldata.df
        self.centric_and_selections = centric_and_selections
        self.free = free
        self.st = st
        self.monlib = monlib
        self.d_min = hkldata.d_min_max()[0]
        self.fc_labs = ["FC0"]
        self.use_solvent = use_solvent
        if use_solvent:
            self.fc_labs.append("FCbulk")
            self.hkldata.df["FCbulk"] = 0j
        # one D column per Fc component: D0, D1, ...
        self.D_labs = ["D{}".format(i) for i, _ in enumerate(self.fc_labs)]
        self.k_overall = numpy.ones(len(self.hkldata.df.index))
        self.b_aniso = None
        self.hkldata.df["k_aniso"] = 1.
        self.use_in_est = use_in_est
        self.use_in_target = use_in_target
        self.ll = None
        self.scaling = sigmaa.LsqScale()
        for message in ("will use {} reflections for parameter estimation".format(self.use_in_est),
                        "will use {} reflections for refinement".format(self.use_in_target)):
            logger.writeln(message)

    def update_ml_params(self):
        """Re-estimate the ML parameters and refresh the ``k_aniso`` column."""
        hkl = self.hkldata
        self.b_aniso = sigmaa.determine_ml_params(hkl, self.is_int, self.fc_labs, self.D_labs, self.b_aniso,
                                                  self.centric_and_selections, use=self.use_in_est)
        hkl.df["k_aniso"] = hkl.debye_waller_factors(b_cart=self.b_aniso)

    def update_fc(self):
        """Recompute the atomic-model Fc column and the summed ``FC`` column.

        With NCS operators present, an NCS-expanded clone of the structure is
        used for the calculation; ``self.st`` is not modified.
        """
        if self.st.ncs:
            model = self.st.clone()
            model.expand_ncs(gemmi.HowToNameCopiedChain.Dup, merge_dist=0)
        else:
            model = self.st

        hkl = self.hkldata
        hkl.df[self.fc_labs[0]] = utils.model.calc_fc_fft(model, self.d_min - 1e-6,
                                                          monlib=self.monlib,
                                                          source=self.source,
                                                          mott_bethe=self.mott_bethe,
                                                          miller_array=hkl.miller_array())
        hkl.df["FC"] = hkl.df[self.fc_labs].sum(axis=1)

    def overall_scale(self, min_b=0.1):
        """Least-squares scale Fc against the observations and absorb the result.

        The isotropic B from the scaling is shifted, if needed, so that the
        smallest model B plus that B does not drop below *min_b*; it is then
        applied to the model and to every Fc column.  Observation columns are
        divided by the overall scale (squared for intensities).  The scaling
        object's ``k_overall``/``b_iso`` are reset afterwards.
        """
        hkl = self.hkldata
        components = [hkl.df[self.fc_labs[0]].to_numpy()]
        if self.use_solvent:
            Fmask = sigmaa.calc_Fmask(self.st, self.d_min - 1e-6, hkl.miller_array())
            components.append(Fmask)

        self.scaling.set_data(hkl, components, self.is_int, sigma_cutoff=0)
        self.scaling.scale()
        self.b_aniso = self.scaling.b_aniso
        b = self.scaling.b_iso
        lowest_b = utils.model.minimum_b(self.st[0]) # actually min of aniso too
        tmp = lowest_b + b
        if tmp < min_b: # perhaps better only adjust b_iso that went too small, but we need to recalculate Fc
            logger.writeln(" Adjusting overall B to avoid too small value")
            b += min_b - tmp
        logger.writeln(" Applying overall B to model: {:.2f}".format(b))
        utils.model.shift_b(self.st[0], b)
        k_iso = hkl.debye_waller_factors(b_iso=b)
        hkl.df["k_aniso"] = hkl.debye_waller_factors(b_cart=self.b_aniso)
        if self.use_solvent:
            solvent_scale = self.scaling.get_solvent_scale(self.scaling.k_sol, self.scaling.b_sol,
                                                           1. / hkl.d_spacings().to_numpy()**2)
            hkl.df[self.fc_labs[-1]] = Fmask * solvent_scale

        # Pick the observation columns that exist and the matching divisor.
        if self.is_int:
            wanted = ["I", "SIGI",
                      "I(+)","SIGI(+)", "I(-)", "SIGI(-)"]
            o_labs = hkl.df.columns.intersection(wanted)
            hkl.df[o_labs] /= self.scaling.k_overall**2
        else:
            wanted = ["FP", "SIGFP",
                      "F(+)","SIGF(+)", "F(-)", "SIGF(-)"]
            o_labs = hkl.df.columns.intersection(wanted)
            hkl.df[o_labs] /= self.scaling.k_overall

        for lab in self.fc_labs:
            hkl.df[lab] *= k_iso
        hkl.df["FC"] = hkl.df[self.fc_labs].sum(axis=1)

        # for next cycle
        self.scaling.k_overall = 1.
        self.scaling.b_iso = 0.
    # overall_scale()

    def calc_target(self): # -LL target for MLF or MLI
        """Return twice the sum over bins of sigmaa.mli (intensities) or sigmaa.mlf."""
        hkl = self.hkldata
        total = 0
        k_aniso = hkl.debye_waller_factors(b_cart=self.b_aniso)
        target_func = sigmaa.mli if self.is_int else sigmaa.mlf
        # Selection tuples are (centric flag, work indices, test indices).
        if self.use_in_target == "all":
            members = (1, 2)
        elif self.use_in_target == "work":
            members = (1,)
        else:
            members = (2,)
        for i_bin, _ in hkl.binned():
            idxes = numpy.concatenate([sel[i] for sel in self.centric_and_selections[i_bin] for i in members])
            d_values = numpy.vstack([hkl.df[lab].to_numpy()[idxes] for lab in self.D_labs]).T
            total += target_func(hkl.df,
                                 self.fc_labs,
                                 d_values,
                                 hkl.df.S.to_numpy()[idxes],
                                 k_aniso,
                                 idxes)
        return total * 2 # friedel mates
    # calc_target()

    def calc_stats(self, bin_stats=False):
        """Return ``{"summary": ...}`` (with "-LL" added), plus "bin_stats" on request."""
        stats, overall = sigmaa.calc_r_and_cc(self.hkldata, self.centric_and_selections)
        overall["-LL"] = self.calc_target()
        ret = {"summary": overall}
        if bin_stats:
            ret["bin_stats"] = stats
            # NOTE(review): indentation was lost in the listing this was recovered from;
            # the R/CC summary logging is assumed to belong to the bin_stats branch - confirm.
            for prefix in ("R", "CC"):
                logger.writeln(" ".join("{} = {:.4f}".format(x, overall[x]) for x in overall if x.startswith(prefix)))
        return ret

    def calc_grad(self, atom_pos, refine_xyz, adp_mode, refine_occ, refine_h, specs=None):
        """Compute gradient and diagonal Fisher information; results live in ``self.ll``.

        Per-reflection first and second derivatives are evaluated per bin and
        per centric/acentric class for the reflections selected by
        ``use_in_target``, then passed to ``ext.LL`` (the ``n92`` routines for
        neutron source, ``it92`` otherwise).  When *specs* is not None,
        ``spec_correction(specs)`` is applied last.
        """
        hkl = self.hkldata
        dll_dab = numpy.zeros(len(hkl.df.FC), dtype=numpy.complex128)
        # NaN marks reflections that were never visited.
        d2ll_dab2 = numpy.full(len(hkl.df.index), numpy.nan)
        blur = utils.model.determine_blur_for_dencalc(self.st, self.d_min / 3) # TODO need more work
        logger.writeln("blur for deriv= {:.2f}".format(blur))
        k_ani = hkl.debye_waller_factors(b_cart=self.b_aniso)
        for i_bin, _ in hkl.binned():
            for c, work, test in self.centric_and_selections[i_bin]:
                if self.use_in_target == "all":
                    sel = numpy.concatenate([work, test])
                elif self.use_in_target == "work":
                    sel = work
                else:
                    sel = test
                epsilon = hkl.df.epsilon.to_numpy()[sel]
                Fcs = numpy.vstack([hkl.df[lab].to_numpy()[sel] for lab in self.fc_labs]).T
                Ds = numpy.vstack([hkl.df[lab].to_numpy()[sel] for lab in self.D_labs]).T
                S = hkl.df["S"].to_numpy()[sel]
                Fc = (Ds * Fcs).sum(axis=1)
                Fc_abs = numpy.abs(Fc)
                expip = numpy.exp(1j * numpy.angle(Fc))
                k_sel = k_ani[sel]
                D0 = Ds[:,0] # XXX assuming 0 is atomic structure
                if self.is_int:
                    Io = hkl.df.I.to_numpy()[sel]
                    sigIo = hkl.df.SIGI.to_numpy()[sel]
                    to = Io / sigIo - sigIo / (c+1) / k_sel**2 / S / epsilon
                    tf = k_sel * Fc_abs / numpy.sqrt(sigIo)
                    sig1 = k_sel**2 * epsilon * S / sigIo
                    k_num = 0.5 if c == 0 else 0. # acentric:0.5, centric: 0.
                    r = ext.integ_J_ratio(k_num, k_num - 0.5, True, to, tf, sig1, c+1,
                                          integr.exp2_threshold, integr.h, integr.N, integr.ewmax)
                    r *= numpy.sqrt(sigIo) / k_sel
                    g = (2-c) * (Fc_abs - r) / epsilon / S * D0
                    curvature = g**2
                else:
                    Fo = hkl.df.FP.to_numpy()[sel] / k_sel
                    SigFo = hkl.df.SIGFP.to_numpy()[sel] / k_sel
                    if c == 0: # acentric
                        Sigma = 2 * SigFo**2 + epsilon * S
                        X = 2 * Fo * Fc_abs / Sigma
                        m = gemmi.bessel_i1_over_i0(X)
                        g = 2 * (Fc_abs - m * Fo) / Sigma * D0
                        curvature = (2 / Sigma - (1 - m / X - m**2) * (2 * Fo / Sigma)**2) * D0**2
                    else:
                        Sigma = SigFo**2 + epsilon * S
                        X = Fo * Fc_abs / Sigma
                        m = numpy.tanh(X)
                        g = (Fc_abs - m * Fo) / Sigma * D0
                        curvature = (1. / Sigma - (Fo / (Sigma * numpy.cosh(X)))**2) * D0**2
                # The gradient takes the phase of the D-weighted Fc.
                dll_dab[sel] = g * expip
                d2ll_dab2[sel] = curvature

        if self.mott_bethe:
            dll_dab *= hkl.d_spacings()**2 * gemmi.mott_bethe_const()
            d2ll_dab2 *= gemmi.mott_bethe_const()**2

        # we need V**2/n for gradient.
        dll_dab_den = hkl.fft_map(data=dll_dab * hkl.debye_waller_factors(b_iso=-blur))
        dll_dab_den.array[:] *= hkl.cell.volume**2 / dll_dab_den.point_count

        self.ll = ext.LL(self.st, atom_pos, self.mott_bethe, refine_xyz, adp_mode, refine_occ, refine_h)
        # Only NCS operators not flagged as "given" are passed on.
        self.ll.set_ncs([x.tr for x in self.st.ncs if not x.given])

        # First derivative, then the second-derivative table and its diagonal.
        if self.source == "neutron":
            self.ll.calc_grad_n92(dll_dab_den, blur)
            self.ll.make_fisher_table_diag_direct_n92(1./hkl.d_spacings().to_numpy(),
                                                      d2ll_dab2)
            self.ll.fisher_diag_from_table_n92()
        else:
            self.ll.calc_grad_it92(dll_dab_den, blur)
            self.ll.make_fisher_table_diag_direct_it92(1./hkl.d_spacings().to_numpy(),
                                                       d2ll_dab2)
            self.ll.fisher_diag_from_table_it92()
        if specs is not None:
            self.ll.spec_correction(specs)
|
|
File without changes
|